From a36a56b8ac5fea612c1d0614d4e1cb14ffc3a21b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal <jrosenthal@jhu.edu> Date: Sun, 6 Aug 2017 19:26:50 -0700 Subject: [PATCH] Docx reader: Avoid 0-level headers. We used to parse paragraphs styled with "HeadingN" as "nth-level header." But if a document has a custom style named "Heading0", this would produce a 0-level header, which shouldn't exist. We now treat such a style as a header only if N>0; otherwise we treat it as a normal style name and follow its dependencies, if any. Closes #3830. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 24615ba94..05ce691a6 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -61,7 +61,7 @@ import Control.Monad.Reader import Control.Monad.State.Strict import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B -import Data.Char (chr, isDigit, ord, readLitChar) +import Data.Char (chr, ord, readLitChar) import Data.List import qualified Data.Map as M import Data.Maybe @@ -939,19 +939,18 @@ elemToRunStyle ns element parentStyle } elemToRunStyle _ _ _ = defaultRunStyle -isNumericNotNull :: String -> Bool -isNumericNotNull str = (str /= []) && (all isDigit str) - getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int) getHeaderLevel ns element | Just styleId <- findAttrByName ns "w" "styleId" element , Just index <- stripPrefix "Heading" styleId - , isNumericNotNull index = Just (styleId, read index) + , Just n <- stringToInteger index + , n > 0 = Just (styleId, fromInteger n) | Just styleId <- findAttrByName ns "w" "styleId" element , Just index <- findChildByName ns "w" "name" element >>= findAttrByName ns "w" "val" >>= stripPrefix "heading " - , isNumericNotNull index = Just (styleId, read index) + , Just n <- stringToInteger index + , n > 0 = Just (styleId, fromInteger n) getHeaderLevel _ _ = 
Nothing blockQuoteStyleIds :: [String]