From 1dda5353781fa605c00dd18af5f8527bc31956ef Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 12 May 2016 23:11:26 +0200 Subject: [PATCH 1/3] Org reader: refactor block attribute handling A parser state attribute was used to keep track of block attributes defined in meta-lines. Global state is undesirable, so block attributes are no longer saved as part of the parser state. Old functions and the respective part of the parser state are removed. --- src/Text/Pandoc/Readers/Org.hs | 154 ++++++++++----------- src/Text/Pandoc/Readers/Org/ParserState.hs | 4 - 2 files changed, 76 insertions(+), 82 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ceab1e120..06af84494 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -49,10 +49,10 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Control.Arrow (first) import Control.Monad (foldM, guard, mplus, mzero, when) import Control.Monad.Reader ( Reader, runReader ) -import Data.Char (isAlphaNum, isSpace, toLower) -import Data.List (intersperse, isPrefixOf, isSuffixOf) +import Data.Char (isAlphaNum, isSpace, toLower, toUpper) +import Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf ) import qualified Data.Map as M -import Data.Maybe (fromMaybe, isJust) +import Data.Maybe ( fromMaybe, isNothing ) import Network.HTTP (urlEncode) @@ -273,11 +273,9 @@ parseBlocks = mconcat <$> manyTill block eof block :: OrgParser (F Blocks) block = choice [ mempty <$ blanklines - , optionalAttributes $ choice - [ orgBlock - , figure - , table - ] + , table + , orgBlock + , figure , example , drawer , specialLine @@ -289,50 +287,53 @@ block = choice [ mempty <$ blanklines , paraOrPlain ] "block" + -- -- Block Attributes -- --- | Parse optional block attributes (like #+TITLE or #+NAME) -optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks) -optionalAttributes parser = try $ - resetBlockAttributes *> parseBlockAttributes *> parser +-- | Attributes that may be added to figures (like a name or caption). +data BlockAttributes = BlockAttributes + { blockAttrName :: Maybe String + , blockAttrCaption :: Maybe (F Inlines) + } + +stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String) +stringyMetaAttribute attrCheck = try $ do + metaLineStart + attrName <- map toUpper <$> many1Till nonspaceChar (char ':') + guard $ attrCheck attrName + skipSpaces + attrValue <- manyTill anyChar newline + return (attrName, attrValue) + +blockAttributes :: OrgParser BlockAttributes +blockAttributes = try $ do + kv <- many (stringyMetaAttribute attrCheck) + let caption = foldl' (appendValues "CAPTION") Nothing kv + let name = lookup "NAME" kv + caption' <- maybe (return Nothing) + (fmap Just . parseFromString parseInlines) + caption + return $ BlockAttributes + { blockAttrName = name + , blockAttrCaption = caption' + } where - resetBlockAttributes :: OrgParser () - resetBlockAttributes = updateState $ \s -> - s{ orgStateBlockAttributes = orgStateBlockAttributes def } + attrCheck :: String -> Bool + attrCheck attr = + case attr of + "NAME" -> True + "CAPTION" -> True + _ -> False -parseBlockAttributes :: OrgParser () -parseBlockAttributes = do - attrs <- many attribute - mapM_ (uncurry parseAndAddAttribute) attrs - where - attribute :: OrgParser (String, String) - attribute = try $ do - key <- metaLineStart *> many1Till nonspaceChar (char ':') - val <- skipSpaces *> anyLine - return (map toLower key, val) - -parseAndAddAttribute :: String -> String -> OrgParser () -parseAndAddAttribute key value = do - let key' = map toLower key - () <$ addBlockAttribute key' value - -lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines)) -lookupInlinesAttr attr = try $ do - val <- lookupBlockAttribute attr - maybe (return Nothing) - (fmap Just . parseFromString parseInlines) - val - -addBlockAttribute :: String -> String -> OrgParser () -addBlockAttribute key val = updateState $ \s -> - let attrs = orgStateBlockAttributes s - in s{ orgStateBlockAttributes = M.insert key val attrs } - -lookupBlockAttribute :: String -> OrgParser (Maybe String) -lookupBlockAttribute key = - M.lookup key . orgStateBlockAttributes <$> getState + appendValues :: String -> Maybe String -> (String, String) -> Maybe String + appendValues attrName accValue (key, value) = + if key /= attrName + then accValue + else case accValue of + Just acc -> Just $ acc ++ ' ':value + Nothing -> Just value -- @@ -346,6 +347,7 @@ updateIndent (_, blkType) indent = (indent, blkType) orgBlock :: OrgParser (F Blocks) orgBlock = try $ do + blockAttrs <- blockAttributes blockProp@(_, blkType) <- blockHeaderStart ($ blockProp) $ case blkType of @@ -356,7 +358,7 @@ orgBlock = try $ do "example" -> withRaw' (return . exampleCode) "quote" -> withParsed (fmap B.blockQuote) "verse" -> verseBlock - "src" -> codeBlock + "src" -> codeBlock blockAttrs _ -> withParsed (fmap $ divWithClass blkType) blockHeaderStart :: OrgParser (Int, String) @@ -410,20 +412,20 @@ followingResultsBlock = *> blankline *> block) -codeBlock :: BlockProperties -> OrgParser (F Blocks) -codeBlock blkProp = do +codeBlock :: BlockAttributes -> BlockProperties -> OrgParser (F Blocks) +codeBlock blockAttrs blkProp = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) - id' <- fromMaybe "" <$> lookupBlockAttribute "name" leadingIndent <- lookAhead indentation content <- rawBlockContent (updateIndent blkProp leadingIndent) resultsContent <- followingResultsBlock + let id' = fromMaybe mempty $ blockAttrName blockAttrs let includeCode = exportsCode kv let includeResults = exportsResults kv let codeBlck = B.codeBlockWith ( id', classes, kv ) content - labelledBlck <- maybe (pure codeBlck) + let labelledBlck = maybe (pure codeBlck) (labelDiv codeBlck) - <$> lookupInlinesAttr "caption" + (blockAttrCaption blockAttrs) let resultBlck = fromMaybe mempty resultsContent return $ (if includeCode then labelledBlck else mempty) <> (if includeResults then resultBlck else mempty) @@ -579,47 +581,42 @@ drawerEnd = try $ -- Figures -- --- Figures (Image on a line by itself, preceded by name and/or caption) + +-- | Figures (Image on a line by itself, preceded by name and/or caption) figure :: OrgParser (F Blocks) figure = try $ do - (cap, nam) <- nameAndCaption + figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline + guard . not . isNothing . blockAttrCaption $ figAttrs guard (isImageFilename src) - return $ do - cap' <- cap - return $ B.para $ B.image src nam cap' + let figName = fromMaybe mempty $ blockAttrName figAttrs + let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs + return $ (B.para . B.image src (withFigPrefix figName) <$> figCaption) where - nameAndCaption = - do - maybeCap <- lookupInlinesAttr "caption" - maybeNam <- lookupBlockAttribute "name" - guard $ isJust maybeCap || isJust maybeNam - return ( fromMaybe mempty maybeCap - , withFigPrefix $ fromMaybe mempty maybeNam ) withFigPrefix cs = - if "fig:" `isPrefixOf` cs - then cs - else "fig:" ++ cs + if "fig:" `isPrefixOf` cs + then cs + else "fig:" ++ cs -- -- Comments, Options and Metadata +-- specialLine :: OrgParser (F Blocks) specialLine = fmap return . try $ metaLine <|> commentLine metaLine :: OrgParser Blocks -metaLine = try $ mempty - <$ (metaLineStart *> (optionLine <|> declarationLine)) - -commentLine :: OrgParser Blocks -commentLine = try $ commentLineStart *> anyLine *> pure mempty +metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) -- The order, in which blocks are tried, makes sure that we're not looking at -- the beginning of a block, so we don't need to check for it -metaLineStart :: OrgParser String -metaLineStart = try $ mappend <$> many spaceChar <*> string "#+" +metaLineStart :: OrgParser () +metaLineStart = try $ skipSpaces <* string "#+" -commentLineStart :: OrgParser String -commentLineStart = try $ mappend <$> many spaceChar <*> string "# " +commentLine :: OrgParser Blocks +commentLine = commentLineStart *> anyLine *> pure mempty + +commentLineStart :: OrgParser () +commentLineStart = try $ skipSpaces <* string "# " declarationLine :: OrgParser () declarationLine = try $ do @@ -738,11 +735,12 @@ data OrgTable = OrgTable table :: OrgParser (F Blocks) table = try $ do + blockAttrs <- blockAttributes lookAhead tableStart do rows <- tableRows - cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption" - return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows + let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs + return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows orgToPandocTable :: OrgTable -> Inlines diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 49cfa2be2..f84e5e51b 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -68,8 +68,6 @@ import Text.Pandoc.Parsing ( HasHeaderMap(..) type OrgNoteRecord = (String, F Blocks) -- | Table of footnotes type OrgNoteTable = [OrgNoteRecord] --- | Map of org block attributes (e.g. LABEL, CAPTION, NAME, etc) -type OrgBlockAttributes = M.Map String String -- | Map of functions for link transformations. The map key is refers to the -- link-type, the corresponding function transforms the given link string. type OrgLinkFormatters = M.Map String (String -> String) @@ -84,7 +82,6 @@ data ExportSettings = ExportSettings data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions , orgStateAnchorIds :: [String] - , orgStateBlockAttributes :: OrgBlockAttributes , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisNewlines :: Maybe Int , orgStateExportSettings :: ExportSettings @@ -140,7 +137,6 @@ defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState { orgStateOptions = def , orgStateAnchorIds = [] - , orgStateBlockAttributes = M.empty , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing , orgStateExportSettings = def From 26e8d98be207fcee24375e8636f7861679c95406 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 18 May 2016 16:21:56 +0200 Subject: [PATCH 2/3] Org reader: use custom `anyLine` Additional state changes need to be made after a newline is parsed, otherwise markup may not be recognized correctly. This fixes a bug where markup after certain block-types would not be recognized. E.g. `/emph/` in the following snippet was not parsed as emphasized. foo # comment /emph/ --- src/Text/Pandoc/Readers/Org.hs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 06af84494..da20e9407 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -37,8 +37,9 @@ import Text.Pandoc.Error import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF - , newline, orderedListMarker - , parseFromString, blanklines + , anyLine, blanklines, newline + , orderedListMarker + , parseFromString ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Readers.Org.ParserState @@ -247,6 +248,12 @@ blanklines = <* updateLastPreCharPos <* updateLastForbiddenCharPos +anyLine :: OrgParser String +anyLine = + P.anyLine + <* updateLastPreCharPos + <* updateLastForbiddenCharPos + -- | Succeeds when we're in list context. inList :: OrgParser () inList = do @@ -304,7 +311,7 @@ stringyMetaAttribute attrCheck = try $ do attrName <- map toUpper <$> many1Till nonspaceChar (char ':') guard $ attrCheck attrName skipSpaces - attrValue <- manyTill anyChar newline + attrValue <- anyLine return (attrName, attrValue) blockAttributes :: OrgParser BlockAttributes From 16e233475ae93d7113ef049dec272d23667fc493 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 18 May 2016 23:24:22 +0200 Subject: [PATCH 3/3] Org reader: add support for ATTR_HTML attributes Arbitrary key-value pairs can be added to some block types using a `#+ATTR_HTML` line before the block. Emacs Org-mode only includes these when exporting to HTML, but since we cannot make this distinction here, the attributes are always added. The functionality is now supported for figures. This closes #1906. --- src/Text/Pandoc/Readers/Org.hs | 35 +++++++++++++++++++++++++++------- tests/Tests/Readers/Org.hs | 11 +++++++++++ 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index da20e9407..a7120389f 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -301,8 +301,9 @@ block = choice [ mempty <$ blanklines -- | Attributes that may be added to figures (like a name or caption). data BlockAttributes = BlockAttributes - { blockAttrName :: Maybe String - , blockAttrCaption :: Maybe (F Inlines) + { blockAttrName :: Maybe String + , blockAttrCaption :: Maybe (F Inlines) + , blockAttrKeyValues :: [(String, String)] } stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String) @@ -318,21 +319,25 @@ blockAttributes :: OrgParser BlockAttributes blockAttributes = try $ do kv <- many (stringyMetaAttribute attrCheck) let caption = foldl' (appendValues "CAPTION") Nothing kv + let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv let name = lookup "NAME" kv caption' <- maybe (return Nothing) (fmap Just . parseFromString parseInlines) caption + kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs return $ BlockAttributes { blockAttrName = name , blockAttrCaption = caption' + , blockAttrKeyValues = kvAttrs' } where attrCheck :: String -> Bool attrCheck attr = case attr of - "NAME" -> True - "CAPTION" -> True - _ -> False + "NAME" -> True + "CAPTION" -> True + "ATTR_HTML" -> True + _ -> False appendValues :: String -> Maybe String -> (String, String) -> Maybe String appendValues attrName accValue (key, value) = @@ -342,6 +347,21 @@ blockAttributes = try $ do Just acc -> Just $ acc ++ ' ':value Nothing -> Just value +keyValues :: OrgParser [(String, String)] +keyValues = try $ + manyTill ((,) <$> key <*> value) newline + where + key :: OrgParser String + key = try $ skipSpaces *> char ':' *> many1 nonspaceChar + + value :: OrgParser String + value = skipSpaces *> manyTill anyChar endOfValue + + endOfValue :: OrgParser () + endOfValue = + lookAhead $ (() <$ try (many1 spaceChar <* key)) + <|> () <$ P.newline + -- -- Org Blocks (#+BEGIN_... / #+END_...) @@ -588,7 +608,6 @@ drawerEnd = try $ -- Figures -- - -- | Figures (Image on a line by itself, preceded by name and/or caption) figure :: OrgParser (F Blocks) figure = try $ do @@ -598,7 +617,9 @@ figure = try $ do guard (isImageFilename src) let figName = fromMaybe mempty $ blockAttrName figAttrs let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs - return $ (B.para . B.image src (withFigPrefix figName) <$> figCaption) + let figKeyVals = blockAttrKeyValues figAttrs + let attr = (mempty, mempty, figKeyVals) + return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption) where withFigPrefix cs = if "fig:" `isPrefixOf` cs diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index fa0c57f71..666d93a51 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -667,6 +667,17 @@ tests = para (image "the-red-queen.jpg" "fig:redqueen" "Used as a metapher in evolutionary biology.") + , "Figure with HTML attributes" =: + unlines [ "#+CAPTION: mah brain just explodid" + , "#+NAME: lambdacat" + , "#+ATTR_HTML: :style color: blue :role button" + , "[[lambdacat.jpg]]" + ] =?> + let kv = [("style", "color: blue"), ("role", "button")] + name = "fig:lambdacat" + caption = "mah brain just explodid" + in para (imageWith (mempty, mempty, kv) "lambdacat.jpg" name caption) + , "Footnote" =: unlines [ "A footnote[1]" , ""