From 5ef315cc6db868a11bd0c3e887b8c55eb2216662 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Jun 2020 21:19:34 +0200 Subject: [PATCH] Org reader: keep unknown keyword lines as raw org The lines of unknown keywords, like `#+SOMEWORD: value`, are no longer read as metadata, but kept as raw `org` blocks. This ensures that more information is retained when round-tripping org-mode files; additionally, this change makes it possible to support non-standard org extensions via filters. --- src/Text/Pandoc/Readers/Org/Blocks.hs | 11 +++++++++++ src/Text/Pandoc/Readers/Org/Meta.hs | 4 ++-- test/Tests/Readers/Org/Meta.hs | 7 +++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 2fbb26d31..0e2f49a83 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -76,6 +76,7 @@ block = choice [ mempty <$ blanklines , list , latexFragment , noteBlock + , rawOrgLine , paraOrPlain ] "block" @@ -559,6 +560,8 @@ include = try $ do | otherwise -> Para content _ -> blk +-- | Parses a meta line which defines a raw block. Currently recognized: +-- @#+LATEX:@, @#+HTML:@, @#+TEXINFO:@, and @#+BEAMER:@. rawExportLine :: PandocMonad m => OrgParser m Blocks rawExportLine = try $ do metaLineStart @@ -567,6 +570,14 @@ rawExportLine = try $ do then B.rawBlock key <$> anyLine else mzero +-- | Parses any meta line, i.e., a line starting with @#+@, into a raw +-- org block. This should be the last resort when trying to parse +-- keywords. Leading spaces are discarded.
+rawOrgLine :: PandocMonad m => OrgParser m (F Blocks) +rawOrgLine = do + line <- metaLineStart *> anyLine + returnF $ B.rawBlock "org" $ "#+" <> line + commentLine :: Monad m => OrgParser m Blocks commentLine = commentLineStart *> anyLine *> pure mempty diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 7d46841b3..43de04ffa 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -57,13 +57,13 @@ removeMeta key meta' = -- The order, in which blocks are tried, makes sure that we're not looking at -- the beginning of a block, so we don't need to check for it metaLine :: PandocMonad m => OrgParser m Blocks -metaLine = mempty <$ metaLineStart <* keywordLine +metaLine = try $ mempty <$ metaLineStart <* keywordLine keywordLine :: PandocMonad m => OrgParser m () keywordLine = try $ do key <- T.toLower <$> metaKey case Map.lookup key keywordHandlers of - Nothing -> () <$ anyLine -- discard unknown lines + Nothing -> fail $ "Unknown keyword: " ++ T.unpack key Just hd -> hd metaKey :: Monad m => OrgParser m Text diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs index bbbb553ba..0bd63b15d 100644 --- a/test/Tests/Readers/Org/Meta.hs +++ b/test/Tests/Readers/Org/Meta.hs @@ -214,8 +214,11 @@ tests = ] , "Unknown keyword" =: - "#+UNKNOWN_KEYWORD: Chumbawamba" =?> - Pandoc nullMeta mempty + T.unlines [ "#+UNKNOWN_KEYWORD: Chumbawamba" + , "#+ANOTHER_UNKNOWN: Blur" + ] =?> + rawBlock "org" "#+UNKNOWN_KEYWORD: Chumbawamba" <> + rawBlock "org" "#+ANOTHER_UNKNOWN: Blur" ] , "Properties drawer" =: