Org reader: keep unknown keyword lines as raw org

Lines with unknown keywords, like `#+SOMEWORD: value`, are no longer
read as metadata but are kept as raw `org` blocks. This ensures that more
information is retained when round-tripping org-mode files;
additionally, this change makes it possible to support non-standard org
extensions via filters.
This commit is contained in:
Albert Krewinkel 2020-06-29 21:19:34 +02:00
parent 90ac70c79c
commit 5ef315cc6d
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
3 changed files with 18 additions and 4 deletions

View file

@ -76,6 +76,7 @@ block = choice [ mempty <$ blanklines
, list , list
, latexFragment , latexFragment
, noteBlock , noteBlock
, rawOrgLine
, paraOrPlain , paraOrPlain
] <?> "block" ] <?> "block"
@ -559,6 +560,8 @@ include = try $ do
| otherwise -> Para content | otherwise -> Para content
_ -> blk _ -> blk
-- | Parses a meta line which defines a raw block. Currently recognized:
-- @#+LATEX:@, @#+HTML:@, @#+TEXINFO:@, and @#+BEAMER:@.
rawExportLine :: PandocMonad m => OrgParser m Blocks rawExportLine :: PandocMonad m => OrgParser m Blocks
rawExportLine = try $ do rawExportLine = try $ do
metaLineStart metaLineStart
@ -567,6 +570,14 @@ rawExportLine = try $ do
then B.rawBlock key <$> anyLine then B.rawBlock key <$> anyLine
else mzero else mzero
-- | Fallback parser for keyword lines. Consumes the meta-line marker
-- (@#+@) via 'metaLineStart', takes the remainder of the line, and
-- re-emits the whole thing as a raw @org@ block with the @#+@ prefix
-- restored. Intended to be tried only after all more specific keyword
-- parsers have been given a chance. Leading spaces are discarded.
rawOrgLine :: PandocMonad m => OrgParser m (F Blocks)
rawOrgLine = do
  rest <- metaLineStart >> anyLine
  -- The marker was consumed by the parser above, so put it back in
  -- front of the remaining text to reproduce the keyword line.
  let keywordText = "#+" <> rest
  returnF (B.rawBlock "org" keywordText)
commentLine :: Monad m => OrgParser m Blocks commentLine :: Monad m => OrgParser m Blocks
commentLine = commentLineStart *> anyLine *> pure mempty commentLine = commentLineStart *> anyLine *> pure mempty

View file

@ -57,13 +57,13 @@ removeMeta key meta' =
-- The order, in which blocks are tried, makes sure that we're not looking at -- The order, in which blocks are tried, makes sure that we're not looking at
-- the beginning of a block, so we don't need to check for it -- the beginning of a block, so we don't need to check for it
metaLine :: PandocMonad m => OrgParser m Blocks metaLine :: PandocMonad m => OrgParser m Blocks
metaLine = mempty <$ metaLineStart <* keywordLine metaLine = try $ mempty <$ metaLineStart <* keywordLine
keywordLine :: PandocMonad m => OrgParser m () keywordLine :: PandocMonad m => OrgParser m ()
keywordLine = try $ do keywordLine = try $ do
key <- T.toLower <$> metaKey key <- T.toLower <$> metaKey
case Map.lookup key keywordHandlers of case Map.lookup key keywordHandlers of
Nothing -> () <$ anyLine -- discard unknown lines Nothing -> fail $ "Unknown keyword: " ++ T.unpack key
Just hd -> hd Just hd -> hd
metaKey :: Monad m => OrgParser m Text metaKey :: Monad m => OrgParser m Text

View file

@ -214,8 +214,11 @@ tests =
] ]
, "Unknown keyword" =: , "Unknown keyword" =:
"#+UNKNOWN_KEYWORD: Chumbawamba" =?> T.unlines [ "#+UNKNOWN_KEYWORD: Chumbawamba"
Pandoc nullMeta mempty , "#+ANOTHER_UNKNOWN: Blur"
] =?>
rawBlock "org" "#+UNKNOWN_KEYWORD: Chumbawamba" <>
rawBlock "org" "#+ANOTHER_UNKNOWN: Blur"
] ]
, "Properties drawer" =: , "Properties drawer" =: