Org reader: unify keyword handling

Handling of export settings and other keywords (like `#+LINK`) has been
combined and unified.
This commit is contained in:
Albert Krewinkel 2020-06-29 20:21:56 +02:00
parent 1480606174
commit 90ac70c79c
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
3 changed files with 127 additions and 127 deletions

View file

@ -14,13 +14,13 @@ The following export keywords are supported:
- AUTHOR: comma-separated list of author(s); fully supported.
- CREATOR: output generator; passed as metadata entry, but
ignored by most output formats.
- CREATOR: output generator; passed as plain-text metadata entry
`creator`, but not used by any default templates.
- DATE: creation or publication date; well supported by pandoc.
- EMAIL: author email address; passed as metadata entry, but not
included in most output formats.
- EMAIL: author email address; passed as plain-text metadata
field `email`, but not used by any default templates.
- LANGUAGE: currently unsupported; use `#+LANG:` instead.

View file

@ -22,7 +22,7 @@ import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue)
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
@ -57,13 +57,13 @@ removeMeta key meta' =
-- The order, in which blocks are tried, makes sure that we're not looking at
-- the beginning of a block, so we don't need to check for it
metaLine :: PandocMonad m => OrgParser m Blocks
metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
metaLine = mempty <$ metaLineStart <* keywordLine
declarationLine :: PandocMonad m => OrgParser m ()
declarationLine = try $ do
keywordLine :: PandocMonad m => OrgParser m ()
keywordLine = try $ do
key <- T.toLower <$> metaKey
case Map.lookup key exportSettingHandlers of
Nothing -> () <$ anyLine
case Map.lookup key keywordHandlers of
Nothing -> () <$ anyLine -- discard unknown lines
Just hd -> hd
metaKey :: Monad m => OrgParser m Text
@ -71,33 +71,55 @@ metaKey = T.toLower <$> many1Char (noneOf ": \n\r")
<* char ':'
<* skipSpaces
exportSettingHandlers :: PandocMonad m => Map Text (OrgParser m ())
exportSettingHandlers = Map.fromList
[ ("result" , fmap pure anyLine `parseThen` discard)
-- Common settings
, ("author" , lineOfInlines `parseThen` collectLines "author")
, ("date" , lineOfInlines `parseThen` setField "date")
, ("description", lineOfInlines `parseThen` collectLines "description")
, ("keywords" , lineOfInlines `parseThen` collectLines "keywords")
, ("subtitle" , lineOfInlines `parseThen` collectLines "subtitle")
, ("title" , lineOfInlines `parseThen` collectLines "title")
-- LaTeX
, ("latex_class" , fmap pure anyLine `parseThen` setField "documentclass")
, ("latex_class_options", (pure . T.filter (`notElem` ("[]" :: String)) <$> anyLine)
`parseThen` setField "classoption")
, ("latex_header" , metaExportSnippet "latex" `parseThen`
collectAsList "header-includes")
, ("latex_header_extra", metaExportSnippet "latex" `parseThen`
collectAsList "header-includes")
-- HTML
, ("html_head" , metaExportSnippet "html" `parseThen`
collectAsList "header-includes")
, ("html_head_extra", metaExportSnippet "html" `parseThen`
collectAsList "header-includes")
-- pandoc-specific
, ("nocite" , lineOfInlines `parseThen` collectLines "nocite")
, ("header-includes", lineOfInlines `parseThen` collectLines "header-includes")
, ("institute" , lineOfInlines `parseThen` collectLines "institute")
infix 0 ~~>
(~~>) :: a -> b -> (a, b)
a ~~> b = (a, b)
keywordHandlers :: PandocMonad m => Map Text (OrgParser m ())
keywordHandlers = Map.fromList
[ "author" ~~> lineOfInlines `parseThen` collectLines "author"
, "creator" ~~> fmap pure anyLine `parseThen` B.setMeta "creator"
, "date" ~~> lineOfInlines `parseThen` B.setMeta "date"
, "description" ~~> lineOfInlines `parseThen` collectLines "description"
, "email" ~~> fmap pure anyLine `parseThen` B.setMeta "email"
, "exclude_tags" ~~> tagList >>= updateState . setExcludedTags
, "header-includes" ~~>
lineOfInlines `parseThen` collectLines "header-includes"
-- HTML-specifix export settings
, "html_head" ~~>
metaExportSnippet "html" `parseThen` collectAsList "header-includes"
, "html_head_extra" ~~>
metaExportSnippet "html" `parseThen` collectAsList "header-includes"
, "institute" ~~> lineOfInlines `parseThen` collectLines "institute"
-- topic keywords
, "keywords" ~~> lineOfInlines `parseThen` collectLines "keywords"
-- LaTeX-specific export settings
, "latex_class" ~~> fmap pure anyLine `parseThen` B.setMeta "documentclass"
, "latex_class_options" ~~>
(pure . T.filter (`notElem` ("[]" :: String)) <$> anyLine)
`parseThen` B.setMeta "classoption"
, "latex_header" ~~>
metaExportSnippet "latex" `parseThen` collectAsList "header-includes"
, "latex_header_extra" ~~>
metaExportSnippet "latex" `parseThen` collectAsList "header-includes"
-- link and macro
, "link" ~~> addLinkFormatter
, "macro" ~~> macroDefinition >>= updateState . registerMacro
-- pandoc-specific way to include references in the bibliography
, "nocite" ~~> lineOfInlines `parseThen` collectLines "nocite"
-- compact way to set export settings
, "options" ~~> exportSettings
-- pandoc-specific way to configure emphasis recognition
, "pandoc-emphasis-post" ~~> emphChars >>= updateState . setEmphasisPostChar
, "pandoc-emphasis-pre" ~~> emphChars >>= updateState . setEmphasisPreChar
-- result markers (ignored)
, "result" ~~> void anyLine
, "select_tags" ~~> tagList >>= updateState . setSelectedTags
, "seq_todo" ~~> todoSequence >>= updateState . registerTodoSequence
, "subtitle" ~~> lineOfInlines `parseThen` collectLines "subtitle"
, "title" ~~> lineOfInlines `parseThen` collectLines "title"
, "todo" ~~> todoSequence >>= updateState . registerTodoSequence
, "typ_todo" ~~> todoSequence >>= updateState . registerTodoSequence
]
parseThen :: PandocMonad m
@ -109,9 +131,6 @@ parseThen p modMeta = do
meta <- orgStateMeta <$> getState
updateState (\st -> st { orgStateMeta = modMeta <$> value <*> meta })
discard :: a -> Meta -> Meta
discard = const id
collectLines :: Text -> Inlines -> Meta -> Meta
collectLines key value meta =
let value' = appendValue meta (B.toList value)
@ -146,51 +165,25 @@ collectAsList key value meta =
Just x -> [x]
_ -> []
setField :: ToMetaValue a => Text -> a -> Meta -> Meta
setField field value meta = B.setMeta field (B.toMetaValue value) meta
-- | Read an format specific meta definition
metaExportSnippet :: Monad m => Text -> OrgParser m (F Inlines)
metaExportSnippet format = pure . B.rawInline format <$> anyLine
--
-- export options
--
optionLine :: PandocMonad m => OrgParser m ()
optionLine = try $ do
key <- metaKey
case key of
"link" -> parseLinkFormat >>= uncurry addLinkFormat
"options" -> exportSettings
"todo" -> todoSequence >>= updateState . registerTodoSequence
"seq_todo" -> todoSequence >>= updateState . registerTodoSequence
"typ_todo" -> todoSequence >>= updateState . registerTodoSequence
"macro" -> macroDefinition >>= updateState . registerMacro
"exclude_tags" -> tagList >>= updateState . setExcludedTags
"select_tags" -> tagList >>= updateState . setSelectedTags
"pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar
"pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
_ -> mzero
addLinkFormat :: Monad m => Text
-> (Text -> Text)
-> OrgParser m ()
addLinkFormat key formatter = updateState $ \s ->
let fs = orgStateLinkFormatters s
in s{ orgStateLinkFormatters = Map.insert key formatter fs }
parseLinkFormat :: Monad m => OrgParser m (Text, Text -> Text)
parseLinkFormat = try $ do
-- | Parse a link type definition (like @wp https://en.wikipedia.org/wiki/@).
addLinkFormatter :: Monad m => OrgParser m ()
addLinkFormatter = try $ do
linkType <- T.cons <$> letter <*> manyChar (alphaNum <|> oneOf "-_") <* skipSpaces
linkSubst <- parseFormat
return (linkType, linkSubst)
formatter <- parseFormat
updateState $ \s ->
let fs = orgStateLinkFormatters s
in s{ orgStateLinkFormatters = Map.insert linkType formatter fs }
-- | An ad-hoc, single-argument-only implementation of a printf-style format
-- parser.
parseFormat :: Monad m => OrgParser m (Text -> Text)
parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
where
-- inefficient, but who cares
-- inefficient
replacePlain = try $ (\x -> T.concat . flip intersperse x)
<$> sequence [tillSpecifier 's', rest]
replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack)
@ -229,6 +222,7 @@ setEmphasisPostChar csMb st =
let postChars = fromMaybe (orgStateEmphasisPostChars defaultOrgParserState) csMb
in st { orgStateEmphasisPostChars = postChars }
-- | Parses emphasis border character like @".,?!"@
emphChars :: Monad m => OrgParser m (Maybe [Char])
emphChars = do
skipSpaces
@ -239,16 +233,14 @@ lineOfInlines = do
updateLastPreCharPos
trimInlinesF . mconcat <$> manyTill inline newline
--
-- ToDo Sequences and Keywords
--
-- | Parses ToDo sequences / keywords like @TODO DOING | DONE@.
todoSequence :: Monad m => OrgParser m TodoSequence
todoSequence = try $ do
todoKws <- todoKeywords
doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
newline
-- There must be at least one DONE keyword. The last TODO keyword is taken if
-- necessary.
-- There must be at least one DONE keyword. The last TODO keyword is
-- taken if necessary.
case doneKws of
Just done -> return $ keywordsToSequence todoKws done
Nothing -> case reverse todoKws of

View file

@ -113,21 +113,21 @@ tests =
Pandoc (setMeta "institute" ("ACME Inc." :: Inlines) nullMeta) mempty
, testGroup "LaTeX"
[ "LaTeX_headers options are translated to header-includes" =:
[ "LATEX_HEADER" =:
"#+LaTeX_header: \\usepackage{tikz}" =?>
let latexInlines = rawInline "latex" "\\usepackage{tikz}"
inclList = MetaList [MetaInlines (toList latexInlines)]
meta = setMeta "header-includes" inclList nullMeta
in Pandoc meta mempty
, "LATEX_HEADER_EXTRA values are translated to header-includes" =:
, "LATEX_HEADER_EXTRA" =:
"#+LATEX_HEADER_EXTRA: \\usepackage{calc}" =?>
let latexInlines = rawInline "latex" "\\usepackage{calc}"
inclList = toMetaValue [latexInlines]
in Pandoc (setMeta "header-includes" inclList nullMeta) mempty
, testGroup "LaTeX_CLASS"
[ "LaTeX_class option is translated to documentclass" =:
[ "stored as documentclass" =:
"#+LATEX_CLASS: article" =?>
let meta = setMeta "documentclass" (MetaString "article") nullMeta
in Pandoc meta mempty
@ -140,7 +140,7 @@ tests =
in Pandoc meta mempty
]
, "LaTeX_class_options is translated to classoption" =:
, "LATEX_CLASS_OPTIONS as classoption" =:
"#+LATEX_CLASS_OPTIONS: [a4paper]" =?>
let meta = setMeta "classoption" (MetaString "a4paper") nullMeta
in Pandoc meta mempty
@ -166,6 +166,58 @@ tests =
]
]
, testGroup "Non-export keywords"
[ testGroup "#+LINK"
[ "Link abbreviation" =:
T.unlines [ "#+LINK: wp https://en.wikipedia.org/wiki/%s"
, "[[wp:Org_mode][Wikipedia on Org-mode]]"
] =?>
para (link "https://en.wikipedia.org/wiki/Org_mode" ""
("Wikipedia" <> space <> "on" <> space <> "Org-mode"))
, "Link abbreviation, defined after first use" =:
T.unlines [ "[[zl:non-sense][Non-sense articles]]"
, "#+LINK: zl http://zeitlens.com/tags/%s.html"
] =?>
para (link "http://zeitlens.com/tags/non-sense.html" ""
("Non-sense" <> space <> "articles"))
, "Link abbreviation, URL encoded arguments" =:
T.unlines [ "#+link: expl http://example.com/%h/foo"
, "[[expl:Hello, World!][Moin!]]"
] =?>
para (link "http://example.com/Hello%2C%20World%21/foo" "" "Moin!")
, "Link abbreviation, append arguments" =:
T.unlines [ "#+link: expl http://example.com/"
, "[[expl:foo][bar]]"
] =?>
para (link "http://example.com/foo" "" "bar")
]
, testGroup "emphasis config"
[ "Changing pre and post chars for emphasis" =:
T.unlines [ "#+pandoc-emphasis-pre: \"[)\""
, "#+pandoc-emphasis-post: \"]\\n\""
, "([/emph/])*foo*"
] =?>
para ("([" <> emph "emph" <> "])" <> strong "foo")
, "setting an invalid value restores the default" =:
T.unlines [ "#+pandoc-emphasis-pre: \"[\""
, "#+pandoc-emphasis-post: \"]\""
, "#+pandoc-emphasis-pre:"
, "#+pandoc-emphasis-post:"
, "[/noemph/]"
] =?>
para "[/noemph/]"
]
, "Unknown keyword" =:
"#+UNKNOWN_KEYWORD: Chumbawamba" =?>
Pandoc nullMeta mempty
]
, "Properties drawer" =:
T.unlines [ " :PROPERTIES:"
, " :setting: foo"
@ -220,48 +272,4 @@ tests =
] =?>
(para (spanWith ("link-here", [], []) mempty <> "Target.") <>
para (emph ("See" <> space <> "here!")))
, "Link abbreviation" =:
T.unlines [ "#+LINK: wp https://en.wikipedia.org/wiki/%s"
, "[[wp:Org_mode][Wikipedia on Org-mode]]"
] =?>
para (link "https://en.wikipedia.org/wiki/Org_mode" ""
("Wikipedia" <> space <> "on" <> space <> "Org-mode"))
, "Link abbreviation, defined after first use" =:
T.unlines [ "[[zl:non-sense][Non-sense articles]]"
, "#+LINK: zl http://zeitlens.com/tags/%s.html"
] =?>
para (link "http://zeitlens.com/tags/non-sense.html" ""
("Non-sense" <> space <> "articles"))
, "Link abbreviation, URL encoded arguments" =:
T.unlines [ "#+link: expl http://example.com/%h/foo"
, "[[expl:Hello, World!][Moin!]]"
] =?>
para (link "http://example.com/Hello%2C%20World%21/foo" "" "Moin!")
, "Link abbreviation, append arguments" =:
T.unlines [ "#+link: expl http://example.com/"
, "[[expl:foo][bar]]"
] =?>
para (link "http://example.com/foo" "" "bar")
, testGroup "emphasis config"
[ "Changing pre and post chars for emphasis" =:
T.unlines [ "#+pandoc-emphasis-pre: \"[)\""
, "#+pandoc-emphasis-post: \"]\\n\""
, "([/emph/])*foo*"
] =?>
para ("([" <> emph "emph" <> "])" <> strong "foo")
, "setting an invalid value restores the default" =:
T.unlines [ "#+pandoc-emphasis-pre: \"[\""
, "#+pandoc-emphasis-post: \"]\""
, "#+pandoc-emphasis-pre:"
, "#+pandoc-emphasis-post:"
, "[/noemph/]"
] =?>
para "[/noemph/]"
]
]