Merge branch 'org-meta-handling'

This commit is contained in:
Albert Krewinkel 2016-08-29 14:42:23 +02:00
commit a3a3e3fdbf
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
6 changed files with 317 additions and 129 deletions

View file

@ -401,6 +401,7 @@ Library
Text.Pandoc.Readers.Org.Blocks,
Text.Pandoc.Readers.Org.ExportSettings,
Text.Pandoc.Readers.Org.Inlines,
Text.Pandoc.Readers.Org.Meta,
Text.Pandoc.Readers.Org.ParserState,
Text.Pandoc.Readers.Org.Parsing,
Text.Pandoc.Readers.Org.Shared,

View file

@ -34,8 +34,8 @@ module Text.Pandoc.Readers.Org.Blocks
) where
import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings )
import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.Meta ( metaExport, metaLine )
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared
@ -52,9 +52,7 @@ import Text.Pandoc.Shared ( compactify', compactify'DL )
import Control.Monad ( foldM, guard, mzero, void )
import Data.Char ( isSpace, toLower, toUpper)
import Data.List ( foldl', intersperse, isPrefixOf )
import qualified Data.Map as M
import Data.Maybe ( fromMaybe, isNothing )
import Network.HTTP ( urlEncode )
--
-- Org headers
@ -232,8 +230,8 @@ blockList = do
-- | Get the meta information safed in the state.
meta :: OrgParser Meta
meta = do
st <- getState
return $ runF (orgStateMeta st) st
meta' <- metaExport
runF meta' <$> getState
blocks :: OrgParser (F Blocks)
blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof)
@ -631,67 +629,9 @@ exampleCode = B.codeBlockWith ("", ["example"], [])
specialLine :: OrgParser (F Blocks)
specialLine = fmap return . try $ metaLine <|> commentLine
-- The order, in which blocks are tried, makes sure that we're not looking at
-- the beginning of a block, so we don't need to check for it
metaLine :: OrgParser Blocks
metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
commentLine :: OrgParser Blocks
commentLine = commentLineStart *> anyLine *> pure mempty
declarationLine :: OrgParser ()
declarationLine = try $ do
key <- metaKey
value <- metaInlines
updateState $ \st ->
let meta' = B.setMeta key <$> value <*> pure nullMeta
in st { orgStateMeta = orgStateMeta st <> meta' }
metaInlines :: OrgParser (F MetaValue)
metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
metaKey :: OrgParser String
metaKey = map toLower <$> many1 (noneOf ": \n\r")
<* char ':'
<* skipSpaces
optionLine :: OrgParser ()
optionLine = try $ do
key <- metaKey
case key of
"link" -> parseLinkFormat >>= uncurry addLinkFormat
"options" -> exportSettings
_ -> mzero
addLinkFormat :: String
-> (String -> String)
-> OrgParser ()
addLinkFormat key formatter = updateState $ \s ->
let fs = orgStateLinkFormatters s
in s{ orgStateLinkFormatters = M.insert key formatter fs }
parseLinkFormat :: OrgParser ((String, String -> String))
parseLinkFormat = try $ do
linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
linkSubst <- parseFormat
return (linkType, linkSubst)
-- | An ad-hoc, single-argument-only implementation of a printf-style format
-- parser.
parseFormat :: OrgParser (String -> String)
parseFormat = try $ do
replacePlain <|> replaceUrl <|> justAppend
where
-- inefficient, but who cares
replacePlain = try $ (\x -> concat . flip intersperse x)
<$> sequence [tillSpecifier 's', rest]
replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
<$> sequence [tillSpecifier 'h', rest]
justAppend = try $ (++) <$> rest
rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
--
-- Tables
@ -868,9 +808,6 @@ paraOrPlain = try $ do
*> return (B.para <$> ils))
<|> (return (B.plain <$> ils))
inlinesTillNewline :: OrgParser (F Inlines)
inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
--
-- list blocks

View file

@ -54,13 +54,15 @@ exportSetting = choice
, ignoredSetting "<"
, ignoredSetting "\\n"
, archivedTreeSetting "arch" (\val es -> es { exportArchivedTrees = val })
, ignoredSetting "author"
, booleanSetting "author" (\val es -> es { exportWithAuthor = val })
, ignoredSetting "c"
, ignoredSetting "creator"
-- org-mode allows the special value `comment` for creator, which we'll
-- interpret as true as it doesn't make sense in the context of Pandoc.
, booleanSetting "creator" (\val es -> es { exportWithCreator = val })
, complementableListSetting "d" (\val es -> es { exportDrawers = val })
, ignoredSetting "date"
, ignoredSetting "e"
, ignoredSetting "email"
, booleanSetting "email" (\val es -> es { exportWithEmail = val })
, ignoredSetting "f"
, integerSetting "H" (\val es -> es { exportHeadlineLevels = val })
, ignoredSetting "inline"

View file

@ -0,0 +1,181 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
{-
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Readers.Org.Meta
Copyright : Copyright (C) 2014-2016 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
Parsers for Org-mode meta declarations.
-}
module Text.Pandoc.Readers.Org.Meta
( metaLine
, metaExport
) where
import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings )
import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Blocks, Inlines )
import Text.Pandoc.Definition
import Text.Pandoc.Compat.Monoid ((<>))
import Control.Monad ( mzero )
import Data.Char ( toLower )
import Data.List ( intersperse )
import qualified Data.Map as M
import Network.HTTP ( urlEncode )
-- | Returns the current meta, respecting export options.
metaExport :: OrgParser (F Meta)
metaExport = do
st <- getState
let settings = orgStateExportSettings st
return $ (if exportWithAuthor settings then id else removeMeta "author")
. (if exportWithCreator settings then id else removeMeta "creator")
. (if exportWithEmail settings then id else removeMeta "email")
<$> orgStateMeta st
removeMeta :: String -> Meta -> Meta
removeMeta key meta' =
let metaMap = unMeta meta'
in Meta $ M.delete key metaMap
-- | Parse and handle a single line containing meta information
-- The order, in which blocks are tried, makes sure that we're not looking at
-- the beginning of a block, so we don't need to check for it
metaLine :: OrgParser Blocks
metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
declarationLine :: OrgParser ()
declarationLine = try $ do
key <- map toLower <$> metaKey
(key', value) <- metaValue key
updateState $ \st ->
let meta' = B.setMeta key' <$> value <*> pure nullMeta
in st { orgStateMeta = meta' <> orgStateMeta st }
metaKey :: OrgParser String
metaKey = map toLower <$> many1 (noneOf ": \n\r")
<* char ':'
<* skipSpaces
metaValue :: String -> OrgParser (String, (F MetaValue))
metaValue key =
let inclKey = "header-includes"
in case key of
"author" -> (key,) <$> metaInlinesCommaSeparated
"title" -> (key,) <$> metaInlines
"date" -> (key,) <$> metaInlines
"header-includes" -> (key,) <$> accumulatingList key metaInlines
"latex_header" -> (inclKey,) <$>
accumulatingList inclKey (metaExportSnippet "latex")
"latex_class" -> ("documentclass",) <$> metaString
-- Org-mode expects class options to contain the surrounding brackets,
-- pandoc does not.
"latex_class_options" -> ("classoption",) <$>
metaModifiedString (filter (`notElem` "[]"))
"html_head" -> (inclKey,) <$>
accumulatingList inclKey (metaExportSnippet "html")
_ -> (key,) <$> metaString
metaInlines :: OrgParser (F MetaValue)
metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
metaInlinesCommaSeparated :: OrgParser (F MetaValue)
metaInlinesCommaSeparated = do
authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',')
newline
authors <- mapM (parseFromString inlinesTillNewline . (++ "\n")) authStrs
let toMetaInlines = MetaInlines . B.toList
return $ MetaList . map toMetaInlines <$> sequence authors
metaString :: OrgParser (F MetaValue)
metaString = metaModifiedString id
metaModifiedString :: (String -> String) -> OrgParser (F MetaValue)
metaModifiedString f = return . MetaString . f <$> anyLine
-- | Read an format specific meta definition
metaExportSnippet :: String -> OrgParser (F MetaValue)
metaExportSnippet format =
return . MetaInlines . B.toList . B.rawInline format <$> anyLine
-- | Accumulate the result of the @parser@ in a list under @key@.
accumulatingList :: String
-> OrgParser (F MetaValue)
-> OrgParser (F MetaValue)
accumulatingList key p = do
value <- p
meta' <- orgStateMeta <$> getState
return $ (\m v -> MetaList (curList m ++ [v])) <$> meta' <*> value
where curList m = case lookupMeta key m of
Just (MetaList ms) -> ms
Just x -> [x]
_ -> []
--
-- export options
--
optionLine :: OrgParser ()
optionLine = try $ do
key <- metaKey
case key of
"link" -> parseLinkFormat >>= uncurry addLinkFormat
"options" -> exportSettings
_ -> mzero
addLinkFormat :: String
-> (String -> String)
-> OrgParser ()
addLinkFormat key formatter = updateState $ \s ->
let fs = orgStateLinkFormatters s
in s{ orgStateLinkFormatters = M.insert key formatter fs }
parseLinkFormat :: OrgParser ((String, String -> String))
parseLinkFormat = try $ do
linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
linkSubst <- parseFormat
return (linkType, linkSubst)
-- | An ad-hoc, single-argument-only implementation of a printf-style format
-- parser.
parseFormat :: OrgParser (String -> String)
parseFormat = try $ do
replacePlain <|> replaceUrl <|> justAppend
where
-- inefficient, but who cares
replacePlain = try $ (\x -> concat . flip intersperse x)
<$> sequence [tillSpecifier 's', rest]
replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
<$> sequence [tillSpecifier 'h', rest]
justAppend = try $ (++) <$> rest
rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
inlinesTillNewline :: OrgParser (F Inlines)
inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline

View file

@ -163,6 +163,9 @@ data ExportSettings = ExportSettings
, exportSmartQuotes :: Bool -- ^ Parse quotes smartly
, exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly
, exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts
, exportWithAuthor :: Bool -- ^ Include author in final meta-data
, exportWithCreator :: Bool -- ^ Include creator in final meta-data
, exportWithEmail :: Bool -- ^ Include email in final meta-data
}
instance Default ExportSettings where
@ -177,6 +180,9 @@ defaultExportSettings = ExportSettings
, exportSmartQuotes = True
, exportSpecialStrings = True
, exportSubSuperscripts = True
, exportWithAuthor = True
, exportWithCreator = True
, exportWithEmail = True
}

View file

@ -467,7 +467,14 @@ tests =
, "Author" =:
"#+author: Albert /Emacs-Fanboy/ Krewinkel" =?>
let author = toList . spcSep $ [ "Albert", emph "Emacs-Fanboy", "Krewinkel" ]
meta = setMeta "author" (MetaInlines author) $ nullMeta
meta = setMeta "author" (MetaList [MetaInlines author]) $ nullMeta
in Pandoc meta mempty
, "Multiple authors" =:
"#+author: James Dewey Watson, Francis Harry Compton Crick " =?>
let watson = MetaInlines $ toList "James Dewey Watson"
crick = MetaInlines $ toList "Francis Harry Compton Crick"
meta = setMeta "author" (MetaList [watson, crick]) $ nullMeta
in Pandoc meta mempty
, "Date" =:
@ -478,8 +485,8 @@ tests =
, "Description" =:
"#+DESCRIPTION: Explanatory text" =?>
let description = toList . spcSep $ [ "Explanatory", "text" ]
meta = setMeta "description" (MetaInlines description) $ nullMeta
let description = "Explanatory text"
meta = setMeta "description" (MetaString description) $ nullMeta
in Pandoc meta mempty
, "Properties drawer" =:
@ -489,6 +496,38 @@ tests =
] =?>
(mempty::Blocks)
, "LaTeX_headers options are translated to header-includes" =:
"#+LaTeX_header: \\usepackage{tikz}" =?>
let latexInlines = rawInline "latex" "\\usepackage{tikz}"
inclList = MetaList [MetaInlines (toList latexInlines)]
meta = setMeta "header-includes" inclList nullMeta
in Pandoc meta mempty
, "LaTeX_class option is translated to documentclass" =:
"#+LATEX_CLASS: article" =?>
let meta = setMeta "documentclass" (MetaString "article") nullMeta
in Pandoc meta mempty
, "LaTeX_class_options is translated to classoption" =:
"#+LATEX_CLASS_OPTIONS: [a4paper]" =?>
let meta = setMeta "classoption" (MetaString "a4paper") nullMeta
in Pandoc meta mempty
, "LaTeX_class_options is translated to classoption" =:
"#+html_head: <meta/>" =?>
let html = rawInline "html" "<meta/>"
inclList = MetaList [MetaInlines (toList html)]
meta = setMeta "header-includes" inclList nullMeta
in Pandoc meta mempty
, "later meta definitions take precedence" =:
unlines [ "#+AUTHOR: this will not be used"
, "#+author: Max"
] =?>
let author = MetaInlines [Str "Max"]
meta = setMeta "author" (MetaList [author]) $ nullMeta
in Pandoc meta mempty
, "Logbook drawer" =:
unlines [ " :LogBook:"
, " - State \"DONE\" from \"TODO\" [2014-03-03 Mon 11:00]"
@ -563,69 +602,91 @@ tests =
] =?>
(para (link "http://example.com/foo" "" "bar"))
, "Export option: Disable simple sub/superscript syntax" =:
unlines [ "#+OPTIONS: ^:nil"
, "a^b"
] =?>
para "a^b"
, "Export option: directly select drawers to be exported" =:
unlines [ "#+OPTIONS: d:(\"IMPORTANT\")"
, ":IMPORTANT:"
, "23"
, ":END:"
, ":BORING:"
, "very boring"
, ":END:"
] =?>
divWith (mempty, ["IMPORTANT", "drawer"], mempty) (para "23")
, testGroup "export options"
, "Export option: exclude drawers from being exported" =:
unlines [ "#+OPTIONS: d:(not \"BORING\")"
, ":IMPORTANT:"
, "5"
, ":END:"
, ":BORING:"
, "very boring"
, ":END:"
] =?>
divWith (mempty, ["IMPORTANT", "drawer"], mempty) (para "5")
[ "disable simple sub/superscript syntax" =:
unlines [ "#+OPTIONS: ^:nil"
, "a^b"
] =?>
para "a^b"
, "Export option: don't include archive trees" =:
unlines [ "#+OPTIONS: arch:nil"
, "* old :ARCHIVE:"
] =?>
(mempty ::Blocks)
, "directly select drawers to be exported" =:
unlines [ "#+OPTIONS: d:(\"IMPORTANT\")"
, ":IMPORTANT:"
, "23"
, ":END:"
, ":BORING:"
, "very boring"
, ":END:"
] =?>
divWith (mempty, ["IMPORTANT", "drawer"], mempty) (para "23")
, "Export option: include complete archive trees" =:
unlines [ "#+OPTIONS: arch:t"
, "* old :ARCHIVE:"
, " boring"
] =?>
let tagSpan t = spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
in mconcat [ headerWith ("old", [], mempty) 1 ("old" <> tagSpan "ARCHIVE")
, para "boring"
]
, "exclude drawers from being exported" =:
unlines [ "#+OPTIONS: d:(not \"BORING\")"
, ":IMPORTANT:"
, "5"
, ":END:"
, ":BORING:"
, "very boring"
, ":END:"
] =?>
divWith (mempty, ["IMPORTANT", "drawer"], mempty) (para "5")
, "Export option: include archive tree header only" =:
unlines [ "#+OPTIONS: arch:headline"
, "* old :ARCHIVE:"
, " boring"
] =?>
let tagSpan t = spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
in headerWith ("old", [], mempty) 1 ("old" <> tagSpan "ARCHIVE")
, "don't include archive trees" =:
unlines [ "#+OPTIONS: arch:nil"
, "* old :ARCHIVE:"
] =?>
(mempty ::Blocks)
, "Export option: limit headline depth" =:
unlines [ "#+OPTIONS: H:2"
, "* section"
, "** subsection"
, "*** list item 1"
, "*** list item 2"
] =?>
mconcat [ headerWith ("section", [], []) 1 "section"
, headerWith ("subsection", [], []) 2 "subsection"
, orderedList [ para "list item 1", para "list item 2" ]
]
, "include complete archive trees" =:
unlines [ "#+OPTIONS: arch:t"
, "* old :ARCHIVE:"
, " boring"
] =?>
let tagSpan t = spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
in mconcat [ headerWith ("old", [], mempty) 1 ("old" <> tagSpan "ARCHIVE")
, para "boring"
]
, "include archive tree header only" =:
unlines [ "#+OPTIONS: arch:headline"
, "* old :ARCHIVE:"
, " boring"
] =?>
let tagSpan t = spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
in headerWith ("old", [], mempty) 1 ("old" <> tagSpan "ARCHIVE")
, "limit headline depth" =:
unlines [ "#+OPTIONS: H:2"
, "* section"
, "** subsection"
, "*** list item 1"
, "*** list item 2"
] =?>
mconcat [ headerWith ("section", [], []) 1 "section"
, headerWith ("subsection", [], []) 2 "subsection"
, orderedList [ para "list item 1", para "list item 2" ]
]
, "disable author export" =:
unlines [ "#+OPTIONS: author:nil"
, "#+AUTHOR: ShyGuy"
] =?>
Pandoc nullMeta mempty
, "disable creator export" =:
unlines [ "#+OPTIONS: creator:nil"
, "#+creator: The Architect"
] =?>
Pandoc nullMeta mempty
, "disable email export" =:
unlines [ "#+OPTIONS: email:nil"
, "#+email: no-mail-please@example.com"
] =?>
Pandoc nullMeta mempty
]
]
, testGroup "Basic Blocks" $