From bed5f700ceb91365018a4de6afea8a7c331688ae Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:51 +0200 Subject: [PATCH 01/12] Org reader: extract meta parsing code to module Parsing of meta-data is well separable from other block parsing tasks. Moving into new module to get small files and clearly arranged code. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/Org/Blocks.hs | 65 +-------------- src/Text/Pandoc/Readers/Org/Meta.hs | 110 ++++++++++++++++++++++++++ 3 files changed, 112 insertions(+), 64 deletions(-) create mode 100644 src/Text/Pandoc/Readers/Org/Meta.hs diff --git a/pandoc.cabal b/pandoc.cabal index 1db7ded0b..afe41fb9b 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -401,6 +401,7 @@ Library Text.Pandoc.Readers.Org.Blocks, Text.Pandoc.Readers.Org.ExportSettings, Text.Pandoc.Readers.Org.Inlines, + Text.Pandoc.Readers.Org.Meta, Text.Pandoc.Readers.Org.ParserState, Text.Pandoc.Readers.Org.Parsing, Text.Pandoc.Readers.Org.Shared, diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 6a8bb8b28..b955dafa7 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -34,8 +34,8 @@ module Text.Pandoc.Readers.Org.Blocks ) where import Text.Pandoc.Readers.Org.BlockStarts -import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings ) import Text.Pandoc.Readers.Org.Inlines +import Text.Pandoc.Readers.Org.Meta ( metaLine ) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Shared @@ -52,9 +52,7 @@ import Text.Pandoc.Shared ( compactify', compactify'DL ) import Control.Monad ( foldM, guard, mzero, void ) import Data.Char ( isSpace, toLower, toUpper) import Data.List ( foldl', intersperse, isPrefixOf ) -import qualified Data.Map as M import Data.Maybe ( fromMaybe, isNothing ) -import Network.HTTP ( urlEncode ) -- -- Org headers @@ -631,67 +629,9 @@ exampleCode = B.codeBlockWith ("", ["example"], []) specialLine :: OrgParser (F Blocks) specialLine = fmap return . try $ metaLine <|> commentLine --- The order, in which blocks are tried, makes sure that we're not looking at --- the beginning of a block, so we don't need to check for it -metaLine :: OrgParser Blocks -metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) - commentLine :: OrgParser Blocks commentLine = commentLineStart *> anyLine *> pure mempty -declarationLine :: OrgParser () -declarationLine = try $ do - key <- metaKey - value <- metaInlines - updateState $ \st -> - let meta' = B.setMeta key <$> value <*> pure nullMeta - in st { orgStateMeta = orgStateMeta st <> meta' } - -metaInlines :: OrgParser (F MetaValue) -metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline - -metaKey :: OrgParser String -metaKey = map toLower <$> many1 (noneOf ": \n\r") - <* char ':' - <* skipSpaces - -optionLine :: OrgParser () -optionLine = try $ do - key <- metaKey - case key of - "link" -> parseLinkFormat >>= uncurry addLinkFormat - "options" -> exportSettings - _ -> mzero - -addLinkFormat :: String - -> (String -> String) - -> OrgParser () -addLinkFormat key formatter = updateState $ \s -> - let fs = orgStateLinkFormatters s - in s{ orgStateLinkFormatters = M.insert key formatter fs } - -parseLinkFormat :: OrgParser ((String, String -> String)) -parseLinkFormat = try $ do - linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces - linkSubst <- parseFormat - return (linkType, linkSubst) - --- | An ad-hoc, single-argument-only implementation of a printf-style format --- parser. -parseFormat :: OrgParser (String -> String) -parseFormat = try $ do - replacePlain <|> replaceUrl <|> justAppend - where - -- inefficient, but who cares - replacePlain = try $ (\x -> concat . flip intersperse x) - <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) - <$> sequence [tillSpecifier 'h', rest] - justAppend = try $ (++) <$> rest - - rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") - tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) - -- -- Tables @@ -868,9 +808,6 @@ paraOrPlain = try $ do *> return (B.para <$> ils)) <|> (return (B.plain <$> ils)) -inlinesTillNewline :: OrgParser (F Inlines) -inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline - -- -- list blocks diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs new file mode 100644 index 000000000..e61947d43 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -0,0 +1,110 @@ +{-# LANGUAGE FlexibleContexts #-} +{- +Copyright (C) 2014-2016 Albert Krewinkel + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org.Meta + Copyright : Copyright (C) 2014-2016 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel + +Parsers for Org-mode meta declarations. +-} +module Text.Pandoc.Readers.Org.Meta + ( metaLine + ) where + +import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings ) +import Text.Pandoc.Readers.Org.Inlines +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing + +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder ( Blocks, Inlines ) +import Text.Pandoc.Definition +import Text.Pandoc.Compat.Monoid ((<>)) + +import Control.Monad ( mzero ) +import Data.Char ( toLower ) +import Data.List ( intersperse ) +import qualified Data.Map as M +import Network.HTTP ( urlEncode ) + +-- | Parse and handle a single line containing meta information +-- The order, in which blocks are tried, makes sure that we're not looking at +-- the beginning of a block, so we don't need to check for it +metaLine :: OrgParser Blocks +metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) + +declarationLine :: OrgParser () +declarationLine = try $ do + key <- metaKey + value <- metaInlines + updateState $ \st -> + let meta' = B.setMeta key <$> value <*> pure nullMeta + in st { orgStateMeta = orgStateMeta st <> meta' } + +metaInlines :: OrgParser (F MetaValue) +metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline + +metaKey :: OrgParser String +metaKey = map toLower <$> many1 (noneOf ": \n\r") + <* char ':' + <* skipSpaces + +optionLine :: OrgParser () +optionLine = try $ do + key <- metaKey + case key of + "link" -> parseLinkFormat >>= uncurry addLinkFormat + "options" -> exportSettings + _ -> mzero + +addLinkFormat :: String + -> (String -> String) + -> OrgParser () +addLinkFormat key formatter = updateState $ \s -> + let fs = orgStateLinkFormatters s + in s{ orgStateLinkFormatters = M.insert key formatter fs } + +parseLinkFormat :: OrgParser ((String, String -> String)) +parseLinkFormat = try $ do + linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkSubst <- parseFormat + return (linkType, linkSubst) + +-- | An ad-hoc, single-argument-only implementation of a printf-style format +-- parser. +parseFormat :: OrgParser (String -> String) +parseFormat = try $ do + replacePlain <|> replaceUrl <|> justAppend + where + -- inefficient, but who cares + replacePlain = try $ (\x -> concat . flip intersperse x) + <$> sequence [tillSpecifier 's', rest] + replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) + <$> sequence [tillSpecifier 'h', rest] + justAppend = try $ (++) <$> rest + + rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + +inlinesTillNewline :: OrgParser (F Inlines) +inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline From 153970bef5068f5a82943cc7a2bec79f04d31ae9 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:56 +0200 Subject: [PATCH 02/12] Org reader: read markup only for special meta keys Most meta-keys should be read as normal string values, only a few are interpreted as marked-up text. --- src/Text/Pandoc/Readers/Org/Meta.hs | 25 ++++++++++++++++++++----- tests/Tests/Readers/Org.hs | 4 ++-- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index e61947d43..8f0b9f6b5 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -55,20 +55,35 @@ metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) declarationLine :: OrgParser () declarationLine = try $ do - key <- metaKey - value <- metaInlines + key <- map toLower <$> metaKey + value <- metaValue key updateState $ \st -> let meta' = B.setMeta key <$> value <*> pure nullMeta in st { orgStateMeta = orgStateMeta st <> meta' } -metaInlines :: OrgParser (F MetaValue) -metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline - metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces +metaValue :: String -> OrgParser (F MetaValue) +metaValue key = do + case key of + "author" -> metaInlines + "title" -> metaInlines + "date" -> metaInlines + _ -> metaString + +metaInlines :: OrgParser (F MetaValue) +metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline + +metaString :: OrgParser (F MetaValue) +metaString = return . MetaString <$> anyLine + + +-- +-- export options +-- optionLine :: OrgParser () optionLine = try $ do key <- metaKey diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 0a3f9c222..61c222919 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -478,8 +478,8 @@ tests = , "Description" =: "#+DESCRIPTION: Explanatory text" =?> - let description = toList . spcSep $ [ "Explanatory", "text" ] - meta = setMeta "description" (MetaInlines description) $ nullMeta + let description = "Explanatory text" + meta = setMeta "description" (MetaString description) $ nullMeta in Pandoc meta mempty , "Properties drawer" =: From 2ca2585b3569bd14923795f3023bd0789fe7911f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:57 +0200 Subject: [PATCH 03/12] Org reader: allow multiple, comma-separated authors Multiple authors can be specified in the `#+AUTHOR` meta line if they are given as a comma-separated list. --- src/Text/Pandoc/Readers/Org/Meta.hs | 10 +++++++++- tests/Tests/Readers/Org.hs | 9 ++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 8f0b9f6b5..372b19fb6 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -69,7 +69,7 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") metaValue :: String -> OrgParser (F MetaValue) metaValue key = do case key of - "author" -> metaInlines + "author" -> metaInlinesCommaSeparated "title" -> metaInlines "date" -> metaInlines _ -> metaString @@ -77,6 +77,14 @@ metaValue key = do metaInlines :: OrgParser (F MetaValue) metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline +metaInlinesCommaSeparated :: OrgParser (F MetaValue) +metaInlinesCommaSeparated = do + authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',') + newline + authors <- mapM (parseFromString inlinesTillNewline . (++ "\n")) authStrs + let toMetaInlines = MetaInlines . B.toList + return $ MetaList . map toMetaInlines <$> sequence authors + metaString :: OrgParser (F MetaValue) metaString = return . MetaString <$> anyLine diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 61c222919..844266401 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -467,7 +467,14 @@ tests = , "Author" =: "#+author: Albert /Emacs-Fanboy/ Krewinkel" =?> let author = toList . spcSep $ [ "Albert", emph "Emacs-Fanboy", "Krewinkel" ] - meta = setMeta "author" (MetaInlines author) $ nullMeta + meta = setMeta "author" (MetaList [MetaInlines author]) $ nullMeta + in Pandoc meta mempty + + , "Multiple authors" =: + "#+author: James Dewey Watson, Francis Harry Compton Crick " =?> + let watson = MetaInlines $ toList "James Dewey Watson" + crick = MetaInlines $ toList "Francis Harry Compton Crick" + meta = setMeta "author" (MetaList [watson, crick]) $ nullMeta in Pandoc meta mempty , "Date" =: From 75df1042157e271398e880e64ce95bd83c5d2193 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:57 +0200 Subject: [PATCH 04/12] Org reader: give precedence to later meta lines The last meta-line of any given type is the significant line. Previously the value of the first line was kept, even if more lines of the same type were encounterd. --- src/Text/Pandoc/Readers/Org/Meta.hs | 2 +- tests/Tests/Readers/Org.hs | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 372b19fb6..91d16fc63 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -59,7 +59,7 @@ declarationLine = try $ do value <- metaValue key updateState $ \st -> let meta' = B.setMeta key <$> value <*> pure nullMeta - in st { orgStateMeta = orgStateMeta st <> meta' } + in st { orgStateMeta = meta' <> orgStateMeta st } metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 844266401..5bb291d45 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -496,6 +496,14 @@ tests = ] =?> (mempty::Blocks) + , "later meta definitions take precedence" =: + unlines [ "#+AUTHOR: this will not be used" + , "#+author: Max" + ] =?> + let author = MetaInlines [Str "Max"] + meta = setMeta "author" (MetaList [author]) $ nullMeta + in Pandoc meta mempty + , "Logbook drawer" =: unlines [ " :LogBook:" , " - State \"DONE\" from \"TODO\" [2014-03-03 Mon 11:00]" From a2574883432c2375661caa4bee19a48967cf49db Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:57 +0200 Subject: [PATCH 05/12] Org reader: read LaTeX_header as header-includes LaTeX-specific header commands can be defined in `#+LaTeX_header` lines. They are parsed as format-specific inlines to ensure that they will only show up in LaTeX output. --- src/Text/Pandoc/Readers/Org/Meta.hs | 40 ++++++++++++++++++++++------- tests/Tests/Readers/Org.hs | 7 +++++ 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 91d16fc63..988a18981 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -1,4 +1,5 @@ {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} {- Copyright (C) 2014-2016 Albert Krewinkel @@ -56,9 +57,9 @@ metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) declarationLine :: OrgParser () declarationLine = try $ do key <- map toLower <$> metaKey - value <- metaValue key + (key', value) <- metaValue key updateState $ \st -> - let meta' = B.setMeta key <$> value <*> pure nullMeta + let meta' = B.setMeta key' <$> value <*> pure nullMeta in st { orgStateMeta = meta' <> orgStateMeta st } metaKey :: OrgParser String @@ -66,13 +67,17 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces -metaValue :: String -> OrgParser (F MetaValue) -metaValue key = do - case key of - "author" -> metaInlinesCommaSeparated - "title" -> metaInlines - "date" -> metaInlines - _ -> metaString +metaValue :: String -> OrgParser (String, (F MetaValue)) +metaValue key = + let inclKey = "header-includes" + in case key of + "author" -> (key,) <$> metaInlinesCommaSeparated + "title" -> (key,) <$> metaInlines + "date" -> (key,) <$> metaInlines + "header-includes" -> (key,) <$> accumulatingList key metaInlines + "latex_header" -> (inclKey,) <$> + accumulatingList inclKey (metaExportSnippet "latex") + _ -> (key,) <$> metaString metaInlines :: OrgParser (F MetaValue) metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline @@ -88,6 +93,23 @@ metaInlinesCommaSeparated = do metaString :: OrgParser (F MetaValue) metaString = return . MetaString <$> anyLine +-- | Read an format specific meta definition +metaExportSnippet :: String -> OrgParser (F MetaValue) +metaExportSnippet format = + return . MetaInlines . B.toList . B.rawInline format <$> anyLine + +-- | Accumulate the result of the @parser@ in a list under @key@. +accumulatingList :: String + -> OrgParser (F MetaValue) + -> OrgParser (F MetaValue) +accumulatingList key p = do + value <- p + meta' <- orgStateMeta <$> getState + return $ (\m v -> MetaList (curList m ++ [v])) <$> meta' <*> value + where curList m = case lookupMeta key m of + Just (MetaList ms) -> ms + Just x -> [x] + _ -> [] -- -- export options diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 5bb291d45..a3f6f73e4 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -496,6 +496,13 @@ tests = ] =?> (mempty::Blocks) + , "LaTeX_headers options are translated to header-includes" =: + "#+LaTeX_header: \\usepackage{tikz}" =?> + let latexInlines = rawInline "latex" "\\usepackage{tikz}" + inclList = MetaList [MetaInlines (toList latexInlines)] + meta = setMeta "header-includes" inclList nullMeta + in Pandoc meta mempty + , "later meta definitions take precedence" =: unlines [ "#+AUTHOR: this will not be used" , "#+author: Max" From 825ce8ca73073db3a1bf0db1ece9fe0344a2e8ab Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:57 +0200 Subject: [PATCH 06/12] Org reader: set documentclass meta from LaTeX_class --- src/Text/Pandoc/Readers/Org/Meta.hs | 1 + tests/Tests/Readers/Org.hs | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 988a18981..213e417dd 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -77,6 +77,7 @@ metaValue key = "header-includes" -> (key,) <$> accumulatingList key metaInlines "latex_header" -> (inclKey,) <$> accumulatingList inclKey (metaExportSnippet "latex") + "latex_class" -> ("documentclass",) <$> metaString _ -> (key,) <$> metaString metaInlines :: OrgParser (F MetaValue) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index a3f6f73e4..2af019469 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -503,6 +503,11 @@ tests = meta = setMeta "header-includes" inclList nullMeta in Pandoc meta mempty + , "LaTeX_class option is translated to documentclass" =: + "#+LATEX_CLASS: article" =?> + let meta = setMeta "documentclass" (MetaString "article") nullMeta + in Pandoc meta mempty + , "later meta definitions take precedence" =: unlines [ "#+AUTHOR: this will not be used" , "#+author: Max" From d164ead37900a186acad44bb244f9268d3e3e91d Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:57 +0200 Subject: [PATCH 07/12] Org reader: set classoption meta from LaTeX_class_options --- src/Text/Pandoc/Readers/Org/Meta.hs | 9 ++++++++- tests/Tests/Readers/Org.hs | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 213e417dd..ea3ec51c3 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -78,6 +78,10 @@ metaValue key = "latex_header" -> (inclKey,) <$> accumulatingList inclKey (metaExportSnippet "latex") "latex_class" -> ("documentclass",) <$> metaString + -- Org-mode expects class options to contain the surrounding brackets, + -- pandoc does not. + "latex_class_options" -> ("classoption",) <$> + metaModifiedString (filter (`notElem` "[]")) _ -> (key,) <$> metaString metaInlines :: OrgParser (F MetaValue) @@ -92,7 +96,10 @@ metaInlinesCommaSeparated = do return $ MetaList . map toMetaInlines <$> sequence authors metaString :: OrgParser (F MetaValue) -metaString = return . MetaString <$> anyLine +metaString = metaModifiedString id + +metaModifiedString :: (String -> String) -> OrgParser (F MetaValue) +metaModifiedString f = return . MetaString . f <$> anyLine -- | Read an format specific meta definition metaExportSnippet :: String -> OrgParser (F MetaValue) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 2af019469..534990876 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -508,6 +508,11 @@ tests = let meta = setMeta "documentclass" (MetaString "article") nullMeta in Pandoc meta mempty + , "LaTeX_class_options is translated to classoption" =: + "#+LATEX_CLASS_OPTIONS: [a4paper]" =?> + let meta = setMeta "classoption" (MetaString "a4paper") nullMeta + in Pandoc meta mempty + , "later meta definitions take precedence" =: unlines [ "#+AUTHOR: this will not be used" , "#+author: Max" From 28d17ea70fee316576cf28525e9b5ad15c62cf9d Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:57 +0200 Subject: [PATCH 08/12] Org reader: read HTML_head as header-includes HTML-specific head content can be defined in `#+HTML_head` lines. They are parsed as format-specific inlines to ensure that they will only show up in HTML output. --- src/Text/Pandoc/Readers/Org/Meta.hs | 2 ++ tests/Tests/Readers/Org.hs | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index ea3ec51c3..51fd2c9d8 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -82,6 +82,8 @@ metaValue key = -- pandoc does not. "latex_class_options" -> ("classoption",) <$> metaModifiedString (filter (`notElem` "[]")) + "html_head" -> (inclKey,) <$> + accumulatingList inclKey (metaExportSnippet "html") _ -> (key,) <$> metaString metaInlines :: OrgParser (F MetaValue) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 534990876..524bed109 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -513,6 +513,13 @@ tests = let meta = setMeta "classoption" (MetaString "a4paper") nullMeta in Pandoc meta mempty + , "LaTeX_class_options is translated to classoption" =: + "#+html_head: " =?> + let html = rawInline "html" "" + inclList = MetaList [MetaInlines (toList html)] + meta = setMeta "header-includes" inclList nullMeta + in Pandoc meta mempty + , "later meta definitions take precedence" =: unlines [ "#+AUTHOR: this will not be used" , "#+author: Max" From ad625782b170f4cbbef206bc6ea736c082cb38d7 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:57 +0200 Subject: [PATCH 09/12] Put Org reader export option tests into test group Using a separate test group instead of prefixing the test subject should be clearer than the current approach. --- tests/Tests/Readers/Org.hs | 118 +++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 57 deletions(-) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 524bed109..9979dc8ec 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -602,69 +602,73 @@ tests = ] =?> (para (link "http://example.com/foo" "" "bar")) - , "Export option: Disable simple sub/superscript syntax" =: - unlines [ "#+OPTIONS: ^:nil" - , "a^b" - ] =?> - para "a^b" - , "Export option: directly select drawers to be exported" =: - unlines [ "#+OPTIONS: d:(\"IMPORTANT\")" - , ":IMPORTANT:" - , "23" - , ":END:" - , ":BORING:" - , "very boring" - , ":END:" - ] =?> - divWith (mempty, ["IMPORTANT", "drawer"], mempty) (para "23") + , testGroup "export options" - , "Export option: exclude drawers from being exported" =: - unlines [ "#+OPTIONS: d:(not \"BORING\")" - , ":IMPORTANT:" - , "5" - , ":END:" - , ":BORING:" - , "very boring" - , ":END:" - ] =?> - divWith (mempty, ["IMPORTANT", "drawer"], mempty) (para "5") + [ "disable simple sub/superscript syntax" =: + unlines [ "#+OPTIONS: ^:nil" + , "a^b" + ] =?> + para "a^b" - , "Export option: don't include archive trees" =: - unlines [ "#+OPTIONS: arch:nil" - , "* old :ARCHIVE:" - ] =?> - (mempty ::Blocks) + , "directly select drawers to be exported" =: + unlines [ "#+OPTIONS: d:(\"IMPORTANT\")" + , ":IMPORTANT:" + , "23" + , ":END:" + , ":BORING:" + , "very boring" + , ":END:" + ] =?> + divWith (mempty, ["IMPORTANT", "drawer"], mempty) (para "23") - , "Export option: include complete archive trees" =: - unlines [ "#+OPTIONS: arch:t" - , "* old :ARCHIVE:" - , " boring" - ] =?> - let tagSpan t = spanWith ("", ["tag"], [("data-tag-name", t)]) mempty - in mconcat [ headerWith ("old", [], mempty) 1 ("old" <> tagSpan "ARCHIVE") - , para "boring" - ] + , "exclude drawers from being exported" =: + unlines [ "#+OPTIONS: d:(not \"BORING\")" + , ":IMPORTANT:" + , "5" + , ":END:" + , ":BORING:" + , "very boring" + , ":END:" + ] =?> + divWith (mempty, ["IMPORTANT", "drawer"], mempty) (para "5") - , "Export option: include archive tree header only" =: - unlines [ "#+OPTIONS: arch:headline" - , "* old :ARCHIVE:" - , " boring" - ] =?> - let tagSpan t = spanWith ("", ["tag"], [("data-tag-name", t)]) mempty - in headerWith ("old", [], mempty) 1 ("old" <> tagSpan "ARCHIVE") + , "don't include archive trees" =: + unlines [ "#+OPTIONS: arch:nil" + , "* old :ARCHIVE:" + ] =?> + (mempty ::Blocks) - , "Export option: limit headline depth" =: - unlines [ "#+OPTIONS: H:2" - , "* section" - , "** subsection" - , "*** list item 1" - , "*** list item 2" - ] =?> - mconcat [ headerWith ("section", [], []) 1 "section" - , headerWith ("subsection", [], []) 2 "subsection" - , orderedList [ para "list item 1", para "list item 2" ] - ] + , "include complete archive trees" =: + unlines [ "#+OPTIONS: arch:t" + , "* old :ARCHIVE:" + , " boring" + ] =?> + let tagSpan t = spanWith ("", ["tag"], [("data-tag-name", t)]) mempty + in mconcat [ headerWith ("old", [], mempty) 1 ("old" <> tagSpan "ARCHIVE") + , para "boring" + ] + + , "include archive tree header only" =: + unlines [ "#+OPTIONS: arch:headline" + , "* old :ARCHIVE:" + , " boring" + ] =?> + let tagSpan t = spanWith ("", ["tag"], [("data-tag-name", t)]) mempty + in headerWith ("old", [], mempty) 1 ("old" <> tagSpan "ARCHIVE") + + , "limit headline depth" =: + unlines [ "#+OPTIONS: H:2" + , "* section" + , "** subsection" + , "*** list item 1" + , "*** list item 2" + ] =?> + mconcat [ headerWith ("section", [], []) 1 "section" + , headerWith ("subsection", [], []) 2 "subsection" + , orderedList [ para "list item 1", para "list item 2" ] + ] + ] ] , testGroup "Basic Blocks" $ From 117d3f4d92d5096cfa51305db6d2fa261ef87d24 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:58 +0200 Subject: [PATCH 10/12] Org reader: respect `author` export option The `author` option controls whether the author should be included in the final markup. Setting `#+OPTIONS: author:nil` will drop the author from the final meta-data output. --- src/Text/Pandoc/Readers/Org/Blocks.hs | 6 +++--- src/Text/Pandoc/Readers/Org/ExportSettings.hs | 2 +- src/Text/Pandoc/Readers/Org/Meta.hs | 17 +++++++++++++++++ src/Text/Pandoc/Readers/Org/ParserState.hs | 2 ++ tests/Tests/Readers/Org.hs | 6 ++++++ 5 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index b955dafa7..b1f56eed0 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -35,7 +35,7 @@ module Text.Pandoc.Readers.Org.Blocks import Text.Pandoc.Readers.Org.BlockStarts import Text.Pandoc.Readers.Org.Inlines -import Text.Pandoc.Readers.Org.Meta ( metaLine ) +import Text.Pandoc.Readers.Org.Meta ( metaExport, metaLine ) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Shared @@ -230,8 +230,8 @@ blockList = do -- | Get the meta information safed in the state. meta :: OrgParser Meta meta = do - st <- getState - return $ runF (orgStateMeta st) st + meta' <- metaExport + runF meta' <$> getState blocks :: OrgParser (F Blocks) blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof) diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index b48acc9c4..b539a8000 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -54,7 +54,7 @@ exportSetting = choice , ignoredSetting "<" , ignoredSetting "\\n" , archivedTreeSetting "arch" (\val es -> es { exportArchivedTrees = val }) - , ignoredSetting "author" + , booleanSetting "author" (\val es -> es { exportWithAuthor = val }) , ignoredSetting "c" , ignoredSetting "creator" , complementableListSetting "d" (\val es -> es { exportDrawers = val }) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 51fd2c9d8..4d74713d6 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -29,6 +29,7 @@ Parsers for Org-mode meta declarations. -} module Text.Pandoc.Readers.Org.Meta ( metaLine + , metaExport ) where import Text.Pandoc.Readers.Org.BlockStarts @@ -48,6 +49,22 @@ import Data.List ( intersperse ) import qualified Data.Map as M import Network.HTTP ( urlEncode ) +-- | Returns the current meta, respecting export options. +metaExport :: OrgParser (F Meta) +metaExport = do + st <- getState + let withAuthor = extractExportOption exportWithAuthor st + return $ (if withAuthor then id else removeMeta "author") + <$> orgStateMeta st + +removeMeta :: String -> Meta -> Meta +removeMeta key meta' = + let metaMap = unMeta meta' + in Meta $ M.delete key metaMap + +extractExportOption :: (ExportSettings -> a) -> OrgParserState -> a +extractExportOption ex = ex . orgStateExportSettings + -- | Parse and handle a single line containing meta information -- The order, in which blocks are tried, makes sure that we're not looking at -- the beginning of a block, so we don't need to check for it diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 48e7717cd..661ccc4ea 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -163,6 +163,7 @@ data ExportSettings = ExportSettings , exportSmartQuotes :: Bool -- ^ Parse quotes smartly , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts + , exportWithAuthor :: Bool -- ^ Include author in final meta-data } instance Default ExportSettings where @@ -177,6 +178,7 @@ defaultExportSettings = ExportSettings , exportSmartQuotes = True , exportSpecialStrings = True , exportSubSuperscripts = True + , exportWithAuthor = True } diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 9979dc8ec..2ef847f30 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -668,6 +668,12 @@ tests = , headerWith ("subsection", [], []) 2 "subsection" , orderedList [ para "list item 1", para "list item 2" ] ] + + , "disable author export" =: + unlines [ "#+OPTIONS: author:nil" + , "#+AUTHOR: ShyGuy" + ] =?> + Pandoc nullMeta mempty ] ] From 0568aa5cad5ca5501dc0565b0e341fc5393f67e2 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:58 +0200 Subject: [PATCH 11/12] Org reader: respect `email` export option The `email` option controls whether the email meta-field should be included in the final markup. Setting `#+OPTIONS: email:nil` will drop the email field from the final meta-data output. --- src/Text/Pandoc/Readers/Org/ExportSettings.hs | 2 +- src/Text/Pandoc/Readers/Org/Meta.hs | 8 ++++---- src/Text/Pandoc/Readers/Org/ParserState.hs | 2 ++ tests/Tests/Readers/Org.hs | 6 ++++++ 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index b539a8000..6233a6104 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -60,7 +60,7 @@ exportSetting = choice , complementableListSetting "d" (\val es -> es { exportDrawers = val }) , ignoredSetting "date" , ignoredSetting "e" - , ignoredSetting "email" + , booleanSetting "email" (\val es -> es { exportWithEmail = val }) , ignoredSetting "f" , integerSetting "H" (\val es -> es { exportHeadlineLevels = val }) , ignoredSetting "inline" diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 4d74713d6..a20c25e09 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -53,8 +53,11 @@ import Network.HTTP ( urlEncode ) metaExport :: OrgParser (F Meta) metaExport = do st <- getState - let withAuthor = extractExportOption exportWithAuthor st + let settings = orgStateExportSettings st + let withAuthor = exportWithAuthor settings + let withEmail = exportWithEmail settings return $ (if withAuthor then id else removeMeta "author") + . (if withEmail then id else removeMeta "email") <$> orgStateMeta st removeMeta :: String -> Meta -> Meta @@ -62,9 +65,6 @@ removeMeta key meta' = let metaMap = unMeta meta' in Meta $ M.delete key metaMap -extractExportOption :: (ExportSettings -> a) -> OrgParserState -> a -extractExportOption ex = ex . orgStateExportSettings - -- | Parse and handle a single line containing meta information -- The order, in which blocks are tried, makes sure that we're not looking at -- the beginning of a block, so we don't need to check for it diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 661ccc4ea..4c3aa298c 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -164,6 +164,7 @@ data ExportSettings = ExportSettings , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts , exportWithAuthor :: Bool -- ^ Include author in final meta-data + , exportWithEmail :: Bool -- ^ Include email in final meta-data } instance Default ExportSettings where @@ -179,6 +180,7 @@ defaultExportSettings = ExportSettings , exportSpecialStrings = True , exportSubSuperscripts = True , exportWithAuthor = True + , exportWithEmail = True } diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 2ef847f30..5191f63d5 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -674,6 +674,12 @@ tests = , "#+AUTHOR: ShyGuy" ] =?> Pandoc nullMeta mempty + + , "disable email export" =: + unlines [ "#+OPTIONS: email:nil" + , "#+email: no-mail-please@example.com" + ] =?> + Pandoc nullMeta mempty ] ] From 88313c0b93694e310175a461ed74f497debbd57d Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 29 Aug 2016 14:10:58 +0200 Subject: [PATCH 12/12] Org reader: respect `creator` export option The `creator` option controls whether the creator meta-field should be included in the final markup. Setting `#+OPTIONS: creator:nil` will drop the creator field from the final meta-data output. Org-mode recognizes the special value `comment` for this field, causing the creator to be included in a comment. This is difficult to translate to Pandoc internals and is hence interpreted the same as other truish values (i.e. the meta field is kept if it's present). --- src/Text/Pandoc/Readers/Org/ExportSettings.hs | 4 +++- src/Text/Pandoc/Readers/Org/Meta.hs | 7 +++---- src/Text/Pandoc/Readers/Org/ParserState.hs | 2 ++ tests/Tests/Readers/Org.hs | 6 ++++++ 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index 6233a6104..283cfa998 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -56,7 +56,9 @@ exportSetting = choice , archivedTreeSetting "arch" (\val es -> es { exportArchivedTrees = val }) , booleanSetting "author" (\val es -> es { exportWithAuthor = val }) , ignoredSetting "c" - , ignoredSetting "creator" + -- org-mode allows the special value `comment` for creator, which we'll + -- interpret as true as it doesn't make sense in the context of Pandoc. + , booleanSetting "creator" (\val es -> es { exportWithCreator = val }) , complementableListSetting "d" (\val es -> es { exportDrawers = val }) , ignoredSetting "date" , ignoredSetting "e" diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index a20c25e09..11eb18e36 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -54,10 +54,9 @@ metaExport :: OrgParser (F Meta) metaExport = do st <- getState let settings = orgStateExportSettings st - let withAuthor = exportWithAuthor settings - let withEmail = exportWithEmail settings - return $ (if withAuthor then id else removeMeta "author") - . (if withEmail then id else removeMeta "email") + return $ (if exportWithAuthor settings then id else removeMeta "author") + . (if exportWithCreator settings then id else removeMeta "creator") + . (if exportWithEmail settings then id else removeMeta "email") <$> orgStateMeta st removeMeta :: String -> Meta -> Meta diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 4c3aa298c..84dbe9d33 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -164,6 +164,7 @@ data ExportSettings = ExportSettings , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts , exportWithAuthor :: Bool -- ^ Include author in final meta-data + , exportWithCreator :: Bool -- ^ Include creator in final meta-data , exportWithEmail :: Bool -- ^ Include email in final meta-data } @@ -180,6 +181,7 @@ defaultExportSettings = ExportSettings , exportSpecialStrings = True , exportSubSuperscripts = True , exportWithAuthor = True + , exportWithCreator = True , exportWithEmail = True } diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 5191f63d5..d6e7bba22 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -675,6 +675,12 @@ tests = ] =?> Pandoc nullMeta mempty + , "disable creator export" =: + unlines [ "#+OPTIONS: creator:nil" + , "#+creator: The Architect" + ] =?> + Pandoc nullMeta mempty + , "disable email export" =: unlines [ "#+OPTIONS: email:nil" , "#+email: no-mail-please@example.com"