From a26b3a2d6af8614e13299bbf477e28c5932ef680 Mon Sep 17 00:00:00 2001 From: Brian Leung Date: Fri, 5 Oct 2018 14:28:17 -0700 Subject: [PATCH] Org reader: Add partial support for `#+EXCLUDE_TAGS` option. (#4950) Closes #4284. Headers with the corresponding tags should not appear in the output. If one or more of the specified tags contains a non-tag character like `+`, Org-mode will not treat that as a valid tag, but will nonetheless continue scanning for valid tags. That behavior is not replicated in this patch; entering `cat+dog` as one of the entries in `#+EXCLUDE_TAGS` and running the file through Pandoc will cause the parser to fail and result in the only excluded tag being the default, `noexport`. --- src/Text/Pandoc/Readers/Org/DocumentTree.hs | 14 +++++----- src/Text/Pandoc/Readers/Org/Meta.hs | 11 ++++++++ src/Text/Pandoc/Readers/Org/ParserState.hs | 6 +++++ src/Text/Pandoc/Readers/Org/Parsing.hs | 8 ++++++ test/command/4284.md | 29 +++++++++++++++++++++ 5 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 test/command/4284.md diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs index a9df3b437..7d55892fe 100644 --- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -43,6 +43,7 @@ import Text.Pandoc.Readers.Org.BlockStarts import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing +import qualified Data.Set as Set import qualified Text.Pandoc.Builder as B -- @@ -73,9 +74,6 @@ documentTree blocks inline = do , headlineChildren = headlines' } -newtype Tag = Tag { fromTag :: String } - deriving (Show, Eq) - -- | Create a tag containing the given string. toTag :: String -> Tag toTag = Tag @@ -153,7 +151,7 @@ headline blocks inline lvl = try $ do headerTags :: Monad m => OrgParser m [Tag] headerTags = try $ - let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' + let tag = orgTagWord <* char ':' in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) -- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks @@ -163,15 +161,17 @@ headlineToBlocks hdln = do let tags = headlineTags hdln let text = headlineText hdln let level = headlineLevel hdln + shouldNotExport <- hasDoNotExportTag tags case () of - _ | any isNoExportTag tags -> return mempty + _ | shouldNotExport -> return mempty _ | any isArchiveTag tags -> archivedHeadlineToBlocks hdln _ | isCommentTitle text -> return mempty _ | maxLevel <= level -> headlineToHeaderWithList hdln _ | otherwise -> headlineToHeaderWithContents hdln -isNoExportTag :: Tag -> Bool -isNoExportTag = (== toTag "noexport") +hasDoNotExportTag :: Monad m => [Tag] -> OrgParser m Bool +hasDoNotExportTag tags = containsExcludedTag . orgStateExcludedTags <$> getState + where containsExcludedTag s = any (`Set.member` s) tags isArchiveTag :: Tag -> Bool isArchiveTag = (== toTag "ARCHIVE") diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 965e33d94..921cd27e0 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -52,6 +52,7 @@ import Data.Char (toLower) import Data.List (intersperse) import Data.Maybe (fromMaybe) import qualified Data.Map as M +import qualified Data.Set as Set import Network.HTTP (urlEncode) -- | Returns the current meta, respecting export options. @@ -158,6 +159,7 @@ optionLine = try $ do "seq_todo" -> todoSequence >>= updateState . registerTodoSequence "typ_todo" -> todoSequence >>= updateState . registerTodoSequence "macro" -> macroDefinition >>= updateState . registerMacro + "exclude_tags" -> excludedTagSet >>= updateState . setExcludedTags "pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar _ -> mzero @@ -190,6 +192,15 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) +excludedTagSet :: Monad m => OrgParser m (Set.Set Tag) +excludedTagSet = do + skipSpaces + Set.fromList . map Tag <$> + many (orgTagWord <* skipSpaces) <* newline + +setExcludedTags :: Set.Set Tag -> OrgParserState -> OrgParserState +setExcludedTags tagSet st = st { orgStateExcludedTags = tagSet } + setEmphasisPreChar :: Maybe [Char] -> OrgParserState -> OrgParserState setEmphasisPreChar csMb st = let preChars = fromMaybe (orgStateEmphasisPostChars defaultOrgParserState) csMb diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index d33602575..381d4c5ee 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState , defaultOrgParserState , OrgParserLocal (..) , OrgNoteRecord + , Tag(..) , HasReaderOptions (..) , HasQuoteContext (..) , HasMacros (..) @@ -88,6 +89,9 @@ type OrgNoteTable = [OrgNoteRecord] type OrgLinkFormatters = M.Map String (String -> String) -- | Macro expander function type MacroExpander = [String] -> String +-- | Tag +newtype Tag = Tag { fromTag :: String } + deriving (Show, Eq, Ord) -- | The states in which a todo item can be data TodoState = Todo | Done @@ -113,6 +117,7 @@ data OrgParserState = OrgParserState -- specified here. , orgStateEmphasisPostChars :: [Char] -- ^ Chars allowed at after emphasis , orgStateEmphasisNewlines :: Maybe Int + , orgStateExcludedTags :: Set.Set Tag , orgStateExportSettings :: ExportSettings , orgStateHeaderMap :: M.Map Inlines String , orgStateIdentifiers :: Set.Set String @@ -183,6 +188,7 @@ defaultOrgParserState = OrgParserState , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing , orgStateExportSettings = def + , orgStateExcludedTags = Set.singleton $ Tag "noexport" , orgStateHeaderMap = M.empty , orgStateIdentifiers = Set.empty , orgStateIncludeFiles = [] diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index b37b36624..52a346e36 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -46,6 +46,8 @@ module Text.Pandoc.Readers.Org.Parsing , orgArgKey , orgArgWord , orgArgWordChar + , orgTagWord + , orgTagWordChar -- * Re-exports from Text.Pandoc.Parser , ParserContext (..) , many1Till @@ -220,3 +222,9 @@ orgArgWord = many1 orgArgWordChar -- | Chars treated as part of a word in plists. orgArgWordChar :: Monad m => OrgParser m Char orgArgWordChar = alphaNum <|> oneOf "-_" + +orgTagWord :: Monad m => OrgParser m String +orgTagWord = many1 orgTagWordChar + +orgTagWordChar :: Monad m => OrgParser m Char +orgTagWordChar = alphaNum <|> oneOf "@%#_" diff --git a/test/command/4284.md b/test/command/4284.md new file mode 100644 index 000000000..e2a41d14f --- /dev/null +++ b/test/command/4284.md @@ -0,0 +1,29 @@ +``` +% pandoc -f org -t native +#+EXCLUDE_TAGS:apple cat bye dog % + +* This should not appear :apple: +* NOEXPORT should appear if not specified in EXCLUDE_TAGS :noexport: +* This should not appear :cat:hi:laptop: +** Children of headers with excluded tags should not appear :xylophone: +* This should not appear :%: +^D +[Header 1 ("noexport-should-appear-if-not-specified-in-excludetags",[],[]) [Str "NOEXPORT",Space,Str "should",Space,Str "appear",Space,Str "if",Space,Str "not",Space,Str "specified",Space,Str "in",Space,Str "EXCLUDE",Subscript [Str "TAGS"],Space,Span ("",["tag"],[("tag-name","noexport")]) [SmallCaps [Str "noexport"]]]] +``` + +``` +% pandoc -f org -t native +#+EXCLUDE_TAGS:elephant +* This should not appear :elephant: +* This should appear :fawn: +^D +[Header 1 ("this-should-appear",[],[]) [Str "This",Space,Str "should",Space,Str "appear",Space,Span ("",["tag"],[("tag-name","fawn")]) [SmallCaps [Str "fawn"]]]] +``` + +``` +% pandoc -f org -t native +#+EXCLUDE_TAGS: +* NOEXPORT should appear if not specified in EXCLUDE_TAGS :noexport: +^D +[Header 1 ("noexport-should-appear-if-not-specified-in-excludetags",[],[]) [Str "NOEXPORT",Space,Str "should",Space,Str "appear",Space,Str "if",Space,Str "not",Space,Str "specified",Space,Str "in",Space,Str "EXCLUDE",Subscript [Str "TAGS"],Space,Span ("",["tag"],[("tag-name","noexport")]) [SmallCaps [Str "noexport"]]]] +```