Org reader: Add partial support for #+EXCLUDE_TAGS option. (#4950)

Closes #4284.

Headers with the corresponding tags should not appear in the output.

If one or more of the specified tags contains a non-tag character
like `+`, Org-mode will not treat that as a valid tag, but will
nonetheless continue scanning for valid tags. That behavior is not
replicated in this patch; entering `cat+dog` as one of the entries in
`#+EXCLUDE_TAGS` and running the file through Pandoc will cause the
parser to fail and result in the only excluded tag being the default, `noexport`.
This commit is contained in:
Brian Leung 2018-10-05 14:28:17 -07:00 committed by John MacFarlane
parent e4ca51c2a7
commit a26b3a2d6a
5 changed files with 61 additions and 7 deletions

View file

@ -43,6 +43,7 @@ import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Parsing
import qualified Data.Set as Set
import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Builder as B
-- --
@ -73,9 +74,6 @@ documentTree blocks inline = do
, headlineChildren = headlines' , headlineChildren = headlines'
} }
newtype Tag = Tag { fromTag :: String }
deriving (Show, Eq)
-- | Create a tag containing the given string. -- | Create a tag containing the given string.
toTag :: String -> Tag toTag :: String -> Tag
toTag = Tag toTag = Tag
@ -153,7 +151,7 @@ headline blocks inline lvl = try $ do
headerTags :: Monad m => OrgParser m [Tag] headerTags :: Monad m => OrgParser m [Tag]
headerTags = try $ headerTags = try $
let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' let tag = orgTagWord <* char ':'
in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
-- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks -- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
@ -163,15 +161,17 @@ headlineToBlocks hdln = do
let tags = headlineTags hdln let tags = headlineTags hdln
let text = headlineText hdln let text = headlineText hdln
let level = headlineLevel hdln let level = headlineLevel hdln
shouldNotExport <- hasDoNotExportTag tags
case () of case () of
_ | any isNoExportTag tags -> return mempty _ | shouldNotExport -> return mempty
_ | any isArchiveTag tags -> archivedHeadlineToBlocks hdln _ | any isArchiveTag tags -> archivedHeadlineToBlocks hdln
_ | isCommentTitle text -> return mempty _ | isCommentTitle text -> return mempty
_ | maxLevel <= level -> headlineToHeaderWithList hdln _ | maxLevel <= level -> headlineToHeaderWithList hdln
_ | otherwise -> headlineToHeaderWithContents hdln _ | otherwise -> headlineToHeaderWithContents hdln
isNoExportTag :: Tag -> Bool hasDoNotExportTag :: Monad m => [Tag] -> OrgParser m Bool
isNoExportTag = (== toTag "noexport") hasDoNotExportTag tags = containsExcludedTag . orgStateExcludedTags <$> getState
where containsExcludedTag s = any (`Set.member` s) tags
isArchiveTag :: Tag -> Bool isArchiveTag :: Tag -> Bool
isArchiveTag = (== toTag "ARCHIVE") isArchiveTag = (== toTag "ARCHIVE")

View file

@ -52,6 +52,7 @@ import Data.Char (toLower)
import Data.List (intersperse) import Data.List (intersperse)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as Set
import Network.HTTP (urlEncode) import Network.HTTP (urlEncode)
-- | Returns the current meta, respecting export options. -- | Returns the current meta, respecting export options.
@ -158,6 +159,7 @@ optionLine = try $ do
"seq_todo" -> todoSequence >>= updateState . registerTodoSequence "seq_todo" -> todoSequence >>= updateState . registerTodoSequence
"typ_todo" -> todoSequence >>= updateState . registerTodoSequence "typ_todo" -> todoSequence >>= updateState . registerTodoSequence
"macro" -> macroDefinition >>= updateState . registerMacro "macro" -> macroDefinition >>= updateState . registerMacro
"exclude_tags" -> excludedTagSet >>= updateState . setExcludedTags
"pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar "pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar
"pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
_ -> mzero _ -> mzero
@ -190,6 +192,15 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
excludedTagSet :: Monad m => OrgParser m (Set.Set Tag)
excludedTagSet = do
skipSpaces
Set.fromList . map Tag <$>
many (orgTagWord <* skipSpaces) <* newline
setExcludedTags :: Set.Set Tag -> OrgParserState -> OrgParserState
setExcludedTags tagSet st = st { orgStateExcludedTags = tagSet }
setEmphasisPreChar :: Maybe [Char] -> OrgParserState -> OrgParserState setEmphasisPreChar :: Maybe [Char] -> OrgParserState -> OrgParserState
setEmphasisPreChar csMb st = setEmphasisPreChar csMb st =
let preChars = fromMaybe (orgStateEmphasisPostChars defaultOrgParserState) csMb let preChars = fromMaybe (orgStateEmphasisPostChars defaultOrgParserState) csMb

View file

@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState
, defaultOrgParserState , defaultOrgParserState
, OrgParserLocal (..) , OrgParserLocal (..)
, OrgNoteRecord , OrgNoteRecord
, Tag(..)
, HasReaderOptions (..) , HasReaderOptions (..)
, HasQuoteContext (..) , HasQuoteContext (..)
, HasMacros (..) , HasMacros (..)
@ -88,6 +89,9 @@ type OrgNoteTable = [OrgNoteRecord]
type OrgLinkFormatters = M.Map String (String -> String) type OrgLinkFormatters = M.Map String (String -> String)
-- | Macro expander function -- | Macro expander function
type MacroExpander = [String] -> String type MacroExpander = [String] -> String
-- | Tag
newtype Tag = Tag { fromTag :: String }
deriving (Show, Eq, Ord)
-- | The states in which a todo item can be -- | The states in which a todo item can be
data TodoState = Todo | Done data TodoState = Todo | Done
@ -113,6 +117,7 @@ data OrgParserState = OrgParserState
-- specified here. -- specified here.
, orgStateEmphasisPostChars :: [Char] -- ^ Chars allowed at after emphasis , orgStateEmphasisPostChars :: [Char] -- ^ Chars allowed at after emphasis
, orgStateEmphasisNewlines :: Maybe Int , orgStateEmphasisNewlines :: Maybe Int
, orgStateExcludedTags :: Set.Set Tag
, orgStateExportSettings :: ExportSettings , orgStateExportSettings :: ExportSettings
, orgStateHeaderMap :: M.Map Inlines String , orgStateHeaderMap :: M.Map Inlines String
, orgStateIdentifiers :: Set.Set String , orgStateIdentifiers :: Set.Set String
@ -183,6 +188,7 @@ defaultOrgParserState = OrgParserState
, orgStateEmphasisCharStack = [] , orgStateEmphasisCharStack = []
, orgStateEmphasisNewlines = Nothing , orgStateEmphasisNewlines = Nothing
, orgStateExportSettings = def , orgStateExportSettings = def
, orgStateExcludedTags = Set.singleton $ Tag "noexport"
, orgStateHeaderMap = M.empty , orgStateHeaderMap = M.empty
, orgStateIdentifiers = Set.empty , orgStateIdentifiers = Set.empty
, orgStateIncludeFiles = [] , orgStateIncludeFiles = []

View file

@ -46,6 +46,8 @@ module Text.Pandoc.Readers.Org.Parsing
, orgArgKey , orgArgKey
, orgArgWord , orgArgWord
, orgArgWordChar , orgArgWordChar
, orgTagWord
, orgTagWordChar
-- * Re-exports from Text.Pandoc.Parser -- * Re-exports from Text.Pandoc.Parser
, ParserContext (..) , ParserContext (..)
, many1Till , many1Till
@ -220,3 +222,9 @@ orgArgWord = many1 orgArgWordChar
-- | Chars treated as part of a word in plists. -- | Chars treated as part of a word in plists.
orgArgWordChar :: Monad m => OrgParser m Char orgArgWordChar :: Monad m => OrgParser m Char
orgArgWordChar = alphaNum <|> oneOf "-_" orgArgWordChar = alphaNum <|> oneOf "-_"
orgTagWord :: Monad m => OrgParser m String
orgTagWord = many1 orgTagWordChar
orgTagWordChar :: Monad m => OrgParser m Char
orgTagWordChar = alphaNum <|> oneOf "@%#_"

29
test/command/4284.md Normal file
View file

@ -0,0 +1,29 @@
```
% pandoc -f org -t native
#+EXCLUDE_TAGS:apple cat bye dog %
* This should not appear :apple:
* NOEXPORT should appear if not specified in EXCLUDE_TAGS :noexport:
* This should not appear :cat:hi:laptop:
** Children of headers with excluded tags should not appear :xylophone:
* This should not appear :%:
^D
[Header 1 ("noexport-should-appear-if-not-specified-in-excludetags",[],[]) [Str "NOEXPORT",Space,Str "should",Space,Str "appear",Space,Str "if",Space,Str "not",Space,Str "specified",Space,Str "in",Space,Str "EXCLUDE",Subscript [Str "TAGS"],Space,Span ("",["tag"],[("tag-name","noexport")]) [SmallCaps [Str "noexport"]]]]
```
```
% pandoc -f org -t native
#+EXCLUDE_TAGS:elephant
* This should not appear :elephant:
* This should appear :fawn:
^D
[Header 1 ("this-should-appear",[],[]) [Str "This",Space,Str "should",Space,Str "appear",Space,Span ("",["tag"],[("tag-name","fawn")]) [SmallCaps [Str "fawn"]]]]
```
```
% pandoc -f org -t native
#+EXCLUDE_TAGS:
* NOEXPORT should appear if not specified in EXCLUDE_TAGS :noexport:
^D
[Header 1 ("noexport-should-appear-if-not-specified-in-excludetags",[],[]) [Str "NOEXPORT",Space,Str "should",Space,Str "appear",Space,Str "if",Space,Str "not",Space,Str "specified",Space,Str "in",Space,Str "EXCLUDE",Subscript [Str "TAGS"],Space,Span ("",["tag"],[("tag-name","noexport")]) [SmallCaps [Str "noexport"]]]]
```