From d5182778c45704b0a2d5d283a7fca5104588af81 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 30 Oct 2016 10:27:47 +0100
Subject: [PATCH] Org reader: add support for todo-markers

Headlines can have optional todo-markers which can be controlled via the
`#+TODO`, `#+SEQ_TODO`, or `#+TYP_TODO` meta directive.  Multiple such
directives can be given, each adding a new set of recognized todo-markers.
If no custom todo-markers are defined, the default `TODO` and `DONE`
markers are used.

Todo-markers are conceptually separate from headline text and are hence
excluded when autogenerating headline IDs.

The markers are rendered as spans and labelled with two classes: One
class is the markers name, the other signals the todo-state of the
marker (either `todo` or `done`).
---
 src/Text/Pandoc/Readers/Org/Blocks.hs      |  21 +-
 src/Text/Pandoc/Readers/Org/Meta.hs        |  43 +++-
 src/Text/Pandoc/Readers/Org/ParserState.hs |  39 +++
 tests/Tests/Readers/Org.hs                 | 278 ++++++++++++---------
 4 files changed, 257 insertions(+), 124 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 61978f79f..ead600ccc 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -90,6 +90,7 @@ type Properties = [(PropertyKey, PropertyValue)]
 -- | Org mode headline (i.e. a document subtree).
 data Headline = Headline
   { headlineLevel      :: Int
+  , headlineTodoMarker :: Maybe TodoMarker
   , headlineText       :: Inlines
   , headlineTags       :: [Tag]
   , headlineProperties :: Properties
@@ -107,6 +108,7 @@ headline :: Int -> OrgParser (F Headline)
 headline lvl = try $ do
   level <- headerStart
   guard (lvl <= level)
+  todoKw <- optionMaybe todoKeyword
   title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle
   tags  <- option [] headerTags
   newline
@@ -119,6 +121,7 @@ headline lvl = try $ do
     children' <- sequence children
     return $ Headline
       { headlineLevel = level
+      , headlineTodoMarker = todoKw
       , headlineText = title'
       , headlineTags = tags
       , headlineProperties = properties
@@ -193,11 +196,27 @@ headlineToHeaderWithContents hdln@(Headline {..}) = do
 
 headlineToHeader :: Headline -> OrgParser Blocks
 headlineToHeader (Headline {..}) = do
-  let text        = tagTitle headlineText headlineTags
+  let todoText    = case headlineTodoMarker of
+                      Just kw -> todoKeywordToInlines kw <> B.space
+                      Nothing -> mempty
+  let text        = tagTitle (todoText <> headlineText) headlineTags
   let propAttr    = propertiesToAttr headlineProperties
   attr           <- registerHeader propAttr headlineText
   return $ B.headerWith attr headlineLevel text
 
+todoKeyword :: OrgParser TodoMarker
+todoKeyword = try $ do
+  taskStates <- activeTodoMarkers <$> getState
+  let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar)
+  choice (map kwParser taskStates)
+
+todoKeywordToInlines :: TodoMarker -> Inlines
+todoKeywordToInlines tdm =
+  let todoText  = todoMarkerName tdm
+      todoState = map toLower . show $ todoMarkerState tdm
+      classes = [todoState, todoText]
+  in B.spanWith (mempty, classes, mempty) (B.str todoText)
+
 propertiesToAttr :: Properties -> Attr
 propertiesToAttr properties =
   let
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index ea088bfdb..bbbb216a0 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -42,11 +42,11 @@ import qualified Text.Pandoc.Builder as B
 import           Text.Pandoc.Builder ( Blocks, Inlines )
 import           Text.Pandoc.Definition
 
-import           Control.Monad ( mzero )
+import           Control.Monad ( mzero, void )
 import           Data.Char ( toLower )
 import           Data.List ( intersperse )
 import qualified Data.Map as M
-import           Data.Monoid ((<>))
+import           Data.Monoid ( (<>) )
 import           Network.HTTP ( urlEncode )
 
 -- | Returns the current meta, respecting export options.
@@ -144,8 +144,11 @@ optionLine :: OrgParser ()
 optionLine = try $ do
   key <- metaKey
   case key of
-    "link"    -> parseLinkFormat >>= uncurry addLinkFormat
-    "options" -> exportSettings
+    "link"     -> parseLinkFormat >>= uncurry addLinkFormat
+    "options"  -> exportSettings
+    "todo"     -> todoSequence >>= updateState . registerTodoSequence
+    "seq_todo" -> todoSequence >>= updateState . registerTodoSequence
+    "typ_todo" -> todoSequence >>= updateState . registerTodoSequence
     _         -> mzero
 
 addLinkFormat :: String
@@ -179,3 +182,35 @@ parseFormat = try $ do
 
 inlinesTillNewline :: OrgParser (F Inlines)
 inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
+
+--
+-- ToDo Sequences and Keywords
+--
+todoSequence :: OrgParser TodoSequence
+todoSequence = try $ do
+  todoKws <- todoKeywords
+  doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
+  newline
+  -- There must be at least one DONE keyword. The last TODO keyword is taken if
+  -- necessary.
+  case doneKws of
+    Just done  -> return $ keywordsToSequence todoKws done
+    Nothing    -> case reverse todoKws of
+                    []     -> mzero  -- no keywords present
+                    (x:xs) -> return $ keywordsToSequence (reverse xs) [x]
+
+ where
+   todoKeywords :: OrgParser [String]
+   todoKeywords = try $
+     let keyword = many1 nonspaceChar <* skipSpaces
+         endOfKeywords = todoDoneSep <|> void newline
+     in manyTill keyword (lookAhead endOfKeywords)
+
+   todoDoneSep :: OrgParser ()
+   todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
+
+   keywordsToSequence :: [String] -> [String] -> TodoSequence
+   keywordsToSequence todo done =
+     let todoMarkers = map (TodoMarker Todo) todo
+         doneMarkers = map (TodoMarker Done) done
+     in todoMarkers ++ doneMarkers
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 84dbe9d33..ef5f89461 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -34,6 +34,11 @@ module Text.Pandoc.Readers.Org.ParserState
   , OrgNoteRecord
   , HasReaderOptions (..)
   , HasQuoteContext (..)
+  , TodoMarker (..)
+  , TodoSequence
+  , TodoState (..)
+  , activeTodoMarkers
+  , registerTodoSequence
   , F(..)
   , askF
   , asksF
@@ -72,6 +77,20 @@ type OrgNoteTable = [OrgNoteRecord]
 -- link-type, the corresponding function transforms the given link string.
 type OrgLinkFormatters = M.Map String (String -> String)
 
+-- | The states in which a todo item can be
+data TodoState = Todo | Done
+  deriving (Eq, Ord, Show)
+
+-- | A ToDo keyword like @TODO@ or @DONE@.
+data TodoMarker = TodoMarker
+  { todoMarkerState :: TodoState
+  , todoMarkerName  :: String
+  }
+  deriving (Show, Eq)
+
+-- | Collection of todo markers in the order in which items should progress
+type TodoSequence = [TodoMarker]
+
 -- | Org-mode parser state
 data OrgParserState = OrgParserState
   { orgStateAnchorIds            :: [String]
@@ -88,6 +107,7 @@ data OrgParserState = OrgParserState
   , orgStateNotes'               :: OrgNoteTable
   , orgStateOptions              :: ReaderOptions
   , orgStateParserContext        :: ParserContext
+  , orgStateTodoSequences        :: [TodoSequence]
   }
 
 data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
@@ -133,12 +153,31 @@ defaultOrgParserState = OrgParserState
   , orgStateNotes' = []
   , orgStateOptions = def
   , orgStateParserContext = NullState
+  , orgStateTodoSequences = []
   }
 
 optionsToParserState :: ReaderOptions -> OrgParserState
 optionsToParserState opts =
   def { orgStateOptions = opts }
 
+registerTodoSequence :: TodoSequence -> OrgParserState -> OrgParserState
+registerTodoSequence todoSeq st =
+  let curSeqs = orgStateTodoSequences st
+  in st{ orgStateTodoSequences = todoSeq : curSeqs }
+
+-- | Get the current todo/done sequences. If no custom todo sequences have been
+-- defined, return a list containing just the default todo/done sequence.
+activeTodoSequences :: OrgParserState -> [TodoSequence]
+activeTodoSequences st =
+  let curSeqs = orgStateTodoSequences st
+  in if null curSeqs
+     then [[ TodoMarker Todo "TODO" , TodoMarker Done "DONE" ]]
+     else curSeqs
+
+activeTodoMarkers :: OrgParserState -> TodoSequence
+activeTodoMarkers = concat . activeTodoSequences
+
+
 --
 -- Export Settings
 --
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 48d0da51c..30132c795 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -693,139 +693,179 @@ tests =
           "Paragraph\n" =?>
           para "Paragraph"
 
-      , "First Level Header" =:
-          "* Headline\n" =?>
-          headerWith ("headline", [], []) 1 "Headline"
+      , testGroup "headers" $
+        [ "First Level Header" =:
+            "* Headline\n" =?>
+            headerWith ("headline", [], []) 1 "Headline"
 
-      , "Third Level Header" =:
-          "*** Third Level Headline\n" =?>
-          headerWith ("third-level-headline", [], [])
-                     3
-                     ("Third" <> space <> "Level" <> space <> "Headline")
+        , "Third Level Header" =:
+            "*** Third Level Headline\n" =?>
+            headerWith ("third-level-headline", [], [])
+                       3
+                       ("Third" <> space <> "Level" <> space <> "Headline")
 
-      , "Compact Headers with Paragraph" =:
-          unlines [ "* First Level"
-                  , "** Second Level"
-                  , "   Text"
-                  ] =?>
-          mconcat [ headerWith ("first-level", [], [])
-                               1
-                               ("First" <> space <> "Level")
-                  , headerWith ("second-level", [], [])
-                               2
-                               ("Second" <> space <> "Level")
-                  , para "Text"
-                  ]
+        , "Compact Headers with Paragraph" =:
+            unlines [ "* First Level"
+                    , "** Second Level"
+                    , "   Text"
+                    ] =?>
+            mconcat [ headerWith ("first-level", [], [])
+                                 1
+                                 ("First" <> space <> "Level")
+                    , headerWith ("second-level", [], [])
+                                 2
+                                 ("Second" <> space <> "Level")
+                    , para "Text"
+                    ]
 
-      , "Separated Headers with Paragraph" =:
-          unlines [ "* First Level"
-                  , ""
-                  , "** Second Level"
-                  , ""
-                  , "   Text"
-                  ] =?>
-          mconcat [ headerWith ("first-level", [], [])
-                               1
-                               ("First" <> space <> "Level")
-                  , headerWith ("second-level", [], [])
-                               2
-                               ("Second" <> space <> "Level")
-                  , para "Text"
-                  ]
+        , "Separated Headers with Paragraph" =:
+            unlines [ "* First Level"
+                    , ""
+                    , "** Second Level"
+                    , ""
+                    , "   Text"
+                    ] =?>
+            mconcat [ headerWith ("first-level", [], [])
+                                 1
+                                 ("First" <> space <> "Level")
+                    , headerWith ("second-level", [], [])
+                                 2
+                                 ("Second" <> space <> "Level")
+                    , para "Text"
+                    ]
 
-      , "Headers not preceded by a blank line" =:
-          unlines [ "** eat dinner"
-                  , "Spaghetti and meatballs tonight."
-                  , "** walk dog"
-                  ] =?>
-          mconcat [ headerWith ("eat-dinner", [], [])
-                               2
-                               ("eat" <> space <> "dinner")
-                  , para $ spcSep [ "Spaghetti", "and", "meatballs", "tonight." ]
-                  , headerWith ("walk-dog", [], [])
-                               2
-                               ("walk" <> space <> "dog")
-                  ]
+        , "Headers not preceded by a blank line" =:
+            unlines [ "** eat dinner"
+                    , "Spaghetti and meatballs tonight."
+                    , "** walk dog"
+                    ] =?>
+            mconcat [ headerWith ("eat-dinner", [], [])
+                                 2
+                                 ("eat" <> space <> "dinner")
+                    , para $ spcSep [ "Spaghetti", "and", "meatballs", "tonight." ]
+                    , headerWith ("walk-dog", [], [])
+                                 2
+                                 ("walk" <> space <> "dog")
+                    ]
 
-      , "Tagged headers" =:
-          unlines [ "* Personal       :PERSONAL:"
-                  , "** Call Mom      :@PHONE:"
-                  , "** Call John     :@PHONE:JOHN: "
-                  ] =?>
-          let tagSpan t = spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
-          in mconcat [ headerWith ("personal", [], [])
-                                  1
-                                  ("Personal" <> tagSpan "PERSONAL")
-                     , headerWith ("call-mom", [], [])
-                                  2
-                                  ("Call Mom" <> tagSpan "@PHONE")
-                     , headerWith ("call-john", [], [])
-                                  2
-                                  ("Call John" <> tagSpan "@PHONE" <> tagSpan "JOHN")
-                     ]
+        , testGroup "Todo keywords"
+          [ "Header with known todo keyword" =:
+              "* TODO header" =?>
+              let todoSpan = spanWith ("", ["todo", "TODO"], []) "TODO"
+              in headerWith ("header", [], []) 1 (todoSpan <> space <> "header")
 
-      , "Untagged header containing colons" =:
-          "* This: is not: tagged" =?>
-          headerWith ("this-is-not-tagged", [], []) 1 "This: is not: tagged"
+          , "Header marked as done" =:
+              "* DONE header" =?>
+              let todoSpan = spanWith ("", ["done", "DONE"], []) "DONE"
+              in headerWith ("header", [], []) 1 (todoSpan <> space <> "header")
 
-      , "Header starting with strokeout text" =:
-          unlines [ "foo"
-                  , ""
-                  , "* +thing+ other thing"
-                  ] =?>
-          mconcat [ para "foo"
-                  , headerWith ("thing-other-thing", [], [])
-                               1
-                               ((strikeout "thing") <> " other thing")
-                  ]
+          , "Header with unknown todo keyword" =:
+              "* WAITING header" =?>
+              headerWith ("waiting-header", [], []) 1 "WAITING header"
 
-      , "Comment Trees" =:
-          unlines [ "* COMMENT A comment tree"
-                  , "  Not much going on here"
-                  , "** This will be dropped"
-                  , "* Comment tree above"
-                  ] =?>
-          headerWith ("comment-tree-above", [], []) 1 "Comment tree above"
+          , "Custom todo keywords" =:
+              unlines [ "#+TODO: WAITING CANCELLED"
+                      , "* WAITING compile"
+                      , "* CANCELLED lunch"
+                      ] =?>
+              let todoSpan = spanWith ("", ["todo", "WAITING"], []) "WAITING"
+                  doneSpan = spanWith ("", ["done", "CANCELLED"], []) "CANCELLED"
+              in headerWith ("compile", [], []) 1 (todoSpan <> space <> "compile")
+              <> headerWith ("lunch", [], []) 1 (doneSpan <> space <> "lunch")
 
-      , "Nothing but a COMMENT header" =:
-          "* COMMENT Test" =?>
-          (mempty::Blocks)
+          , "Custom todo keywords with multiple done-states" =:
+              unlines [ "#+TODO: WAITING | DONE CANCELLED "
+                      , "* WAITING compile"
+                      , "* CANCELLED lunch"
+                      , "* DONE todo-feature"
+                      ] =?>
+              let waiting = spanWith ("", ["todo", "WAITING"], []) "WAITING"
+                  cancelled = spanWith ("", ["done", "CANCELLED"], []) "CANCELLED"
+                  done = spanWith ("", ["done", "DONE"], []) "DONE"
+              in headerWith ("compile", [], []) 1 (waiting <> space <> "compile")
+              <> headerWith ("lunch", [], []) 1 (cancelled <> space <> "lunch")
+              <> headerWith ("todo-feature", [], []) 1 (done <> space <> "todo-feature")
+          ]
 
-      , "Tree with :noexport:" =:
-          unlines [ "* Should be ignored :archive:noexport:old:"
-                  , "** Old stuff"
-                  , "   This is not going to be exported"
-                  ] =?>
-          (mempty::Blocks)
+        , "Tagged headers" =:
+            unlines [ "* Personal       :PERSONAL:"
+                    , "** Call Mom      :@PHONE:"
+                    , "** Call John     :@PHONE:JOHN: "
+                    ] =?>
+            let tagSpan t = spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
+            in mconcat [ headerWith ("personal", [], [])
+                                    1
+                                    ("Personal" <> tagSpan "PERSONAL")
+                       , headerWith ("call-mom", [], [])
+                                    2
+                                    ("Call Mom" <> tagSpan "@PHONE")
+                       , headerWith ("call-john", [], [])
+                                    2
+                                    ("Call John" <> tagSpan "@PHONE" <> tagSpan "JOHN")
+                       ]
 
-      , "Subtree with :noexport:" =:
-          unlines [ "* Exported"
-                  , "** This isn't exported :noexport:"
-                  , "*** This neither"
-                  , "** But this is"
-                  ] =?>
-          mconcat [ headerWith ("exported", [], []) 1 "Exported"
-                  , headerWith ("but-this-is", [], []) 2 "But this is"
-                  ]
+        , "Untagged header containing colons" =:
+            "* This: is not: tagged" =?>
+            headerWith ("this-is-not-tagged", [], []) 1 "This: is not: tagged"
 
-      , "Preferences are treated as header attributes" =:
-          unlines [ "* foo"
-                  , "  :PROPERTIES:"
-                  , "  :custom_id: fubar"
-                  , "  :bar: baz"
-                  , "  :END:"
-                  ] =?>
-          headerWith ("fubar", [], [("bar", "baz")]) 1 "foo"
+        , "Header starting with strokeout text" =:
+            unlines [ "foo"
+                    , ""
+                    , "* +thing+ other thing"
+                    ] =?>
+            mconcat [ para "foo"
+                    , headerWith ("thing-other-thing", [], [])
+                                 1
+                                 ((strikeout "thing") <> " other thing")
+                    ]
+
+        , "Comment Trees" =:
+            unlines [ "* COMMENT A comment tree"
+                    , "  Not much going on here"
+                    , "** This will be dropped"
+                    , "* Comment tree above"
+                    ] =?>
+            headerWith ("comment-tree-above", [], []) 1 "Comment tree above"
+
+        , "Nothing but a COMMENT header" =:
+            "* COMMENT Test" =?>
+            (mempty::Blocks)
+
+        , "Tree with :noexport:" =:
+            unlines [ "* Should be ignored :archive:noexport:old:"
+                    , "** Old stuff"
+                    , "   This is not going to be exported"
+                    ] =?>
+            (mempty::Blocks)
+
+        , "Subtree with :noexport:" =:
+            unlines [ "* Exported"
+                    , "** This isn't exported :noexport:"
+                    , "*** This neither"
+                    , "** But this is"
+                    ] =?>
+            mconcat [ headerWith ("exported", [], []) 1 "Exported"
+                    , headerWith ("but-this-is", [], []) 2 "But this is"
+                    ]
+
+        , "Preferences are treated as header attributes" =:
+            unlines [ "* foo"
+                    , "  :PROPERTIES:"
+                    , "  :custom_id: fubar"
+                    , "  :bar: baz"
+                    , "  :END:"
+                    ] =?>
+            headerWith ("fubar", [], [("bar", "baz")]) 1 "foo"
 
 
-      , "Headers marked with a unnumbered property get a class of the same name" =:
-          unlines [ "* Not numbered"
-                  , "  :PROPERTIES:"
-                  , "  :UNNUMBERED: t"
-                  , "  :END:"
-                  ] =?>
-          headerWith ("not-numbered", ["unnumbered"], []) 1 "Not numbered"
-
+        , "Headers marked with a unnumbered property get a class of the same name" =:
+            unlines [ "* Not numbered"
+                    , "  :PROPERTIES:"
+                    , "  :UNNUMBERED: t"
+                    , "  :END:"
+                    ] =?>
+            headerWith ("not-numbered", ["unnumbered"], []) 1 "Not numbered"
+      ]
       , "Paragraph starting with an asterisk" =:
           "*five" =?>
           para "*five"