From f1d83aea12b93b31f5218bed75bd0e9d8d373cb6 Mon Sep 17 00:00:00 2001
From: Mauro Bieg <mb21@users.noreply.github.com>
Date: Wed, 2 Jan 2019 20:36:37 +0100
Subject: [PATCH] Implement task lists (#5139)

Closes #3051
---
 MANUAL.txt                            |  11 ++-
 src/Text/Pandoc/Extensions.hs         |   3 +
 src/Text/Pandoc/Readers/CommonMark.hs |   7 +-
 src/Text/Pandoc/Readers/Markdown.hs   |   3 +-
 src/Text/Pandoc/Shared.hs             |  32 ++++++++
 src/Text/Pandoc/Writers/CommonMark.hs |  25 ++++--
 src/Text/Pandoc/Writers/HTML.hs       |  22 ++++-
 src/Text/Pandoc/Writers/LaTeX.hs      |  16 +++-
 src/Text/Pandoc/Writers/Markdown.hs   |   6 +-
 test/command/gfm.md                   |  29 +++++++
 test/command/tasklist.md              | 113 ++++++++++++++++++++++++++
 11 files changed, 247 insertions(+), 20 deletions(-)
 create mode 100644 test/command/tasklist.md

diff --git a/MANUAL.txt b/MANUAL.txt
index 9c1f2f9a2..5c7e13e41 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -2612,6 +2612,13 @@ If default list markers are desired, use `#.`:
     #.  two
     #.  three
 
+#### Extension: `task_lists` ####
+
+Pandoc supports task lists, using the syntax of GitHub-Flavored Markdown.
+
+    - [ ] an unchecked task list item
+    - [x] checked item
+
 
 ### Definition lists ###
 
@@ -4223,7 +4230,7 @@ variants are supported:
 :   `pipe_tables`, `raw_html`, `fenced_code_blocks`, `auto_identifiers`,
     `gfm_auto_identifiers`, `backtick_code_blocks`,
     `autolink_bare_uris`, `space_in_atx_header`,
-    `intraword_underscores`, `strikeout`, `emoji`,
+    `intraword_underscores`, `strikeout`, `task_lists`, `emoji`,
     `shortcut_reference_links`, `angle_brackets_escapable`,
     `lists_without_preceding_blankline`.
 
@@ -4254,7 +4261,7 @@ only affects `gfm` output, not input.
 :   `pipe_tables`, `raw_html`, `fenced_code_blocks`, `auto_identifiers`,
     `gfm_auto_identifiers`, `backtick_code_blocks`,
     `autolink_bare_uris`, `space_in_atx_header`,
-    `intraword_underscores`, `strikeout`, `emoji`,
+    `intraword_underscores`, `strikeout`, `task_lists`, `emoji`,
     `shortcut_reference_links`, `angle_brackets_escapable`,
     `lists_without_preceding_blankline`.
 
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index f2599ed6d..f660cf766 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -167,6 +167,7 @@ data Extension =
     | Ext_subscript           -- ^ Subscript using ~this~ syntax
     | Ext_superscript         -- ^ Superscript using ^this^ syntax
     | Ext_styles              -- ^ Read styles that pandoc doesn't know
+    | Ext_task_lists          -- ^ Parse certain list items as task list items
     | Ext_table_captions      -- ^ Pandoc-style table captions
     | Ext_tex_math_dollars    -- ^ TeX math between $..$ or $$..$$
     | Ext_tex_math_double_backslash  -- ^ TeX math btw \\(..\\) \\[..\\]
@@ -215,6 +216,7 @@ pandocExtensions = extensionsFromList
   , Ext_strikeout
   , Ext_superscript
   , Ext_subscript
+  , Ext_task_lists
   , Ext_auto_identifiers
   , Ext_header_attributes
   , Ext_link_attributes
@@ -274,6 +276,7 @@ githubMarkdownExtensions = extensionsFromList
   , Ext_space_in_atx_header
   , Ext_intraword_underscores
   , Ext_strikeout
+  , Ext_task_lists
   , Ext_emoji
   , Ext_lists_without_preceding_blankline
   , Ext_shortcut_reference_links
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 3cc75e2a1..0a3f5e51d 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -43,7 +43,7 @@ import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Emoji (emojiToInline)
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (uniqueIdent)
+import Text.Pandoc.Shared (uniqueIdent, taskListItemFromAscii)
 import Text.Pandoc.Walk (walkM)
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
@@ -111,12 +111,14 @@ addBlock _ (Node _ (CODE_BLOCK info t) _) =
 addBlock opts (Node _ (HEADING lev) nodes) =
   (Header lev ("",[],[]) (addInlines opts nodes) :)
 addBlock opts (Node _ (LIST listAttrs) nodes) =
-  (constructor (map (setTightness . addBlocks opts . children) nodes) :)
+  (constructor (map listItem nodes) :)
   where constructor = case listType listAttrs of
                        BULLET_LIST  -> BulletList
                        ORDERED_LIST -> OrderedList
                                          (start, DefaultStyle, delim)
         start = listStart listAttrs
+        listItem = taskListItemFromAscii exts . setTightness
+                     . addBlocks opts . children
         setTightness = if listTight listAttrs
                            then map paraToPlain
                            else id
@@ -125,6 +127,7 @@ addBlock opts (Node _ (LIST listAttrs) nodes) =
         delim = case listDelim listAttrs of
                      PERIOD_DELIM -> Period
                      PAREN_DELIM  -> OneParen
+        exts = readerExtensions opts
 addBlock opts (Node _ (TABLE alignments) nodes) =
   (Table [] aligns widths headers rows :)
   where aligns = map fromTableCellAlignment alignments
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 94d1157a6..dd1bedc91 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -958,7 +958,8 @@ listItem fourSpaceRule start = try $ do
   let raw = concat (first:continuations)
   contents <- parseFromString' parseBlocks raw
   updateState (\st -> st {stateParserContext = oldContext})
-  return contents
+  exts <- getOption readerExtensions
+  return $ B.fromList . taskListItemFromAscii exts . B.toList <$> contents
 
 orderedList :: PandocMonad m => MarkdownParser m (F Blocks)
 orderedList = try $ do
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 9fa083c11..4efdbba61 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -79,6 +79,8 @@ module Text.Pandoc.Shared (
                      headerShift,
                      stripEmptyParagraphs,
                      isTightList,
+                     taskListItemFromAscii,
+                     taskListItemToAscii,
                      addMetaField,
                      makeMeta,
                      eastAsianLineBreakFilter,
@@ -588,6 +590,36 @@ isTightList = all firstIsPlain
   where firstIsPlain (Plain _ : _) = True
         firstIsPlain _             = False
 
+-- | Convert a list item containing tasklist syntax (e.g. @[x]@)
+-- to using @U+2610 BALLOT BOX@ or @U+2612 BALLOT BOX WITH X@.
+taskListItemFromAscii :: Extensions -> [Block] -> [Block]
+taskListItemFromAscii = handleTaskListItem fromMd
+  where
+    fromMd (Str "[" : Space : Str "]" : Space : is) = (Str "☐") : Space : is
+    fromMd (Str "[x]"                 : Space : is) = (Str "☒") : Space : is
+    fromMd (Str "[X]"                 : Space : is) = (Str "☒") : Space : is
+    fromMd is = is
+
+-- | Convert a list item containing text starting with @U+2610 BALLOT BOX@
+-- or @U+2612 BALLOT BOX WITH X@ to tasklist syntax (e.g. @[x]@).
+taskListItemToAscii :: Extensions -> [Block] -> [Block]
+taskListItemToAscii = handleTaskListItem toMd
+  where
+    toMd (Str "☐" : Space : is) = rawMd "[ ]" : Space : is
+    toMd (Str "☒" : Space : is) = rawMd "[x]" : Space : is
+    toMd is = is
+    rawMd = RawInline (Format "markdown")
+
+handleTaskListItem :: ([Inline] -> [Inline]) -> Extensions -> [Block] -> [Block]
+handleTaskListItem handleInlines exts bls =
+  if Ext_task_lists `extensionEnabled` exts
+  then handleItem bls
+  else bls
+  where
+    handleItem (Plain is : bs) = Plain (handleInlines is) : bs
+    handleItem (Para is  : bs) = Para  (handleInlines is) : bs
+    handleItem bs = bs
+
 -- | Set a field of a 'Meta' object.  If the field already has a value,
 -- convert it into a list with the new value appended to the old value(s).
 addMetaField :: ToMetaValue a
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
index 6299b0263..e28fa71a9 100644
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -46,7 +46,8 @@ import Network.HTTP (urlEncode)
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (isTightList, linesToPara, substitute, capitalize)
+import Text.Pandoc.Shared (isTightList, taskListItemToAscii, linesToPara,
+                           substitute, capitalize)
 import Text.Pandoc.Templates (renderTemplate')
 import Text.Pandoc.Walk (query, walk, walkM)
 import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
@@ -115,24 +116,28 @@ blockToNodes opts (Para xs) ns =
 blockToNodes opts (LineBlock lns) ns = blockToNodes opts (linesToPara lns) ns
 blockToNodes _ (CodeBlock (_,classes,_) xs) ns = return
   (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] : ns)
-blockToNodes opts (RawBlock fmt xs) ns
-  | fmt == Format "html" && isEnabled Ext_raw_html opts
+blockToNodes opts (RawBlock (Format f) xs) ns
+  | f == "html" && isEnabled Ext_raw_html opts
               = return (node (HTML_BLOCK (T.pack xs)) [] : ns)
-  | (fmt == Format "latex" || fmt == Format "tex") && isEnabled Ext_raw_tex opts
+  | (f == "latex" || f == "tex") && isEnabled Ext_raw_tex opts
+              = return (node (CUSTOM_BLOCK (T.pack xs) T.empty) [] : ns)
+  | f == "markdown"
               = return (node (CUSTOM_BLOCK (T.pack xs) T.empty) [] : ns)
   | otherwise = return ns
 blockToNodes opts (BlockQuote bs) ns = do
   nodes <- blocksToNodes opts bs
   return (node BLOCK_QUOTE nodes : ns)
 blockToNodes opts (BulletList items) ns = do
-  nodes <- mapM (blocksToNodes opts) items
+  let exts = writerExtensions opts
+  nodes <- mapM (blocksToNodes opts . taskListItemToAscii exts) items
   return (node (LIST ListAttributes{
                    listType = BULLET_LIST,
                    listDelim = PERIOD_DELIM,
                    listTight = isTightList items,
                    listStart = 1 }) (map (node ITEM) nodes) : ns)
 blockToNodes opts (OrderedList (start, _sty, delim) items) ns = do
-  nodes <- mapM (blocksToNodes opts) items
+  let exts = writerExtensions opts
+  nodes <- mapM (blocksToNodes opts . taskListItemToAscii exts) items
   return (node (LIST ListAttributes{
                    listType = ORDERED_LIST,
                    listDelim = case delim of
@@ -292,10 +297,12 @@ inlineToNodes opts (Image alt ils (url,'f':'i':'g':':':tit)) =
   inlineToNodes opts (Image alt ils (url,tit))
 inlineToNodes opts (Image _ ils (url,tit)) =
   (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :)
-inlineToNodes opts (RawInline fmt xs)
-  | fmt == Format "html" && isEnabled Ext_raw_html opts
+inlineToNodes opts (RawInline (Format f) xs)
+  | f == "html" && isEnabled Ext_raw_html opts
               = (node (HTML_INLINE (T.pack xs)) [] :)
-  | (fmt == Format "latex" || fmt == Format "tex") && isEnabled Ext_raw_tex opts
+  | (f == "latex" || f == "tex") && isEnabled Ext_raw_tex opts
+              = (node (CUSTOM_INLINE (T.pack xs) T.empty) [] :)
+  | f == "markdown"
               = (node (CUSTOM_INLINE (T.pack xs) T.empty) [] :)
   | otherwise = id
 inlineToNodes opts (Quoted qt ils) =
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 8cdadca5b..98b86a7c9 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -365,6 +365,24 @@ defList :: PandocMonad m
         => WriterOptions -> [Html] -> StateT WriterState m Html
 defList opts items = toList H.dl opts (items ++ [nl opts])
 
+listItemToHtml :: PandocMonad m
+               => WriterOptions -> [Block] -> StateT WriterState m Html
+listItemToHtml opts bls
+  | Plain (Str "☐":Space:is) : bs <- bls = taskListItem False id  is bs
+  | Plain (Str "☒":Space:is) : bs <- bls = taskListItem True  id  is bs
+  | Para  (Str "☐":Space:is) : bs <- bls = taskListItem False H.p is bs
+  | Para  (Str "☒":Space:is) : bs <- bls = taskListItem True  H.p is bs
+  | otherwise = blockListToHtml opts bls
+  where
+    taskListItem checked constr is bs = do
+      let checkbox  = if checked
+                      then checkbox' ! A.checked ""
+                      else checkbox'
+          checkbox' = H.input ! A.type_ "checkbox" ! A.disabled "" >> nl opts
+      isContents <- inlineListToHtml opts is
+      bsContents <- blockListToHtml opts bs
+      return $ constr (checkbox >> isContents) >> bsContents
+
 -- | Construct table of contents from list of elements.
 tableOfContents :: PandocMonad m => WriterOptions -> [Element]
                 -> StateT WriterState m (Maybe Html)
@@ -824,10 +842,10 @@ blockToHtml opts (Header level attr@(_,classes,_) lst) = do
               6 -> H.h6 contents'
               _ -> H.p contents'
 blockToHtml opts (BulletList lst) = do
-  contents <- mapM (blockListToHtml opts) lst
+  contents <- mapM (listItemToHtml opts) lst
   unordList opts contents
 blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
-  contents <- mapM (blockListToHtml opts) lst
+  contents <- mapM (listItemToHtml opts) lst
   html5 <- gets stHtml5
   let numstyle' = case numstyle of
                        Example -> "decimal"
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index f9bee886e..7441152a6 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -924,8 +924,20 @@ listItemToLaTeX lst
   -- this will keep the typesetter from throwing an error.
   | (Header{} :_) <- lst =
     blockListToLaTeX lst >>= return . (text "\\item ~" $$) . nest 2
-  | otherwise = blockListToLaTeX lst >>= return .  (text "\\item" $$) .
-                      nest 2
+  | Plain (Str "☐":Space:is) : bs <- lst = taskListItem False is bs
+  | Plain (Str "☒":Space:is) : bs <- lst = taskListItem True  is bs
+  | Para  (Str "☐":Space:is) : bs <- lst = taskListItem False is bs
+  | Para  (Str "☒":Space:is) : bs <- lst = taskListItem True  is bs
+  | otherwise = blockListToLaTeX lst >>= return . (text "\\item" $$) . nest 2
+  where
+    taskListItem checked is bs = do
+      let checkbox  = if checked
+                      then "$\\boxtimes$"
+                      else "$\\square$"
+      isContents <- inlineListToLaTeX is
+      bsContents <- blockListToLaTeX bs
+      return $ "\\item" <> brackets checkbox
+        $$ nest 2 (isContents $+$ bsContents)
 
 defListItemToLaTeX :: PandocMonad m => ([Inline], [[Block]]) -> LW m Doc
 defListItemToLaTeX (term, defs) = do
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index c0c6e8ebf..7babbe982 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -765,7 +765,8 @@ itemEndsWithTightList bs =
 -- | Convert bullet list item (list of blocks) to markdown.
 bulletListItemToMarkdown :: PandocMonad m => WriterOptions -> [Block] -> MD m Doc
 bulletListItemToMarkdown opts bs = do
-  contents <- blockListToMarkdown opts bs
+  let exts = writerExtensions opts
+  contents <- blockListToMarkdown opts $ taskListItemToAscii exts bs
   let sps = replicate (writerTabStop opts - 2) ' '
   let start = text ('-' : ' ' : sps)
   -- remove trailing blank line if item ends with a tight list
@@ -781,7 +782,8 @@ orderedListItemToMarkdown :: PandocMonad m
                           -> [Block]       -- ^ list item (list of blocks)
                           -> MD m Doc
 orderedListItemToMarkdown opts marker bs = do
-  contents <- blockListToMarkdown opts bs
+  let exts = writerExtensions opts
+  contents <- blockListToMarkdown opts $ taskListItemToAscii exts bs
   let sps = case length marker - writerTabStop opts of
                    n | n > 0 -> text $ replicate n ' '
                    _ -> text " "
diff --git a/test/command/gfm.md b/test/command/gfm.md
index 7a7098989..a4bb088b6 100644
--- a/test/command/gfm.md
+++ b/test/command/gfm.md
@@ -101,3 +101,32 @@ hi
 ^D
 [Para [Str "hi",LineBreak,Str "hi"]]
 ```
+
+```
+% pandoc -f gfm -t native
+- [ ] foo
+- [x] bar
+^D
+[BulletList
+ [[Plain [Str "\9744",Space,Str "foo"]]
+ ,[Plain [Str "\9746",Space,Str "bar"]]]]
+```
+
+```
+% pandoc -f gfm-task_lists -t native
+- [ ] foo
+- [x] bar
+^D
+[BulletList
+ [[Plain [Str "[",Space,Str "]",Space,Str "foo"]]
+ ,[Plain [Str "[x]",Space,Str "bar"]]]]
+```
+
+```
+% pandoc -f gfm -t gfm
+- [ ] foo
+- [x] bar
+^D
+  - [ ] foo
+  - [x] bar
+```
diff --git a/test/command/tasklist.md b/test/command/tasklist.md
new file mode 100644
index 000000000..5ff628e1c
--- /dev/null
+++ b/test/command/tasklist.md
@@ -0,0 +1,113 @@
+tests adapted from <https://github.github.com/gfm/#task-list-items-extension->
+
+```
+% pandoc
+- [ ] foo
+- [x] bar
+^D
+<ul>
+<li><input type="checkbox" disabled="" />
+foo</li>
+<li><input type="checkbox" disabled="" checked="" />
+bar</li>
+</ul>
+```
+
+
+```
+% pandoc
+- [x] foo
+  - [ ] bar
+  - [x] baz
+- [ ] bim
+^D
+<ul>
+<li><input type="checkbox" disabled="" checked="" />
+foo<ul>
+<li><input type="checkbox" disabled="" />
+bar</li>
+<li><input type="checkbox" disabled="" checked="" />
+baz</li>
+</ul></li>
+<li><input type="checkbox" disabled="" />
+bim</li>
+</ul>
+```
+
+
+custom html task list test:
+
+```
+% pandoc
+- [ ]  unchecked
+- plain item
+-  [x] checked
+
+paragraph
+
+1. [ ] ordered unchecked
+2. [] plain item
+3. [x] ordered checked
+
+paragraph
+
+- [ ] list item with a
+
+    second paragraph
+
+- [x] checked
+^D
+<ul>
+<li><input type="checkbox" disabled="" />
+unchecked</li>
+<li>plain item</li>
+<li><input type="checkbox" disabled="" checked="" />
+checked</li>
+</ul>
+<p>paragraph</p>
+<ol type="1">
+<li><input type="checkbox" disabled="" />
+ordered unchecked</li>
+<li>[] plain item</li>
+<li><input type="checkbox" disabled="" checked="" />
+ordered checked</li>
+</ol>
+<p>paragraph</p>
+<ul>
+<li><p><input type="checkbox" disabled="" />
+list item with a</p><p>second paragraph</p></li>
+<li><p><input type="checkbox" disabled="" checked="" />
+checked</p></li>
+</ul>
+```
+
+latex task list test:
+
+```
+% pandoc -t latex
+- [ ] foo bar
+
+  baz
+
+- [x] ok
+^D
+\begin{itemize}
+\item[$\square$]
+  foo bar
+
+  baz
+\item[$\boxtimes$]
+  ok
+\end{itemize}
+```
+
+round trip:
+
+```
+% pandoc -f markdown -t markdown
+- [ ] foo
+- [x] bar
+^D
+-   [ ] foo
+-   [x] bar
+```