Added gfm (GitHub-flavored CommonMark) as an input and output format.

This uses bindings to GitHub's fork of cmark, so it should parse
gfm exactly as GitHub does (excepting certain postprocessing
steps, involving notifications, emojis, etc.).

* Added Text.Pandoc.Readers.GFM (exporting readGFM)
* Added Text.Pandoc.Writers.GFM (exporting writeGFM)
* Added `gfm` as an input and output format.

Note that tables are currently always rendered as HTML
in the writer; this can be improved when CMarkGFM supports
tables in output.
This commit is contained in:
John MacFarlane 2017-08-03 10:50:05 -07:00
parent 8995281691
commit 2c81c4c218
7 changed files with 386 additions and 2 deletions

View file

@ -321,6 +321,7 @@ Library
JuicyPixels >= 3.1.6.1 && < 3.3,
Glob >= 0.7 && < 0.8,
cmark >= 0.5 && < 0.6,
cmark-gfm >= 0.1.1 && < 0.2,
doctemplates >= 0.1 && < 0.2,
http-client >= 0.4.30 && < 0.6,
http-client-tls >= 0.2.4 && < 0.4,
@ -370,6 +371,7 @@ Library
Text.Pandoc.Readers.LaTeX.Types,
Text.Pandoc.Readers.Markdown,
Text.Pandoc.Readers.CommonMark,
Text.Pandoc.Readers.GFM,
Text.Pandoc.Readers.MediaWiki,
Text.Pandoc.Readers.Vimwiki,
Text.Pandoc.Readers.RST,
@ -401,6 +403,7 @@ Library
Text.Pandoc.Writers.Ms,
Text.Pandoc.Writers.Markdown,
Text.Pandoc.Writers.CommonMark,
Text.Pandoc.Writers.GFM,
Text.Pandoc.Writers.Haddock,
Text.Pandoc.Writers.RST,
Text.Pandoc.Writers.Org,

View file

@ -45,6 +45,7 @@ module Text.Pandoc.Readers
, readOdt
, readMarkdown
, readCommonMark
, readGFM
, readMediaWiki
, readVimwiki
, readRST
@ -76,6 +77,7 @@ import Text.Pandoc.Error
import Text.Pandoc.Extensions
import Text.Pandoc.Options
import Text.Pandoc.Readers.CommonMark
import Text.Pandoc.Readers.GFM
import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Readers.EPUB
@ -117,6 +119,7 @@ readers = [ ("native" , TextReader readNative)
,("markdown_github" , TextReader readMarkdown)
,("markdown_mmd", TextReader readMarkdown)
,("commonmark" , TextReader readCommonMark)
,("gfm" , TextReader readGFM)
,("rst" , TextReader readRST)
,("mediawiki" , TextReader readMediaWiki)
,("vimwiki" , TextReader readVimwiki)

View file

@ -44,8 +44,8 @@ readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readCommonMark opts s = return $
nodeToPandoc $ commonmarkToNode opts' s
where opts' = if extensionEnabled Ext_smart (readerExtensions opts)
then [optNormalize, optSmart]
else [optNormalize]
then [optSmart]
else []
nodeToPandoc :: Node -> Pandoc
nodeToPandoc (Node _ DOCUMENT nodes) =

View file

@ -0,0 +1,185 @@
{-
Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Readers.GFM
Copyright : Copyright (C) 2017 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of GitHub flavored CommonMark text to 'Pandoc' document.
CommonMark is a strongly specified variant of Markdown: http://commonmark.org.
-}
module Text.Pandoc.Readers.GFM (readGFM)
where
import CMarkGFM
import Data.List (groupBy)
import Data.Text (Text, unpack)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
-- | Parse GitHub-flavored CommonMark text into a 'Pandoc' structure.
-- The strikethrough, table and autolink extensions are always handed to
-- the parser; 'optSmart' is passed only when 'Ext_smart' is enabled in
-- the reader options.
readGFM :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readGFM opts s = return (nodeToPandoc parsed)
  where
    parsed = commonmarkToNode cmarkOpts gfmExts s
    gfmExts = [extStrikethrough, extTable, extAutolink]
    cmarkOpts
      | extensionEnabled Ext_smart (readerExtensions opts) = [optSmart]
      | otherwise = []
-- | Turn the root of a parsed node tree into a 'Pandoc' document with
-- empty metadata.  A DOCUMENT node contributes its children; any other
-- node (which shouldn't happen) is treated as a single top-level block.
nodeToPandoc :: Node -> Pandoc
nodeToPandoc root = Pandoc nullMeta (foldr addBlock [] topLevel)
  where
    topLevel = case root of
      Node _ DOCUMENT nodes -> nodes
      _ -> [root]
-- | Convert a list of block nodes to pandoc blocks, preserving order.
addBlocks :: [Node] -> [Block]
addBlocks nodes = foldr addBlock [] nodes
-- | Convert one block-level node to a pandoc 'Block' and prepend it to the
-- blocks accumulated so far.  Node types that have no clause below are
-- dropped, leaving the accumulator unchanged.
addBlock :: Node -> [Block] -> [Block]
addBlock (Node _ PARAGRAPH nodes) =
  (Para (addInlines nodes) :)
addBlock (Node _ THEMATIC_BREAK _) =
  (HorizontalRule :)
addBlock (Node _ BLOCK_QUOTE nodes) =
  (BlockQuote (addBlocks nodes) :)
addBlock (Node _ (HTML_BLOCK t) _) =
  (RawBlock (Format "html") (unpack t) :)
-- Note: the cmark parser will never generate CUSTOM_BLOCK,
-- so we don't need to handle it:
addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
  id
addBlock (Node _ (CODE_BLOCK info t) _) =
  -- Only the first word of the info string is kept, as the sole class.
  (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
addBlock (Node _ (HEADING lev) nodes) =
  (Header lev ("",[],[]) (addInlines nodes) :)
addBlock (Node _ (LIST listAttrs) nodes) =
  (constructor (map (setTightness . addBlocks . children) nodes) :)
  where
    constructor = case listType listAttrs of
      BULLET_LIST  -> BulletList
      ORDERED_LIST -> OrderedList (start, DefaultStyle, delim)
    start = listStart listAttrs
    -- In a tight list every 'Para' of an item is demoted to 'Plain'.
    setTightness
      | listTight listAttrs = map paraToPlain
      | otherwise           = id
    paraToPlain (Para xs) = Plain xs
    paraToPlain x         = x
    delim = case listDelim listAttrs of
      PERIOD_DELIM -> Period
      PAREN_DELIM  -> OneParen
addBlock (Node _ (TABLE alignments) nodes) =
  (Table [] aligns widths headers rows :)
  where
    aligns = map fromTableCellAlignment alignments
    fromTableCellAlignment NoAlignment   = AlignDefault
    fromTableCellAlignment LeftAligned   = AlignLeft
    fromTableCellAlignment RightAligned  = AlignRight
    fromTableCellAlignment CenterAligned = AlignCenter
    -- All columns get width 0.0.
    widths = replicate numcols 0.0
    -- Widest row, or 0 when there are no rows; the fold is total, so no
    -- separate emptiness check is needed.
    numcols = foldr (max . length) 0 allRows
    -- Pattern matching inside the comprehensions silently skips any
    -- child that is not a TABLE_ROW (resp. TABLE_CELL), so no partial
    -- helper with an 'error' branch is required.
    allRows = [ [ cellBlocks cellKids | Node _ TABLE_CELL cellKids <- rowKids ]
              | Node _ TABLE_ROW rowKids <- nodes ]
    -- The first row is the header row; the remainder are body rows.
    (headers, rows) = case allRows of
      (h:rs) -> (h, rs)
      []     -> ([], [])
    -- A cell whose first child is a block node is converted as blocks;
    -- otherwise its children are inlines wrapped in a single 'Plain'.
    cellBlocks [] = []
    cellBlocks ns@(n:_)
      | isBlockNode n = addBlocks ns
      | otherwise     = [Plain (addInlines ns)]
addBlock (Node _ TABLE_ROW _) = id -- handled in TABLE
addBlock (Node _ TABLE_CELL _) = id -- handled in TABLE
addBlock _ = id
-- | Report whether a node's type is classified as block-level here.
-- Used when converting table cells to decide whether a cell's children
-- are blocks or inlines.  Note that the table node types themselves are
-- classified as non-block by this function.
isBlockNode :: Node -> Bool
isBlockNode (Node _ nodetype _) = blockLevel nodetype
  where
    blockLevel DOCUMENT            = True
    blockLevel THEMATIC_BREAK      = True
    blockLevel PARAGRAPH           = True
    blockLevel BLOCK_QUOTE         = True
    blockLevel (HTML_BLOCK _)      = True
    blockLevel (CUSTOM_BLOCK _ _)  = True
    blockLevel (CODE_BLOCK _ _)    = True
    blockLevel (HEADING _)         = True
    blockLevel (LIST _)            = True
    blockLevel ITEM                = True
    blockLevel (TEXT _)            = False
    blockLevel SOFTBREAK           = False
    blockLevel LINEBREAK           = False
    blockLevel (HTML_INLINE _)     = False
    blockLevel (CUSTOM_INLINE _ _) = False
    blockLevel (CODE _)            = False
    blockLevel EMPH                = False
    blockLevel STRONG              = False
    blockLevel (LINK _ _)          = False
    blockLevel (IMAGE _ _)         = False
    blockLevel STRIKETHROUGH       = False
    blockLevel (TABLE _)           = False
    blockLevel TABLE_ROW           = False
    blockLevel TABLE_CELL          = False
-- | The child nodes of a node.
children :: Node -> [Node]
children parent = case parent of
  Node _ _ kids -> kids
-- | Convert a list of inline nodes to pandoc inlines, preserving order.
addInlines :: [Node] -> [Inline]
addInlines nodes = foldr addInline [] nodes
-- | Convert one inline node to pandoc inlines and prepend them to the
-- inlines accumulated so far.  Node types without an alternative below
-- leave the accumulator unchanged.
addInline :: Node -> [Inline] -> [Inline]
addInline (Node _ nodeType kids) = case nodeType of
  -- Text is split into maximal runs of space characters and of
  -- non-space characters; each space run becomes a single 'Space'.
  TEXT t -> (map toInline (groupBy sameKind (unpack t)) ++)
  LINEBREAK -> (LineBreak :)
  SOFTBREAK -> (SoftBreak :)
  HTML_INLINE t -> (RawInline (Format "html") (unpack t) :)
  -- Note: the cmark parser will never generate CUSTOM_INLINE,
  -- so we don't need to handle it:
  CUSTOM_INLINE _onEnter _onExit -> id
  CODE t -> (Code ("",[],[]) (unpack t) :)
  EMPH -> (Emph nested :)
  STRONG -> (Strong nested :)
  STRIKETHROUGH -> (Strikeout nested :)
  LINK url title -> (Link nullAttr nested (unpack url, unpack title) :)
  IMAGE url title -> (Image nullAttr nested (unpack url, unpack title) :)
  _ -> id
  where
    nested = addInlines kids
    -- Two characters belong to the same run when both are spaces or
    -- neither is.
    sameKind a b = (a == ' ') == (b == ' ')
    toInline (' ':_) = Space
    toInline str     = Str str

View file

@ -38,6 +38,7 @@ module Text.Pandoc.Writers
, writeAsciiDoc
, writeBeamer
, writeCommonMark
, writeGFM
, writeConTeXt
, writeCustom
, writeDZSlides
@ -89,6 +90,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Writers.AsciiDoc
import Text.Pandoc.Writers.CommonMark
import Text.Pandoc.Writers.GFM
import Text.Pandoc.Writers.ConTeXt
import Text.Pandoc.Writers.Custom
import Text.Pandoc.Writers.Docbook
@ -172,6 +174,7 @@ writers = [
,("asciidoc" , TextWriter writeAsciiDoc)
,("haddock" , TextWriter writeHaddock)
,("commonmark" , TextWriter writeCommonMark)
,("gfm" , TextWriter writeGFM)
,("tei" , TextWriter writeTEI)
,("muse" , TextWriter writeMuse)
]

View file

@ -0,0 +1,189 @@
{-
Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Writers.GFM
Copyright : Copyright (C) 2017 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of 'Pandoc' documents to GitHub flavored CommonMark.
CommonMark: <http://commonmark.org>
-}
module Text.Pandoc.Writers.GFM (writeGFM) where
import CMarkGFM
import Control.Monad.State.Strict (State, get, modify, runState)
import Data.Foldable (foldrM)
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Shared (isTightList, linesToPara)
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Walk (walkM)
import Text.Pandoc.Writers.HTML (writeHtml5String)
import Text.Pandoc.Writers.Shared
-- | Convert a 'Pandoc' document to GitHub flavored CommonMark.
-- Notes are first replaced by markers (see 'processNotes'); if any were
-- found, their contents are appended to the body as a final ordered
-- list.  When the writer options carry a template, the rendered body is
-- inserted into it under the @body@ field; otherwise the body alone is
-- returned.
writeGFM :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeGFM opts (Pandoc meta blocks) = do
  let (body, collected) = runState (walkM processNotes blocks) []
      -- Notes are accumulated newest-first, hence the reversal.
      noteList
        | null collected = []
        | otherwise = [OrderedList (1, Decimal, Period) (reverse collected)]
  main <- blocksToCommonMark opts (body ++ noteList)
  metadata <- metaToJSON opts
                (blocksToCommonMark opts)
                (inlinesToCommonMark opts)
                meta
  maybe (return main)
        (\tpl -> renderTemplate' tpl (defField "body" main metadata))
        (writerTemplate opts)
-- | Replace a 'Note' by a bracketed marker such as @[1]@, pushing the
-- note's blocks onto the front of the state.  The number is the count of
-- notes seen so far, including this one.  Other inlines pass through
-- unchanged.
processNotes :: Inline -> State [[Block]] Inline
processNotes inline = case inline of
  Note bs -> do
    modify (bs :)
    count <- fmap length get
    return (Str ("[" ++ show count ++ "]"))
  _ -> return inline
-- | Build a 'Node' of the given type and children, with 'Nothing' as
-- its first field.
node :: NodeType -> [Node] -> Node
node nodeType kids = Node Nothing nodeType kids
-- | Render a list of blocks as text by wrapping them in a DOCUMENT node.
-- 'optHardBreaks' is passed when 'Ext_hard_line_breaks' is enabled, and
-- a column width is passed only when the wrap option is 'WrapAuto'.
blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m Text
blocksToCommonMark opts bs = do
  nodes <- blocksToNodes bs
  return (nodeToCommonmark cmarkOpts colwidth (node DOCUMENT nodes))
  where
    cmarkOpts
      | isEnabled Ext_hard_line_breaks opts = [optHardBreaks]
      | otherwise = []
    colwidth
      | writerWrapText opts == WrapAuto = Just (writerColumns opts)
      | otherwise = Nothing
-- | Render a list of inlines as text by wrapping them in a PARAGRAPH
-- node.  Options and column width are derived from the writer options
-- in the same way as for blocks.
inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m Text
inlinesToCommonMark opts ils =
  let hardBreaks = isEnabled Ext_hard_line_breaks opts
      cmarkOpts = if hardBreaks then [optHardBreaks] else []
      autoWrap = writerWrapText opts == WrapAuto
      colwidth = if autoWrap then Just (writerColumns opts) else Nothing
      paragraph = node PARAGRAPH (inlinesToNodes ils)
  in return (nodeToCommonmark cmarkOpts colwidth paragraph)
-- | Convert a list of blocks to nodes, preserving order.
blocksToNodes :: PandocMonad m => [Block] -> m [Node]
blocksToNodes bs = foldrM blockToNodes [] bs
-- | Convert one block to zero or more nodes and put them in front of the
-- nodes accumulated so far.
blockToNodes :: PandocMonad m => Block -> [Node] -> m [Node]
blockToNodes block ns = case block of
  Plain xs -> emit (node PARAGRAPH (inlinesToNodes xs))
  Para xs -> emit (node PARAGRAPH (inlinesToNodes xs))
  LineBlock lns -> blockToNodes (linesToPara lns) ns
  -- All classes, space-separated, form the code block's info string.
  CodeBlock (_, classes, _) xs ->
    emit (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [])
  RawBlock fmt xs
    | fmt == Format "html" -> emit (node (HTML_BLOCK (T.pack xs)) [])
    | otherwise -> emit (node (CUSTOM_BLOCK (T.pack xs) T.empty) [])
  BlockQuote bs -> do
    inner <- blocksToNodes bs
    emit (node BLOCK_QUOTE inner)
  BulletList items -> do
    itemNodes <- mapM blocksToNodes items
    emit (listNode BULLET_LIST PERIOD_DELIM 1 items itemNodes)
  OrderedList (start, _sty, delim) items -> do
    itemNodes <- mapM blocksToNodes items
    emit (listNode ORDERED_LIST (toDelim delim) start items itemNodes)
  HorizontalRule -> emit (node THEMATIC_BREAK [])
  Header lev _ ils -> emit (node (HEADING lev) (inlinesToNodes ils))
  -- A Div is transparent: its contents are spliced in directly.
  Div _ bs -> do
    inner <- blocksToNodes bs
    return (inner ++ ns)
  -- Definition lists are rendered as bullet lists.
  DefinitionList items -> blockToNodes (BulletList (map dlToBullet items)) ns
  -- Tables are rendered as an HTML block.
  Table{} -> do
    s <- writeHtml5String def $! Pandoc nullMeta [block]
    emit (node (HTML_BLOCK s) [])
  Null -> return ns
  where
    emit n = return (n : ns)
    listNode ty dl start items kids =
      node (LIST ListAttributes{ listType  = ty
                               , listDelim = dl
                               , listTight = isTightList items
                               , listStart = start })
           (map (node ITEM) kids)
    toDelim OneParen  = PAREN_DELIM
    toDelim TwoParens = PAREN_DELIM
    toDelim _         = PERIOD_DELIM
    -- The term is joined to the first definition's leading paragraph by
    -- a line break; remaining definitions are flattened after it.
    dlToBullet (term, (Para first : rest) : others) =
      Para (term ++ [LineBreak] ++ first) : rest ++ concat others
    dlToBullet (term, (Plain first : rest) : others) =
      Plain (term ++ [LineBreak] ++ first) : rest ++ concat others
    dlToBullet (term, defs) =
      Para term : concat defs
-- | Convert a list of inlines to nodes, preserving order.
inlinesToNodes :: [Inline] -> [Node]
inlinesToNodes ils = foldr inlineToNodes [] ils
-- | Convert one inline to zero or more nodes and put them in front of
-- the nodes accumulated so far.
--
-- Strikeout, superscript, subscript and small caps are written as raw
-- HTML tags around their contents, and math as raw inline text in
-- @\\(...\\)@ or @\\[...\\]@ delimiters.  Quoted text is surrounded by
-- typographic quotation marks.
inlineToNodes :: Inline -> [Node] -> [Node]
inlineToNodes inline = case inline of
  Str s -> (textNode s :)
  Space -> (textNode " " :)
  LineBreak -> (node LINEBREAK [] :)
  SoftBreak -> (node SOFTBREAK [] :)
  Emph xs -> (node EMPH (inlinesToNodes xs) :)
  Strong xs -> (node STRONG (inlinesToNodes xs) :)
  Strikeout xs -> htmlWrapped "<s>" "</s>" xs
  Superscript xs -> htmlWrapped "<sup>" "</sup>" xs
  Subscript xs -> htmlWrapped "<sub>" "</sub>" xs
  SmallCaps xs -> htmlWrapped "<span class=\"smallcaps\">" "</span>" xs
  Link _ ils (url, tit) ->
    (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
  Image _ ils (url, tit) ->
    (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
  RawInline fmt xs
    | fmt == Format "html" -> (htmlNode xs :)
    | otherwise -> (node (CUSTOM_INLINE (T.pack xs) T.empty) [] :)
  Quoted qt ils ->
    -- Numeric escapes keep the source ASCII-only, so the quotation
    -- marks cannot be lost to an encoding mishap:
    -- U+2018/U+2019 for single quotes, U+201C/U+201D for double quotes.
    let (open, close) = case qt of
          SingleQuote -> ("\8216", "\8217")
          DoubleQuote -> ("\8220", "\8221")
     in ((textNode open : inlinesToNodes ils ++ [textNode close]) ++)
  Code _ str -> (node (CODE (T.pack str)) [] :)
  Math InlineMath str -> (htmlNode ("\\(" ++ str ++ "\\)") :)
  Math DisplayMath str -> (htmlNode ("\\[" ++ str ++ "\\]") :)
  Span _ ils -> (inlinesToNodes ils ++)
  Cite _ ils -> (inlinesToNodes ils ++)
  Note _ -> id -- should not occur
               -- we remove Note elements in preprocessing
  where
    textNode s = node (TEXT (T.pack s)) []
    htmlNode s = node (HTML_INLINE (T.pack s)) []
    -- Surround the converted inlines with an opening and a closing raw
    -- HTML tag.
    htmlWrapped open close xs =
      ((htmlNode open : inlinesToNodes xs ++ [htmlNode close]) ++)

View file

@ -11,4 +11,5 @@ extra-deps:
- skylighting-0.3.3
- texmath-0.9.4.1
- cmark-0.5.6
- cmark-gfm-0.1.1
resolver: lts-8.16