Initial addition of groff ms writer.

* New module: Text.Pandoc.Writers.Ms.
* New template: default.ms.
* The writer uses texmath's new eqn writer to convert math
  to eqn format, so a ms file produced with this writer
  should be processed with `groff -ms -e` if it contains
  math.
This commit is contained in:
John MacFarlane 2017-03-15 21:04:14 +01:00
parent f4ac0edf2a
commit 6c204ea2bd
9 changed files with 1690 additions and 1 deletions

31
data/templates/default.ms Normal file
View file

@ -0,0 +1,31 @@
$if(adjusting)$
.ad $adjusting$
$endif$
$if(hyphenate)$
.hy
$else$
.nh \" Turn off hyphenation by default.
$endif$
$if(has-inline-math)$
.EQ
delim ||
.EN
$endif$
$if(title)$
.TL
$title$
$endif$
$for(author)$
.AU
$author$
$endfor$
$for(header-includes)$
$header-includes$
$endfor$
$for(include-before)$
$include-before$
$endfor$
$body$
$for(include-after)$
$include-after$
$endfor$

View file

@ -49,6 +49,7 @@ Data-Files:
data/templates/default.context
data/templates/default.texinfo
data/templates/default.man
data/templates/default.ms
data/templates/default.markdown
data/templates/default.muse
data/templates/default.commonmark
@ -161,6 +162,7 @@ Extra-Source-Files:
test/tables.html5
test/tables.latex
test/tables.man
test/tables.ms
test/tables.plain
test/tables.markdown
test/tables.mediawiki
@ -184,6 +186,7 @@ Extra-Source-Files:
test/writer.html4
test/writer.html5
test/writer.man
test/writer.ms
test/writer.markdown
test/writer.plain
test/writer.mediawiki
@ -280,7 +283,7 @@ Library
safe >= 0.3 && < 0.4,
zip-archive >= 0.2.3.4 && < 0.4,
HTTP >= 4000.0.5 && < 4000.4,
texmath >= 0.9.3 && < 0.10,
texmath >= 0.9.4 && < 0.10,
xml >= 1.3.12 && < 1.4,
random >= 1 && < 1.2,
extensible-exceptions >= 0.1 && < 0.2,
@ -382,6 +385,7 @@ Library
Text.Pandoc.Writers.OpenDocument,
Text.Pandoc.Writers.Texinfo,
Text.Pandoc.Writers.Man,
Text.Pandoc.Writers.Ms,
Text.Pandoc.Writers.Markdown,
Text.Pandoc.Writers.CommonMark,
Text.Pandoc.Writers.Haddock,

View file

@ -120,6 +120,7 @@ module Text.Pandoc
, writeOPML
, writeOpenDocument
, writeMan
, writeMs
, writeMediaWiki
, writeDokuWiki
, writeZimWiki
@ -190,6 +191,7 @@ import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.ICML
import Text.Pandoc.Writers.LaTeX
import Text.Pandoc.Writers.Man
import Text.Pandoc.Writers.Ms
import Text.Pandoc.Writers.Markdown
import Text.Pandoc.Writers.MediaWiki
import Text.Pandoc.Writers.Muse
@ -292,6 +294,7 @@ writers = [
,("context" , StringWriter writeConTeXt)
,("texinfo" , StringWriter writeTexinfo)
,("man" , StringWriter writeMan)
,("ms" , StringWriter writeMs)
,("markdown" , StringWriter writeMarkdown)
,("markdown_strict" , StringWriter writeMarkdown)
,("markdown_phpextra" , StringWriter writeMarkdown)

View file

@ -691,6 +691,7 @@ defaultWriterName x =
".icml" -> "icml"
".tei.xml" -> "tei"
".tei" -> "tei"
".ms" -> "ms"
['.',y] | y `elem` ['1'..'9'] -> "man"
_ -> "html"

View file

@ -13,3 +13,4 @@ dataFiles = map (\(fp, contents) ->
dataFiles' :: [(FilePath, B.ByteString)]
dataFiles' = ("MANUAL.txt", %blob "MANUAL.txt") : %blobs "data"

View file

@ -0,0 +1,426 @@
{-
Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Writers.Ms
Copyright : Copyright (C) 2007-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of 'Pandoc' documents to groff man page format.
TODO:
[ ] warning for non-rendered raw content
[ ] is there a better way to do strikeout?
[ ] strong + em doesn't seem to work
[ ] super + subscript don't seem to work
[ ] options for hyperlink rendering (currently footnote)
[ ] avoid note-in-note (which we currently get easily with
links in footnotes)
[ ] can we get prettier output using .B, etc. instead of
the inline forms?
[ ] tight/loose list distinction
[ ] internal hyperlinks (this seems to be possible since
they exist in the groff manual PDF version)
[ ] use a custom macro for defn lists so they're configurable
[ ] better handling of accented characters and other non-ascii
characters (e.g. curly quotes) -- we shouldn't rely on a
utf8 compatible groff
[ ] avoid blank line after footnote marker when footnote has a
paragraph
[ ] add via groff option to PDF module
[ ] better handling of autolinks?
[ ] better handling of images, perhaps converting to eps when
going to PDF?
[ ] better template, with configurable page number, table of contents,
columns, etc.
-}
module Text.Pandoc.Writers.Ms ( writeMs ) where
import Text.Pandoc.Definition
import Text.Pandoc.Templates
import Text.Pandoc.Shared
import Text.Pandoc.Writers.Shared
import Text.Pandoc.Options
import Text.Pandoc.Writers.Math
import Text.Printf ( printf )
import Data.List ( stripPrefix, intersperse, intercalate )
import Data.Maybe (fromMaybe)
import Text.Pandoc.Pretty
import Text.Pandoc.Builder (deleteMeta)
import Text.Pandoc.Class (PandocMonad)
import Control.Monad.State
import Data.Char ( isDigit )
import Text.TeXMath (writeEqn)
data WriterState = WriterState { stHasInlineMath :: Bool
, stNotes :: [Note] }
type Note = [Block]
type MS = StateT WriterState
-- | Convert Pandoc to Ms.
writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m String
writeMs opts document =
evalStateT (pandocToMs opts document) (WriterState False [])
-- | Return groff man representation of document.
pandocToMs :: PandocMonad m => WriterOptions -> Pandoc -> MS m String
pandocToMs opts (Pandoc meta blocks) = do
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
let render' = render colwidth
titleText <- inlineListToMs' opts $ docTitle meta
let title' = render' titleText
let setFieldsFromTitle =
case break (== ' ') title' of
(cmdName, rest) -> case reverse cmdName of
(')':d:'(':xs) | isDigit d ->
defField "title" (reverse xs) .
defField "section" [d] .
case splitBy (=='|') rest of
(ft:hds) ->
defField "footer" (trim ft) .
defField "header"
(trim $ concat hds)
[] -> id
_ -> defField "title" title'
metadata <- metaToJSON opts
(fmap (render colwidth) . blockListToMs opts)
(fmap (render colwidth) . inlineListToMs' opts)
$ deleteMeta "title" meta
body <- blockListToMs opts blocks
let main = render' body
hasInlineMath <- gets stHasInlineMath
let context = defField "body" main
$ setFieldsFromTitle
$ defField "has-inline-math" hasInlineMath
$ defField "hyphenate" True
$ defField "pandoc-version" pandocVersion
$ metadata
case writerTemplate opts of
Nothing -> return main
Just tpl -> return $ renderTemplate' tpl context
-- | Association list of characters to escape.
manEscapes :: [(Char, String)]
manEscapes = [ ('\160', "\\ ")
, ('\'', "\\[aq]")
, ('', "'")
, ('\x2014', "\\[em]")
, ('\x2013', "\\[en]")
, ('\x2026', "\\&...")
, ('|', "\\[u007C]") -- because we use | for inline math
] ++ backslashEscapes "-@\\"
-- | Escape | character, used to mark inline math, inside math.
escapeBar :: String -> String
escapeBar = concatMap go
where go '|' = "\\[u007C]"
go c = [c]
-- | Escape special characters for Ms.
escapeString :: String -> String
escapeString = escapeStringUsing manEscapes
-- | Escape a literal (code) section for Ms.
escapeCode :: String -> String
escapeCode = concat . intersperse "\n" . map escapeLine . lines where
escapeLine codeline =
case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of
a@('.':_) -> "\\&" ++ a
b -> b
-- We split inline lists into sentences, and print one sentence per
-- line. groff/troff treats the line-ending period differently.
-- See http://code.google.com/p/pandoc/issues/detail?id=148.
-- | Returns the first sentence in a list of inlines, and the rest.
breakSentence :: [Inline] -> ([Inline], [Inline])
breakSentence [] = ([],[])
breakSentence xs =
let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True
isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True
isSentenceEndInline (LineBreak) = True
isSentenceEndInline _ = False
(as, bs) = break isSentenceEndInline xs
in case bs of
[] -> (as, [])
[c] -> (as ++ [c], [])
(c:Space:cs) -> (as ++ [c], cs)
(c:SoftBreak:cs) -> (as ++ [c], cs)
(Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs)
(x@(Str ('.':')':_)):cs) -> (as ++ [x], cs)
(LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs)
(c:cs) -> (as ++ [c] ++ ds, es)
where (ds, es) = breakSentence cs
-- | Split a list of inlines into sentences.
splitSentences :: [Inline] -> [[Inline]]
splitSentences xs =
let (sent, rest) = breakSentence xs
in if null rest then [sent] else sent : splitSentences rest
blockToMs :: PandocMonad m
=> WriterOptions -- ^ Options
-> Block -- ^ Block element
-> MS m Doc
blockToMs _ Null = return empty
blockToMs opts (Div _ bs) = blockListToMs opts bs
blockToMs opts (Plain inlines) =
liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines
blockToMs opts (Para inlines) = do
contents <- liftM vcat $ mapM (inlineListToMs' opts) $
splitSentences inlines
return $ text ".LP" $$ contents
blockToMs _ (RawBlock f str)
| f == Format "man" = return $ text str
| otherwise = return empty
blockToMs _ HorizontalRule = return $ text ".PP" $$ text " * * * * *"
blockToMs opts (Header level _ inlines) = do
contents <- inlineListToMs' opts inlines
let heading = if writerNumberSections opts
then ".NH"
else ".SH"
return $ text heading <> space <> text (show level) $$ contents
blockToMs _ (CodeBlock _ str) = return $
text ".IP" $$
text ".nf" $$
text "\\f[C]" $$
text (escapeCode str) $$
text "\\f[]" $$
text ".fi"
blockToMs opts (LineBlock ls) = do
blockToMs opts $ Para $ intercalate [LineBreak] ls
blockToMs opts (BlockQuote blocks) = do
contents <- blockListToMs opts blocks
return $ text ".RS" $$ contents $$ text ".RE"
blockToMs opts (Table caption alignments widths headers rows) =
let aligncode AlignLeft = "l"
aligncode AlignRight = "r"
aligncode AlignCenter = "c"
aligncode AlignDefault = "l"
in do
caption' <- inlineListToMs' opts caption
let iwidths = if all (== 0) widths
then repeat ""
else map (printf "w(%0.1fn)" . (70 *)) widths
-- 78n default width - 8n indent = 70n
let coldescriptions = text $ intercalate " "
(zipWith (\align width -> aligncode align ++ width)
alignments iwidths) ++ "."
colheadings <- mapM (blockListToMs opts) headers
let makeRow cols = text "T{" $$
(vcat $ intersperse (text "T}@T{") cols) $$
text "T}"
let colheadings' = if all null headers
then empty
else makeRow colheadings $$ char '_'
body <- mapM (\row -> do
cols <- mapM (blockListToMs opts) row
return $ makeRow cols) rows
return $ text ".PP" $$ caption' $$
text ".TS" $$ text "tab(@);" $$ coldescriptions $$
colheadings' $$ vcat body $$ text ".TE"
blockToMs opts (BulletList items) = do
contents <- mapM (bulletListItemToMs opts) items
return (vcat contents)
blockToMs opts (OrderedList attribs items) = do
let markers = take (length items) $ orderedListMarkers attribs
let indent = 1 + (maximum $ map length markers)
contents <- mapM (\(num, item) -> orderedListItemToMs opts num indent item) $
zip markers items
return (vcat contents)
blockToMs opts (DefinitionList items) = do
contents <- mapM (definitionListItemToMs opts) items
return (vcat contents)
-- | Convert bullet list item (list of blocks) to man.
bulletListItemToMs :: PandocMonad m => WriterOptions -> [Block] -> MS m Doc
bulletListItemToMs _ [] = return empty
bulletListItemToMs opts ((Para first):rest) =
bulletListItemToMs opts ((Plain first):rest)
bulletListItemToMs opts ((Plain first):rest) = do
first' <- blockToMs opts (Plain first)
rest' <- blockListToMs opts rest
let first'' = text ".IP \\[bu] 2" $$ first'
let rest'' = if null rest
then empty
else text ".RS 2" $$ rest' $$ text ".RE"
return (first'' $$ rest'')
bulletListItemToMs opts (first:rest) = do
first' <- blockToMs opts first
rest' <- blockListToMs opts rest
return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE"
-- | Convert ordered list item (a list of blocks) to man.
orderedListItemToMs :: PandocMonad m
=> WriterOptions -- ^ options
-> String -- ^ order marker for list item
-> Int -- ^ number of spaces to indent
-> [Block] -- ^ list item (list of blocks)
-> MS m Doc
orderedListItemToMs _ _ _ [] = return empty
orderedListItemToMs opts num indent ((Para first):rest) =
orderedListItemToMs opts num indent ((Plain first):rest)
orderedListItemToMs opts num indent (first:rest) = do
first' <- blockToMs opts first
rest' <- blockListToMs opts rest
let num' = printf ("%" ++ show (indent - 1) ++ "s") num
let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first'
let rest'' = if null rest
then empty
else text ".RS 4" $$ rest' $$ text ".RE"
return $ first'' $$ rest''
-- | Convert definition list item (label, list of blocks) to man.
definitionListItemToMs :: PandocMonad m
=> WriterOptions
-> ([Inline],[[Block]])
-> MS m Doc
definitionListItemToMs opts (label, defs) = do
labelText <- inlineListToMs' opts label
contents <- if null defs
then return empty
else liftM vcat $ forM defs $ \blocks -> do
let (first, rest) = case blocks of
((Para x):y) -> (Plain x,y)
(x:y) -> (x,y)
[] -> error "blocks is null"
rest' <- liftM vcat $
mapM (\item -> blockToMs opts item) rest
first' <- blockToMs opts first
return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
return $ text ".XP" $$ nowrap (text ".B \"" <> labelText <> text "\"")
$$ text "\\~\\~" <> contents
-- | Convert list of Pandoc block elements to man.
blockListToMs :: PandocMonad m
=> WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
-> MS m Doc
blockListToMs opts blocks =
mapM (blockToMs opts) blocks >>= (return . vcat)
-- | Convert list of Pandoc inline elements to ms.
inlineListToMs :: PandocMonad m => WriterOptions -> [Inline] -> MS m Doc
-- if list starts with ., insert a zero-width character \& so it
-- won't be interpreted as markup if it falls at the beginning of a line.
inlineListToMs opts lst@(Str ('.':_) : _) = mapM (inlineToMs opts) lst >>=
(return . (text "\\&" <>) . hcat)
inlineListToMs opts lst = hcat <$> mapM (inlineToMs opts) lst
-- This version to be used when there is no further inline content;
-- forces a note at the end.
inlineListToMs' :: PandocMonad m => WriterOptions -> [Inline] -> MS m Doc
inlineListToMs' opts lst = do
x <- hcat <$> mapM (inlineToMs opts) lst
y <- handleNotes opts empty
return $ x <> y
-- | Convert Pandoc inline element to man.
inlineToMs :: PandocMonad m => WriterOptions -> Inline -> MS m Doc
inlineToMs opts (Span _ ils) = inlineListToMs opts ils
inlineToMs opts (Emph lst) = do
contents <- inlineListToMs opts lst
return $ text "\\f[I]" <> contents <> text "\\f[]"
inlineToMs opts (Strong lst) = do
contents <- inlineListToMs opts lst
return $ text "\\f[B]" <> contents <> text "\\f[]"
inlineToMs opts (Strikeout lst) = do
contents <- inlineListToMs opts lst
return $ text "[STRIKEOUT:" <> contents <> char ']'
inlineToMs opts (Superscript lst) = do
contents <- inlineListToMs opts lst
return $ char '^' <> contents <> char '^'
inlineToMs opts (Subscript lst) = do
contents <- inlineListToMs opts lst
return $ char '~' <> contents <> char '~'
inlineToMs opts (SmallCaps lst) = inlineListToMs opts lst -- not supported
inlineToMs opts (Quoted SingleQuote lst) = do
contents <- inlineListToMs opts lst
return $ char '`' <> contents <> char '\''
inlineToMs opts (Quoted DoubleQuote lst) = do
contents <- inlineListToMs opts lst
return $ text "\\[lq]" <> contents <> text "\\[rq]"
inlineToMs opts (Cite _ lst) =
inlineListToMs opts lst
inlineToMs _ (Code _ str) =
return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]"
inlineToMs _ (Str str) = return $ text $ escapeString str
inlineToMs opts (Math InlineMath str) = do
modify $ \st -> st{ stHasInlineMath = True }
res <- convertMath writeEqn InlineMath str
case res of
Left il -> inlineToMs opts il
Right r -> return $ text "|" <> text (escapeBar r) <> text "|"
inlineToMs opts (Math DisplayMath str) = do
res <- convertMath writeEqn InlineMath str
case res of
Left il -> do
contents <- inlineToMs opts il
return $ cr <> text ".RS" $$ contents $$ text ".RE"
Right r -> return $
cr <> text ".EQ" $$ text (escapeBar r) $$ text ".EN"
inlineToMs _ (RawInline f str)
| f == Format "man" = return $ text str
| otherwise = return empty
inlineToMs _ (LineBreak) = return $
cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr
inlineToMs opts SoftBreak = handleNotes opts cr
inlineToMs opts Space = handleNotes opts space
inlineToMs opts (Link _ txt (src, _)) = do
let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
case txt of
[Str s]
| escapeURI s == srcSuffix ->
return $ char '<' <> text srcSuffix <> char '>'
_ -> do
let linknote = [Plain [Str src]]
inlineListToMs opts (txt ++ [Note linknote])
inlineToMs opts (Image attr alternate (source, tit)) = do
let txt = if (null alternate) || (alternate == [Str ""]) ||
(alternate == [Str source]) -- to prevent autolinks
then [Str "image"]
else alternate
linkPart <- inlineToMs opts (Link attr txt (source, tit))
return $ char '[' <> text "IMAGE: " <> linkPart <> char ']'
inlineToMs _ (Note contents) = do
modify $ \st -> st{ stNotes = contents : stNotes st }
return $ text "\\**"
handleNotes :: PandocMonad m => WriterOptions -> Doc -> MS m Doc
handleNotes opts fallback = do
notes <- gets stNotes
if null notes
then return fallback
else do
modify $ \st -> st{ stNotes = [] }
vcat <$> mapM (handleNote opts) notes
handleNote :: PandocMonad m => WriterOptions -> Note -> MS m Doc
handleNote opts bs = do
contents <- blockListToMs opts bs
return $ cr <> text ".FS" $$ contents $$ text ".FE" <> cr

View file

@ -143,6 +143,9 @@ tests = [ testGroup "markdown"
, testGroup "muse"
[ testGroup "writer" $ writerTests "muse"
]
, testGroup "ms"
[ testGroup "writer" $ writerTests "ms"
]
]
-- makes sure file is fully closed after reading

280
test/tables.ms Normal file
View file

@ -0,0 +1,280 @@
.LP
Simple table with caption:
.PP
Demonstration of simple table syntax.
.TS
tab(@);
r l c l.
T{
Right
T}@T{
Left
T}@T{
Center
T}@T{
Default
T}
_
T{
12
T}@T{
12
T}@T{
12
T}@T{
12
T}
T{
123
T}@T{
123
T}@T{
123
T}@T{
123
T}
T{
1
T}@T{
1
T}@T{
1
T}@T{
1
T}
.TE
.LP
Simple table without caption:
.PP
.TS
tab(@);
r l c l.
T{
Right
T}@T{
Left
T}@T{
Center
T}@T{
Default
T}
_
T{
12
T}@T{
12
T}@T{
12
T}@T{
12
T}
T{
123
T}@T{
123
T}@T{
123
T}@T{
123
T}
T{
1
T}@T{
1
T}@T{
1
T}@T{
1
T}
.TE
.LP
Simple table indented two spaces:
.PP
Demonstration of simple table syntax.
.TS
tab(@);
r l c l.
T{
Right
T}@T{
Left
T}@T{
Center
T}@T{
Default
T}
_
T{
12
T}@T{
12
T}@T{
12
T}@T{
12
T}
T{
123
T}@T{
123
T}@T{
123
T}@T{
123
T}
T{
1
T}@T{
1
T}@T{
1
T}@T{
1
T}
.TE
.LP
Multiline table with caption:
.PP
Here's the caption.
It may span multiple lines.
.TS
tab(@);
cw(10.5n) lw(9.6n) rw(11.4n) lw(23.6n).
T{
Centered
Header
T}@T{
Left
Aligned
T}@T{
Right
Aligned
T}@T{
Default aligned
T}
_
T{
First
T}@T{
row
T}@T{
12.0
T}@T{
Example of a row that spans
multiple lines.
T}
T{
Second
T}@T{
row
T}@T{
5.0
T}@T{
Here's another one.
Note
the blank line between rows.
T}
.TE
.LP
Multiline table without caption:
.PP
.TS
tab(@);
cw(10.5n) lw(9.6n) rw(11.4n) lw(23.6n).
T{
Centered
Header
T}@T{
Left
Aligned
T}@T{
Right
Aligned
T}@T{
Default aligned
T}
_
T{
First
T}@T{
row
T}@T{
12.0
T}@T{
Example of a row that spans
multiple lines.
T}
T{
Second
T}@T{
row
T}@T{
5.0
T}@T{
Here's another one.
Note
the blank line between rows.
T}
.TE
.LP
Table without column headers:
.PP
.TS
tab(@);
r l c r.
T{
12
T}@T{
12
T}@T{
12
T}@T{
12
T}
T{
123
T}@T{
123
T}@T{
123
T}@T{
123
T}
T{
1
T}@T{
1
T}@T{
1
T}@T{
1
T}
.TE
.LP
Multiline table without column headers:
.PP
.TS
tab(@);
cw(10.5n) lw(9.6n) rw(11.4n) lw(23.6n).
T{
First
T}@T{
row
T}@T{
12.0
T}@T{
Example of a row that spans
multiple lines.
T}
T{
Second
T}@T{
row
T}@T{
5.0
T}@T{
Here's another one.
Note
the blank line between rows.
T}
.TE

940
test/writer.ms Normal file
View file

@ -0,0 +1,940 @@
.hy
.EQ
delim ||
.EN
.TL
Pandoc Test Suite
.AU
John MacFarlane
.AU
Anonymous
.LP
This is a set of tests for pandoc.
Most of them are adapted from
John Gruber's markdown test suite.
.PP
* * * * *
.SH 1
Headers
.SH 2
Level 2 with an embedded link\**
.FS
/url
.FE
.SH 3
Level 3 with \f[I]emphasis\f[]
.SH 4
Level 4
.SH 5
Level 5
.SH 1
Level 1
.SH 2
Level 2 with \f[I]emphasis\f[]
.SH 3
Level 3
.LP
with no blank line
.SH 2
Level 2
.LP
with no blank line
.PP
* * * * *
.SH 1
Paragraphs
.LP
Here's a regular paragraph.
.LP
In Markdown 1.0.0 and earlier.
Version
8.
This line turns into a list item.
Because a hard\-wrapped line in the
middle of a paragraph looked like a
list item.
.LP
Here's one with a bullet.
* criminey.
.LP
There should be a hard line break
.PD 0
.P
.PD
here.
.PP
* * * * *
.SH 1
Block Quotes
.LP
E\-mail style:
.RS
.LP
This is a block quote.
It is pretty short.
.RE
.RS
.LP
Code in a block quote:
.IP
.nf
\f[C]
sub\ status\ {
\ \ \ \ print\ "working";
}
\f[]
.fi
.LP
A list:
.IP "1." 3
item one
.IP "2." 3
item two
.LP
Nested block quotes:
.RS
.LP
nested
.RE
.RS
.LP
nested
.RE
.RE
.LP
This should not be a block quote: 2
> 1.
.LP
And a following paragraph.
.PP
* * * * *
.SH 1
Code Blocks
.LP
Code:
.IP
.nf
\f[C]
\-\-\-\-\ (should\ be\ four\ hyphens)
sub\ status\ {
\ \ \ \ print\ "working";
}
this\ code\ block\ is\ indented\ by\ one\ tab
\f[]
.fi
.LP
And:
.IP
.nf
\f[C]
\ \ \ \ this\ code\ block\ is\ indented\ by\ two\ tabs
These\ should\ not\ be\ escaped:\ \ \\$\ \\\\\ \\>\ \\[\ \\{
\f[]
.fi
.PP
* * * * *
.SH 1
Lists
.SH 2
Unordered
.LP
Asterisks tight:
.IP \[bu] 2
asterisk 1
.IP \[bu] 2
asterisk 2
.IP \[bu] 2
asterisk 3
.LP
Asterisks loose:
.IP \[bu] 2
asterisk 1
.IP \[bu] 2
asterisk 2
.IP \[bu] 2
asterisk 3
.LP
Pluses tight:
.IP \[bu] 2
Plus 1
.IP \[bu] 2
Plus 2
.IP \[bu] 2
Plus 3
.LP
Pluses loose:
.IP \[bu] 2
Plus 1
.IP \[bu] 2
Plus 2
.IP \[bu] 2
Plus 3
.LP
Minuses tight:
.IP \[bu] 2
Minus 1
.IP \[bu] 2
Minus 2
.IP \[bu] 2
Minus 3
.LP
Minuses loose:
.IP \[bu] 2
Minus 1
.IP \[bu] 2
Minus 2
.IP \[bu] 2
Minus 3
.SH 2
Ordered
.LP
Tight:
.IP "1." 3
First
.IP "2." 3
Second
.IP "3." 3
Third
.LP
and:
.IP "1." 3
One
.IP "2." 3
Two
.IP "3." 3
Three
.LP
Loose using tabs:
.IP "1." 3
First
.IP "2." 3
Second
.IP "3." 3
Third
.LP
and using spaces:
.IP "1." 3
One
.IP "2." 3
Two
.IP "3." 3
Three
.LP
Multiple paragraphs:
.IP "1." 3
Item 1, graf one.
.RS 4
.LP
Item 1.
graf two.
The quick brown fox jumped over the lazy dog's
back.
.RE
.IP "2." 3
Item 2.
.IP "3." 3
Item 3.
.SH 2
Nested
.IP \[bu] 2
Tab
.RS 2
.IP \[bu] 2
Tab
.RS 2
.IP \[bu] 2
Tab
.RE
.RE
.LP
Here's another:
.IP "1." 3
First
.IP "2." 3
Second:
.RS 4
.IP \[bu] 2
Fee
.IP \[bu] 2
Fie
.IP \[bu] 2
Foe
.RE
.IP "3." 3
Third
.LP
Same thing but with paragraphs:
.IP "1." 3
First
.IP "2." 3
Second:
.RS 4
.IP \[bu] 2
Fee
.IP \[bu] 2
Fie
.IP \[bu] 2
Foe
.RE
.IP "3." 3
Third
.SH 2
Tabs and spaces
.IP \[bu] 2
this is a list item
indented with tabs
.IP \[bu] 2
this is a list item
indented with spaces
.RS 2
.IP \[bu] 2
this is an example list item
indented with tabs
.IP \[bu] 2
this is an example list item
indented with spaces
.RE
.SH 2
Fancy list markers
.IP "(2)" 4
begins with 2
.IP "(3)" 4
and now 3
.RS 4
.LP
with a continuation
.IP "iv." 4
sublist with roman numerals,
starting with 4
.IP " v." 4
more items
.RS 4
.IP "(A)" 4
a subsublist
.IP "(B)" 4
a subsublist
.RE
.RE
.LP
Nesting:
.IP "A." 3
Upper Alpha
.RS 4
.IP "I." 3
Upper Roman.
.RS 4
.IP "(6)" 4
Decimal start with 6
.RS 4
.IP "c)" 3
Lower alpha with paren
.RE
.RE
.RE
.LP
Autonumbering:
.IP "1." 3
Autonumber.
.IP "2." 3
More.
.RS 4
.IP "1." 3
Nested.
.RE
.LP
Should not be a list item:
.LP
M.A.\ 2007
.LP
B.
Williams
.PP
* * * * *
.SH 1
Definition Lists
.LP
Tight using spaces:
.XP
.B "apple"
\~\~red fruit
.RS
.RE
.XP
.B "orange"
\~\~orange fruit
.RS
.RE
.XP
.B "banana"
\~\~yellow fruit
.RS
.RE
.LP
Tight using tabs:
.XP
.B "apple"
\~\~red fruit
.RS
.RE
.XP
.B "orange"
\~\~orange fruit
.RS
.RE
.XP
.B "banana"
\~\~yellow fruit
.RS
.RE
.LP
Loose:
.XP
.B "apple"
\~\~red fruit
.RS
.RE
.XP
.B "orange"
\~\~orange fruit
.RS
.RE
.XP
.B "banana"
\~\~yellow fruit
.RS
.RE
.LP
Multiple blocks with italics:
.XP
.B "\f[I]apple\f[]"
\~\~red fruit
.RS
.LP
contains seeds,
crisp, pleasant to taste
.RE
.XP
.B "\f[I]orange\f[]"
\~\~orange fruit
.RS
.IP
.nf
\f[C]
{\ orange\ code\ block\ }
\f[]
.fi
.RS
.LP
orange block quote
.RE
.RE
.LP
Multiple definitions, tight:
.XP
.B "apple"
\~\~red fruit
.RS
.RE
computer
.RS
.RE
.XP
.B "orange"
\~\~orange fruit
.RS
.RE
bank
.RS
.RE
.LP
Multiple definitions, loose:
.XP
.B "apple"
\~\~red fruit
.RS
.RE
computer
.RS
.RE
.XP
.B "orange"
\~\~orange fruit
.RS
.RE
bank
.RS
.RE
.LP
Blank line after term, indented marker, alternate markers:
.XP
.B "apple"
\~\~red fruit
.RS
.RE
computer
.RS
.RE
.XP
.B "orange"
\~\~orange fruit
.RS
.IP "1." 3
sublist
.IP "2." 3
sublist
.RE
.SH 1
HTML Blocks
.LP
Simple block on one line:
foo
.LP
And nested without indentation:
.LP
foo
bar
.LP
Interpreted markdown in a table:
This is \f[I]emphasized\f[]
And this is \f[B]strong\f[]
.LP
Here's a simple block:
.LP
foo
.LP
This should be a code block, though:
.IP
.nf
\f[C]
<div>
\ \ \ \ foo
</div>
\f[]
.fi
.LP
As should this:
.IP
.nf
\f[C]
<div>foo</div>
\f[]
.fi
.LP
Now, nested:
foo
.LP
This should just be an HTML comment:
.LP
Multiline:
.LP
Code block:
.IP
.nf
\f[C]
<!\-\-\ Comment\ \-\->
\f[]
.fi
.LP
Just plain comment, with trailing spaces on the line:
.LP
Code:
.IP
.nf
\f[C]
<hr\ />
\f[]
.fi
.LP
Hr's:
.PP
* * * * *
.SH 1
Inline Markup
.LP
This is \f[I]emphasized\f[], and so \f[I]is this\f[].
.LP
This is \f[B]strong\f[], and so \f[B]is this\f[].
.LP
An \f[I]emphasized link\**\f[].
.FS
/url
.FE
.LP
\f[B]\f[I]This is strong and em.\f[]\f[]
.LP
So is \f[B]\f[I]this\f[]\f[] word.
.LP
\f[B]\f[I]This is strong and em.\f[]\f[]
.LP
So is \f[B]\f[I]this\f[]\f[] word.
.LP
This is code: \f[C]>\f[], \f[C]$\f[], \f[C]\\\f[], \f[C]\\$\f[],
\f[C]<html>\f[].
.LP
[STRIKEOUT:This is \f[I]strikeout\f[].]
.LP
Superscripts: a^bc^d a^\f[I]hello\f[]^ a^hello\ there^.
.LP
Subscripts: H~2~O, H~23~O, H~many\ of\ them~O.
.LP
These should not be superscripts or subscripts,
because of the unescaped spaces: a^b c^d, a~b c~d.
.PP
* * * * *
.SH 1
Smart quotes, ellipses, dashes
.LP
\[lq]Hello,\[rq] said the spider.
\[lq]`Shelob' is my name.\[rq]
.LP
`A', `B', and `C' are letters.
.LP
`Oak,' `elm,' and `beech' are names of trees.
So is `pine.'
.LP
`He said, \[lq]I want to go.\[rq]' Were you alive in the
70's?
.LP
Here is some quoted `\f[C]code\f[]' and a \[lq]quoted link\**\[rq].
.FS
http://example.com/?foo=1&bar=2
.FE
.LP
Some dashes: one\[em]two \[em] three\[em]four \[em] five.
.LP
Dashes between numbers: 5\[en]7, 255\[en]66, 1987\[en]1999.
.LP
Ellipses\&...and\&...and\&....
.PP
* * * * *
.SH 1
LaTeX
.IP \[bu] 2
.IP \[bu] 2
|2 + 2 = 4|
.IP \[bu] 2
|x \[u2208] y|
.IP \[bu] 2
|alpha \[u2227] omega|
.IP \[bu] 2
|223|
.IP \[bu] 2
|p|\-Tree
.IP \[bu] 2
Here's some display math:
.EQ
d over {d x} f ( x ) = lim sub {h -> 0} {f ( x + h ) \[u2212] f ( x )} over h
.EN
.IP \[bu] 2
Here's one that has a line break in it: |alpha + omega times x sup 2|.
.LP
These shouldn't be math:
.IP \[bu] 2
To get the famous equation, write \f[C]$e\ =\ mc^2$\f[].
.IP \[bu] 2
$22,000 is a \f[I]lot\f[] of money.
So is $34,000.
(It worked if \[lq]lot\[rq] is emphasized.)
.IP \[bu] 2
Shoes ($20) and socks ($5).
.IP \[bu] 2
Escaped \f[C]$\f[]: $73 \f[I]this should be emphasized\f[] 23$.
.LP
Here's a LaTeX table:
.PP
* * * * *
.SH 1
Special Characters
.LP
Here is some unicode:
.IP \[bu] 2
I hat: Î
.IP \[bu] 2
o umlaut: ö
.IP \[bu] 2
section: §
.IP \[bu] 2
set membership: ∈
.IP \[bu] 2
copyright: ©
.LP
AT&T has an ampersand in their name.
.LP
AT&T is another way to write it.
.LP
This & that.
.LP
4 < 5.
.LP
6 > 5.
.LP
Backslash: \\
.LP
Backtick: `
.LP
Asterisk: *
.LP
Underscore: _
.LP
Left brace: {
.LP
Right brace: }
.LP
Left bracket: [
.LP
Right bracket: ]
.LP
Left paren: (
.LP
Right paren: )
.LP
Greater\-than: >
.LP
Hash: #
.LP
Period: .
.LP
Bang: !
.LP
Plus: +
.LP
Minus: \-
.PP
* * * * *
.SH 1
Links
.SH 2
Explicit
.LP
Just a URL\**.
.FS
/url/
.FE
.LP
URL and title\**.
.FS
/url/
.FE
.LP
URL and title\**.
.FS
/url/
.FE
.LP
URL and title\**.
.FS
/url/
.FE
.LP
URL and title\**
.FS
/url/
.FE
.LP
URL and title\**
.FS
/url/
.FE
.LP
with_underscore\**
.FS
/url/with_underscore
.FE
.LP
Email link\**
.FS
mailto:nobody\@nowhere.net
.FE
.LP
Empty\**.
.FS
.FE
.SH 2
Reference
.LP
Foo bar\**.
.FS
/url/
.FE
.LP
Foo bar\**.
.FS
/url/
.FE
.LP
Foo bar\**.
.FS
/url/
.FE
.LP
With embedded [brackets]\**.
.FS
/url/
.FE
.LP
b\**
.FS
/url/
.FE
by itself should be a link.
.LP
Indented once\**.
.FS
/url
.FE
.LP
Indented twice\**.
.FS
/url
.FE
.LP
Indented thrice\**.
.FS
/url
.FE
.LP
This should [not][] be a link.
.IP
.nf
\f[C]
[not]:\ /url
\f[]
.fi
.LP
Foo bar\**.
.FS
/url/
.FE
.LP
Foo biz\**.
.FS
/url/
.FE
.SH 2
With ampersands
.LP
Here's a link with an ampersand in the URL\**.
.FS
http://example.com/?foo=1&bar=2
.FE
.LP
Here's a link with an amersand in the link text: AT&T\**.
.FS
http://att.com/
.FE
.LP
Here's an inline link\**.
.FS
/script?foo=1&bar=2
.FE
.LP
Here's an inline link in pointy braces\**.
.FS
/script?foo=1&bar=2
.FE
.SH 2
Autolinks
.LP
With an ampersand: <http://example.com/?foo=1&bar=2>
.IP \[bu] 2
In a list?
.IP \[bu] 2
<http://example.com/>
.IP \[bu] 2
It should.
.LP
An e\-mail address: <nobody@nowhere.net>
.RS
.LP
Blockquoted: <http://example.com/>
.RE
.LP
Auto\-links should not occur here: \f[C]<http://example.com/>\f[]
.IP
.nf
\f[C]
or\ here:\ <http://example.com/>
\f[]
.fi
.PP
* * * * *
.SH 1
Images
.LP
From \[lq]Voyage dans la Lune\[rq] by Georges Melies (1902):
.LP
[IMAGE: lalune\**]
.FS
lalune.jpg
.FE
.LP
Here is a movie [IMAGE: movie\**]
.FS
movie.jpg
.FE
icon.
.PP
* * * * *
.SH 1
Footnotes
.LP
Here is a footnote reference,\**
.FS
.LP
Here is the footnote.
It can go anywhere after the footnote
reference.
It need not be placed at the end of the document.
.FE
and another.\**
.FS
.LP
Here's the long note.
This one contains multiple
blocks.
.LP
Subsequent blocks are indented to show that they belong to the
footnote (as with list items).
.IP
.nf
\f[C]
\ \ {\ <code>\ }
\f[]
.fi
.LP
If you want, you can indent every line, but you can also be
lazy and just indent the first line of each block.
.FE
This should \f[I]not\f[] be a footnote reference, because it
contains a space.[^my note] Here is an inline note.\**
.FS
.LP
This
is \f[I]easier\f[] to type.
Inline notes may contain
links\**
.FS
http://google.com
.FE
and \f[C]]\f[] verbatim characters,
as well as [bracketed text].
.FE
.RS
.LP
Notes can go in quotes.\**
.FS
.LP
In quote.
.FE
.RE
.IP "1." 3
And in list items.\**
.FS
.LP
In list.
.FE
.LP
This paragraph should not be part of the note, as it is not indented.