Finished first draft of Haddock writer.

This commit is contained in:
John MacFarlane 2014-06-18 15:32:13 -07:00
parent 9fc5c8d7af
commit 35e57db5c2
5 changed files with 381 additions and 11 deletions

12
README
View file

@ -18,10 +18,10 @@ Org-mode], [DocBook], and [Word docx]; and it can write plain text,
[markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX] (including [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX] (including
[beamer] slide shows), [ConTeXt], [RTF], [OPML], [DocBook], [beamer] slide shows), [ConTeXt], [RTF], [OPML], [DocBook],
[OpenDocument], [ODT], [Word docx], [GNU Texinfo], [MediaWiki markup], [OpenDocument], [ODT], [Word docx], [GNU Texinfo], [MediaWiki markup],
[EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, [Haddock markup], [EPUB] (v2 or v3), [FictionBook2], [Textile],
[Emacs Org-Mode], [AsciiDoc], [InDesign ICML], and [Slidy], [groff man] pages, [Emacs Org-Mode], [AsciiDoc], [InDesign ICML],
[Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows. It can and [Slidy], [Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows.
also produce [PDF] output on systems where LaTeX is installed. It can also produce [PDF] output on systems where LaTeX is installed.
Pandoc's enhanced version of markdown includes syntax for footnotes, Pandoc's enhanced version of markdown includes syntax for footnotes,
tables, flexible ordered lists, definition lists, fenced code blocks, tables, flexible ordered lists, definition lists, fenced code blocks,
@ -169,8 +169,8 @@ General options
`context` (ConTeXt), `man` (groff man), `mediawiki` (MediaWiki markup), `context` (ConTeXt), `man` (groff man), `mediawiki` (MediaWiki markup),
`textile` (Textile), `org` (Emacs Org-Mode), `texinfo` (GNU Texinfo), `textile` (Textile), `org` (Emacs Org-Mode), `texinfo` (GNU Texinfo),
`opml` (OPML), `docbook` (DocBook), `opendocument` (OpenDocument), `odt` `opml` (OPML), `docbook` (DocBook), `opendocument` (OpenDocument), `odt`
(OpenOffice text document), `docx` (Word docx), (OpenOffice text document), `docx` (Word docx), `haddock` (Haddock
`rtf` (rich text format), `epub` (EPUB v2 book), `epub3` markup), `rtf` (rich text format), `epub` (EPUB v2 book), `epub3`
(EPUB v3), `fb2` (FictionBook2 e-book), `asciidoc` (AsciiDoc), (EPUB v3), `fb2` (FictionBook2 e-book), `asciidoc` (AsciiDoc),
`icml` (InDesign ICML), `slidy` (Slidy HTML and javascript slide show), `icml` (InDesign ICML), `slidy` (Slidy HTML and javascript slide show),
`slideous` (Slideous HTML and javascript slide show), `dzslides` `slideous` (Slideous HTML and javascript slide show), `dzslides`

View file

@ -21,9 +21,9 @@ Description: Pandoc is a Haskell library for converting from one markup
markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook, OPML, markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook, OPML,
OpenDocument, ODT, Word docx, RTF, MediaWiki, Textile, OpenDocument, ODT, Word docx, RTF, MediaWiki, Textile,
groff man pages, plain text, Emacs Org-Mode, AsciiDoc, groff man pages, plain text, Emacs Org-Mode, AsciiDoc,
EPUB (v2 and v3), FictionBook2, InDesign ICML, and several kinds Haddock markup, EPUB (v2 and v3), FictionBook2,
of HTML/javascript slide shows (S5, Slidy, Slideous, DZSlides, InDesign ICML, and several kinds of HTML/javascript
reveal.js). slide shows (S5, Slidy, Slideous, DZSlides, reveal.js).
. .
Pandoc extends standard markdown syntax with footnotes, Pandoc extends standard markdown syntax with footnotes,
embedded LaTeX, definition lists, tables, and other embedded LaTeX, definition lists, tables, and other
@ -305,6 +305,7 @@ Library
Text.Pandoc.Writers.Texinfo, Text.Pandoc.Writers.Texinfo,
Text.Pandoc.Writers.Man, Text.Pandoc.Writers.Man,
Text.Pandoc.Writers.Markdown, Text.Pandoc.Writers.Markdown,
Text.Pandoc.Writers.Haddock,
Text.Pandoc.Writers.RST, Text.Pandoc.Writers.RST,
Text.Pandoc.Writers.Org, Text.Pandoc.Writers.Org,
Text.Pandoc.Writers.AsciiDoc, Text.Pandoc.Writers.AsciiDoc,

View file

@ -102,6 +102,7 @@ module Text.Pandoc
, writeFB2 , writeFB2
, writeOrg , writeOrg
, writeAsciiDoc , writeAsciiDoc
, writeHaddock
, writeCustom , writeCustom
-- * Rendering templates and default templates -- * Rendering templates and default templates
, module Text.Pandoc.Templates , module Text.Pandoc.Templates
@ -149,6 +150,7 @@ import Text.Pandoc.Writers.MediaWiki
import Text.Pandoc.Writers.Textile import Text.Pandoc.Writers.Textile
import Text.Pandoc.Writers.Org import Text.Pandoc.Writers.Org
import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Writers.AsciiDoc
import Text.Pandoc.Writers.Haddock
import Text.Pandoc.Writers.Custom import Text.Pandoc.Writers.Custom
import Text.Pandoc.Templates import Text.Pandoc.Templates
import Text.Pandoc.Options import Text.Pandoc.Options
@ -279,6 +281,7 @@ writers = [
,("rtf" , IOStringWriter writeRTFWithEmbeddedImages) ,("rtf" , IOStringWriter writeRTFWithEmbeddedImages)
,("org" , PureStringWriter writeOrg) ,("org" , PureStringWriter writeOrg)
,("asciidoc" , PureStringWriter writeAsciiDoc) ,("asciidoc" , PureStringWriter writeAsciiDoc)
,("haddock" , PureStringWriter writeHaddock)
] ]
getDefaultExtensions :: String -> Set Extension getDefaultExtensions :: String -> Set Extension

View file

@ -23,19 +23,28 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options import Text.Pandoc.Options
import Documentation.Haddock.Parser import Documentation.Haddock.Parser
import Documentation.Haddock.Types import Documentation.Haddock.Types
import Debug.Trace (trace)
-- | Parse Haddock markup and return a 'Pandoc' document. -- | Parse Haddock markup and return a 'Pandoc' document.
readHaddock :: ReaderOptions -- ^ Reader options readHaddock :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse -> String -- ^ String to parse
-> Pandoc -> Pandoc
readHaddock _ = B.doc . docHToBlocks . parseParas readHaddock opts = B.doc . docHToBlocks . trace' . parseParas
where trace' x = if readerTrace opts
then trace (show x) x
else x
docHToBlocks :: DocH String Identifier -> Blocks docHToBlocks :: DocH String Identifier -> Blocks
docHToBlocks d' = docHToBlocks d' =
case d' of case d' of
DocEmpty -> mempty DocEmpty -> mempty
DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) ->
B.headerWith (ident,[],[]) (headerLevel h)
(docHToInlines False $ headerTitle h)
DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
DocString _ -> inlineFallback DocString _ -> inlineFallback
DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h)
DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h
DocParagraph ils -> B.para $ docHToInlines False ils DocParagraph ils -> B.para $ docHToInlines False ils
DocIdentifier _ -> inlineFallback DocIdentifier _ -> inlineFallback
DocIdentifierUnchecked _ -> inlineFallback DocIdentifierUnchecked _ -> inlineFallback
@ -64,7 +73,7 @@ docHToBlocks d' =
consolidatePlains = B.fromList . consolidatePlains' . B.toList consolidatePlains = B.fromList . consolidatePlains' . B.toList
consolidatePlains' zs@(Plain _ : _) = consolidatePlains' zs@(Plain _ : _) =
let (xs, ys) = span isPlain zs in let (xs, ys) = span isPlain zs in
Plain (concatMap extractContents xs) : consolidatePlains' ys Para (concatMap extractContents xs) : consolidatePlains' ys
consolidatePlains' (x : xs) = x : consolidatePlains' xs consolidatePlains' (x : xs) = x : consolidatePlains' xs
consolidatePlains' [] = [] consolidatePlains' [] = []
isPlain (Plain _) = True isPlain (Plain _) = True

View file

@ -0,0 +1,357 @@
{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-}
{-
Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Writers.Haddock
Copyright : Copyright (C) 2014 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of 'Pandoc' documents to haddock markup.
Haddock: <http://www.haskell.org/haddock/doc/html/>
-}
module Text.Pandoc.Writers.Haddock (writeHaddock) where
import Text.Pandoc.Definition
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Shared
import Text.Pandoc.Writers.Shared
import Text.Pandoc.Options
import Data.List ( intersperse, transpose )
import Text.Pandoc.Pretty
import Control.Monad.State
import Text.Pandoc.Readers.TeXMath (readTeXMath')
import Network.URI (isURI)
import Data.Default
type Notes = [[Block]]
data WriterState = WriterState { stNotes :: Notes }
instance Default WriterState
where def = WriterState{ stNotes = [] }
-- | Convert Pandoc to Haddock.
writeHaddock :: WriterOptions -> Pandoc -> String
writeHaddock opts document =
evalState (pandocToHaddock opts{
writerWrapText = writerWrapText opts } document) def
-- | Return haddock representation of document.
pandocToHaddock :: WriterOptions -> Pandoc -> State WriterState String
pandocToHaddock opts (Pandoc meta blocks) = do
let colwidth = if writerWrapText opts
then Just $ writerColumns opts
else Nothing
body <- blockListToHaddock opts blocks
st <- get
notes' <- notesToHaddock opts (reverse $ stNotes st)
let render' :: Doc -> String
render' = render colwidth
let main = render' $ body <>
(if isEmpty notes' then empty else blankline <> notes')
metadata <- metaToJSON opts
(fmap (render colwidth) . blockListToHaddock opts)
(fmap (render colwidth) . inlineListToHaddock opts)
meta
let context = defField "body" main
$ metadata
if writerStandalone opts
then return $ renderTemplate' (writerTemplate opts) context
else return main
-- | Return haddock representation of notes.
notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc
notesToHaddock opts notes =
mapM (\(num, note) -> noteToHaddock opts num note) (zip [1..] notes) >>=
return . vsep
-- | Return haddock representation of a note.
noteToHaddock :: WriterOptions -> Int -> [Block] -> State WriterState Doc
noteToHaddock opts num blocks = do
contents <- blockListToHaddock opts blocks
let num' = text $ writerIdentifierPrefix opts ++ show num
let marker = text "[" <> num' <> text "]"
let markerSize = 4 + offset num'
let spacer = case writerTabStop opts - markerSize of
n | n > 0 -> text $ replicate n ' '
_ -> text " "
return $ if isEnabled Ext_footnotes opts
then hang (writerTabStop opts) (marker <> spacer) contents
else marker <> spacer <> contents
-- | Escape special characters for Haddock.
escapeString :: String -> String
escapeString = escapeStringUsing haddockEscapes
where haddockEscapes = backslashEscapes "\\/'`\"@<"
-- | Convert Pandoc block element to haddock.
blockToHaddock :: WriterOptions -- ^ Options
-> Block -- ^ Block element
-> State WriterState Doc
blockToHaddock _ Null = return empty
blockToHaddock opts (Div _ ils) = do
contents <- blockListToHaddock opts ils
return $ contents <> blankline
blockToHaddock opts (Plain inlines) = do
contents <- inlineListToHaddock opts inlines
return $ contents <> cr
-- title beginning with fig: indicates figure
blockToHaddock opts (Para [Image alt (src,'f':'i':'g':':':tit)]) =
blockToHaddock opts (Para [Image alt (src,tit)])
blockToHaddock opts (Para inlines) =
-- TODO: if it contains linebreaks, we need to use a @...@ block
(<> blankline) `fmap` blockToHaddock opts (Plain inlines)
blockToHaddock _ (RawBlock f str)
| f == "haddock" = do
return $ text str <> text "\n"
| otherwise = return empty
blockToHaddock _ HorizontalRule =
return $ blankline <> text "--------------" <> blankline
blockToHaddock opts (Header level (ident,_,_) inlines) = do
contents <- inlineListToHaddock opts inlines
let attr' = if null ident
then empty
else cr <> text "#" <> text ident <> text "#"
return $ nowrap (text (replicate level '=') <> space <> contents)
<> attr' <> blankline
blockToHaddock _ (CodeBlock (_,_,_) str) =
return $ prefixed "> " (text str) <> blankline
-- Nothing in haddock corresponds to block quotes:
blockToHaddock opts (BlockQuote blocks) =
blockListToHaddock opts blocks
-- Haddock doesn't have tables. Use haddock tables in code.
blockToHaddock opts (Table caption aligns widths headers rows) = do
caption' <- inlineListToHaddock opts caption
let caption'' = if null caption || not (isEnabled Ext_table_captions opts)
then empty
else blankline <> caption' <> blankline
rawHeaders <- mapM (blockListToHaddock opts) headers
rawRows <- mapM (mapM (blockListToHaddock opts)) rows
let isSimple = all (==0) widths
let isPlainBlock (Plain _) = True
isPlainBlock _ = False
let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows)
(nst,tbl) <- case True of
_ | isSimple -> fmap (nest 2,) $
pandocTable opts (all null headers) aligns widths
rawHeaders rawRows
| not hasBlocks -> fmap (nest 2,) $
pandocTable opts (all null headers) aligns widths
rawHeaders rawRows
| otherwise -> fmap (id,) $
gridTable opts (all null headers) aligns widths
rawHeaders rawRows
return $ prefixed "> " $ nst $ tbl $$ blankline $$ caption'' $$ blankline
blockToHaddock opts (BulletList items) = do
contents <- mapM (bulletListItemToHaddock opts) items
return $ cat contents <> blankline
blockToHaddock opts (OrderedList (start,sty,delim) items) = do
let attribs = (start, sty, delim)
let markers = orderedListMarkers attribs
let markers' = map (\m -> if length m < 3
then m ++ replicate (3 - length m) ' '
else m) markers
contents <- mapM (\(item, num) -> orderedListItemToHaddock opts item num) $
zip markers' items
return $ cat contents <> blankline
blockToHaddock opts (DefinitionList items) = do
contents <- mapM (definitionListItemToHaddock opts) items
return $ cat contents <> blankline
pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double]
-> [Doc] -> [[Doc]] -> State WriterState Doc
pandocTable opts headless aligns widths rawHeaders rawRows = do
let isSimple = all (==0) widths
let alignHeader alignment = case alignment of
AlignLeft -> lblock
AlignCenter -> cblock
AlignRight -> rblock
AlignDefault -> lblock
let numChars = maximum . map offset
let widthsInChars = if isSimple
then map ((+2) . numChars)
$ transpose (rawHeaders : rawRows)
else map
(floor . (fromIntegral (writerColumns opts) *))
widths
let makeRow = hcat . intersperse (lblock 1 (text " ")) .
(zipWith3 alignHeader aligns widthsInChars)
let rows' = map makeRow rawRows
let head' = makeRow rawHeaders
let maxRowHeight = maximum $ map height (head':rows')
let underline = cat $ intersperse (text " ") $
map (\width -> text (replicate width '-')) widthsInChars
let border = if maxRowHeight > 1
then text (replicate (sum widthsInChars +
length widthsInChars - 1) '-')
else if headless
then underline
else empty
let head'' = if headless
then empty
else border <> cr <> head'
let body = if maxRowHeight > 1
then vsep rows'
else vcat rows'
let bottom = if headless
then underline
else border
return $ head'' $$ underline $$ body $$ bottom
gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double]
-> [Doc] -> [[Doc]] -> State WriterState Doc
gridTable opts headless _aligns widths headers' rawRows = do
let numcols = length headers'
let widths' = if all (==0) widths
then replicate numcols (1.0 / fromIntegral numcols)
else widths
let widthsInChars = map (floor . (fromIntegral (writerColumns opts) *)) widths'
let hpipeBlocks blocks = hcat [beg, middle, end]
where h = maximum (map height blocks)
sep' = lblock 3 $ vcat (map text $ replicate h " | ")
beg = lblock 2 $ vcat (map text $ replicate h "| ")
end = lblock 2 $ vcat (map text $ replicate h " |")
middle = chomp $ hcat $ intersperse sep' blocks
let makeRow = hpipeBlocks . zipWith lblock widthsInChars
let head' = makeRow headers'
let rows' = map (makeRow . map chomp) rawRows
let border ch = char '+' <> char ch <>
(hcat $ intersperse (char ch <> char '+' <> char ch) $
map (\l -> text $ replicate l ch) widthsInChars) <>
char ch <> char '+'
let body = vcat $ intersperse (border '-') rows'
let head'' = if headless
then empty
else head' $$ border '='
return $ border '-' $$ head'' $$ body $$ border '-'
-- | Convert bullet list item (list of blocks) to haddock
bulletListItemToHaddock :: WriterOptions -> [Block] -> State WriterState Doc
bulletListItemToHaddock opts items = do
contents <- blockListToHaddock opts items
let sps = replicate (writerTabStop opts - 2) ' '
let start = text ('-' : ' ' : sps)
-- remove trailing blank line if it is a tight list
let contents' = case reverse items of
(BulletList xs:_) | isTightList xs ->
chomp contents <> cr
(OrderedList _ xs:_) | isTightList xs ->
chomp contents <> cr
_ -> contents
return $ hang (writerTabStop opts) start $ contents' <> cr
-- | Convert ordered list item (a list of blocks) to haddock
orderedListItemToHaddock :: WriterOptions -- ^ options
-> String -- ^ list item marker
-> [Block] -- ^ list item (list of blocks)
-> State WriterState Doc
orderedListItemToHaddock opts marker items = do
contents <- blockListToHaddock opts items
let sps = case length marker - writerTabStop opts of
n | n > 0 -> text $ replicate n ' '
_ -> text " "
let start = text marker <> sps
return $ hang (writerTabStop opts) start $ contents <> cr
-- | Convert definition list item (label, list of blocks) to haddock
definitionListItemToHaddock :: WriterOptions
-> ([Inline],[[Block]])
-> State WriterState Doc
definitionListItemToHaddock opts (label, defs) = do
labelText <- inlineListToHaddock opts label
defs' <- mapM (mapM (blockToHaddock opts)) defs
let contents = vcat $ map (\d -> hang 4 empty $ vcat d <> cr) defs'
return $ nowrap (brackets labelText) <> cr <> contents <> cr
-- | Convert list of Pandoc block elements to haddock
blockListToHaddock :: WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
-> State WriterState Doc
blockListToHaddock opts blocks =
mapM (blockToHaddock opts) blocks >>= return . cat
-- | Convert list of Pandoc inline elements to haddock.
inlineListToHaddock :: WriterOptions -> [Inline] -> State WriterState Doc
inlineListToHaddock opts lst =
mapM (inlineToHaddock opts) lst >>= return . cat
-- | Convert Pandoc inline element to haddock.
inlineToHaddock :: WriterOptions -> Inline -> State WriterState Doc
inlineToHaddock opts (Span (ident,_,_) ils) = do
contents <- inlineListToHaddock opts ils
if not (null ident) && null ils
then return $ "#" <> text ident <> "#"
else return contents
inlineToHaddock opts (Emph lst) = do
contents <- inlineListToHaddock opts lst
return $ "/" <> contents <> "/"
inlineToHaddock opts (Strong lst) = do
contents <- inlineListToHaddock opts lst
return $ "__" <> contents <> "__"
inlineToHaddock opts (Strikeout lst) = do
contents <- inlineListToHaddock opts lst
-- not supported in haddock, but we fake it:
return $ "~~" <> contents <> "~~"
-- not supported in haddock:
inlineToHaddock opts (Superscript lst) = inlineListToHaddock opts lst
-- not supported in haddock:
inlineToHaddock opts (Subscript lst) = inlineListToHaddock opts lst
-- not supported in haddock:
inlineToHaddock opts (SmallCaps lst) = inlineListToHaddock opts lst
inlineToHaddock opts (Quoted SingleQuote lst) = do
contents <- inlineListToHaddock opts lst
return $ "" <> contents <> ""
inlineToHaddock opts (Quoted DoubleQuote lst) = do
contents <- inlineListToHaddock opts lst
return $ "" <> contents <> ""
inlineToHaddock _ (Code _ str) =
return $ "@" <> text (escapeString str) <> "@"
inlineToHaddock _ (Str str) = do
return $ text $ escapeString str
inlineToHaddock opts (Math mt str) = do
let adjust x = case mt of
DisplayMath -> cr <> x <> cr
InlineMath -> x
adjust `fmap` (inlineListToHaddock opts $ readTeXMath' mt str)
inlineToHaddock _ (RawInline f str)
| f == "haddock" = return $ text str
| otherwise = return empty
-- no line break in haddock (see above on CodeBlock)
inlineToHaddock _ (LineBreak) = return cr
inlineToHaddock _ Space = return space
inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst
inlineToHaddock opts (Link txt (src, _)) = do
linktext <- inlineListToHaddock opts txt
let useAuto = isURI src &&
case txt of
[Str s] | escapeURI s == src -> True
_ -> False
return $ nowrap $ "<" <> text src <>
(if useAuto then empty else space <> linktext) <> ">"
inlineToHaddock opts (Image alternate (source, tit)) = do
linkhaddock <- inlineToHaddock opts (Link alternate (source, tit))
return $ "<" <> linkhaddock <> ">"
-- haddock doesn't have notes, but we can fake it:
inlineToHaddock opts (Note contents) = do
modify (\st -> st{ stNotes = contents : stNotes st })
st <- get
let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st)
return $ "[" <> ref <> "]"