447 lines
19 KiB
Haskell
447 lines
19 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
||
{-
|
||
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
|
||
|
||
This program is free software; you can redistribute it and/or modify
|
||
it under the terms of the GNU General Public License as published by
|
||
the Free Software Foundation; either version 2 of the License, or
|
||
(at your option) any later version.
|
||
|
||
This program is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
GNU General Public License for more details.
|
||
|
||
You should have received a copy of the GNU General Public License
|
||
along with this program; if not, write to the Free Software
|
||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
-}
|
||
|
||
{- |
|
||
Module : Text.Pandoc.Writers.RST
|
||
Copyright : Copyright (C) 2006-2014 John MacFarlane
|
||
License : GNU GPL, version 2 or above
|
||
|
||
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||
Stability : alpha
|
||
Portability : portable
|
||
|
||
Conversion of 'Pandoc' documents to reStructuredText.
|
||
|
||
reStructuredText: <http://docutils.sourceforge.net/rst.html>
|
||
-}
|
||
module Text.Pandoc.Writers.RST ( writeRST ) where
|
||
import Text.Pandoc.Definition
|
||
import Text.Pandoc.Options
|
||
import Text.Pandoc.Shared
|
||
import Text.Pandoc.Writers.Shared
|
||
import Text.Pandoc.Templates (renderTemplate')
|
||
import Text.Pandoc.Builder (deleteMeta)
|
||
import Data.Maybe (fromMaybe)
|
||
import Data.List ( isPrefixOf, stripPrefix, intersperse, transpose )
|
||
import Network.URI (isURI)
|
||
import Text.Pandoc.Pretty
|
||
import Control.Monad.State
|
||
import Control.Applicative ( (<$>) )
|
||
import Data.Char (isSpace, toLower)
|
||
|
||
type Refs = [([Inline], Target)]
|
||
|
||
data WriterState =
|
||
WriterState { stNotes :: [[Block]]
|
||
, stLinks :: Refs
|
||
, stImages :: [([Inline], (String, String, Maybe String))]
|
||
, stHasMath :: Bool
|
||
, stOptions :: WriterOptions
|
||
}
|
||
|
||
-- | Convert Pandoc to RST.
|
||
writeRST :: WriterOptions -> Pandoc -> String
|
||
writeRST opts document =
|
||
let st = WriterState { stNotes = [], stLinks = [],
|
||
stImages = [], stHasMath = False,
|
||
stOptions = opts }
|
||
in evalState (pandocToRST document) st
|
||
|
||
-- | Return RST representation of document.
|
||
pandocToRST :: Pandoc -> State WriterState String
|
||
pandocToRST (Pandoc meta blocks) = do
|
||
opts <- liftM stOptions get
|
||
let colwidth = if writerWrapText opts
|
||
then Just $ writerColumns opts
|
||
else Nothing
|
||
let subtit = case lookupMeta "subtitle" meta of
|
||
Just (MetaBlocks [Plain xs]) -> xs
|
||
_ -> []
|
||
title <- titleToRST (docTitle meta) subtit
|
||
metadata <- metaToJSON opts
|
||
(fmap (render colwidth) . blockListToRST)
|
||
(fmap (trimr . render colwidth) . inlineListToRST)
|
||
$ deleteMeta "title" $ deleteMeta "subtitle" meta
|
||
body <- blockListToRST blocks
|
||
notes <- liftM (reverse . stNotes) get >>= notesToRST
|
||
-- note that the notes may contain refs, so we do them first
|
||
refs <- liftM (reverse . stLinks) get >>= refsToRST
|
||
pics <- liftM (reverse . stImages) get >>= pictRefsToRST
|
||
hasMath <- liftM stHasMath get
|
||
let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics]
|
||
let context = defField "body" main
|
||
$ defField "toc" (writerTableOfContents opts)
|
||
$ defField "toc-depth" (writerTOCDepth opts)
|
||
$ defField "math" hasMath
|
||
$ defField "title" (render Nothing title :: String)
|
||
$ defField "math" hasMath
|
||
$ metadata
|
||
if writerStandalone opts
|
||
then return $ renderTemplate' (writerTemplate opts) context
|
||
else return main
|
||
|
||
-- | Return RST representation of reference key table.
|
||
refsToRST :: Refs -> State WriterState Doc
|
||
refsToRST refs = mapM keyToRST refs >>= return . vcat
|
||
|
||
-- | Return RST representation of a reference key.
|
||
keyToRST :: ([Inline], (String, String))
|
||
-> State WriterState Doc
|
||
keyToRST (label, (src, _)) = do
|
||
label' <- inlineListToRST label
|
||
let label'' = if ':' `elem` (render Nothing label')
|
||
then char '`' <> label' <> char '`'
|
||
else label'
|
||
return $ nowrap $ ".. _" <> label'' <> ": " <> text src
|
||
|
||
-- | Return RST representation of notes.
|
||
notesToRST :: [[Block]] -> State WriterState Doc
|
||
notesToRST notes =
|
||
mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>=
|
||
return . vsep
|
||
|
||
-- | Return RST representation of a note.
|
||
noteToRST :: Int -> [Block] -> State WriterState Doc
|
||
noteToRST num note = do
|
||
contents <- blockListToRST note
|
||
let marker = ".. [" <> text (show num) <> "]"
|
||
return $ nowrap $ marker $$ nest 3 contents
|
||
|
||
-- | Return RST representation of picture reference table.
|
||
pictRefsToRST :: [([Inline], (String, String, Maybe String))]
|
||
-> State WriterState Doc
|
||
pictRefsToRST refs = mapM pictToRST refs >>= return . vcat
|
||
|
||
-- | Return RST representation of a picture substitution reference.
|
||
pictToRST :: ([Inline], (String, String,Maybe String))
|
||
-> State WriterState Doc
|
||
pictToRST (label, (src, _, mbtarget)) = do
|
||
label' <- inlineListToRST label
|
||
return $ nowrap
|
||
$ ".. |" <> label' <> "| image:: " <> text src
|
||
$$ case mbtarget of
|
||
Nothing -> empty
|
||
Just t -> " :target: " <> text t
|
||
|
||
-- | Escape special characters for RST.
|
||
escapeString :: String -> String
|
||
escapeString = escapeStringUsing (backslashEscapes "`\\|*_")
|
||
|
||
titleToRST :: [Inline] -> [Inline] -> State WriterState Doc
|
||
titleToRST [] _ = return empty
|
||
titleToRST tit subtit = do
|
||
title <- inlineListToRST tit
|
||
subtitle <- inlineListToRST subtit
|
||
return $ bordered title '=' $$ bordered subtitle '-'
|
||
|
||
bordered :: Doc -> Char -> Doc
|
||
bordered contents c =
|
||
if len > 0
|
||
then border $$ contents $$ border
|
||
else empty
|
||
where len = offset contents
|
||
border = text (replicate len c)
|
||
|
||
-- | Convert Pandoc block element to RST.
|
||
blockToRST :: Block -- ^ Block element
|
||
-> State WriterState Doc
|
||
blockToRST Null = return empty
|
||
blockToRST (Div attr bs) = do
|
||
contents <- blockListToRST bs
|
||
let startTag = ".. raw:: html" $+$ nest 3 (tagWithAttrs "div" attr)
|
||
let endTag = ".. raw:: html" $+$ nest 3 "</div>"
|
||
return $ blankline <> startTag $+$ contents $+$ endTag $$ blankline
|
||
blockToRST (Plain inlines) = inlineListToRST inlines
|
||
-- title beginning with fig: indicates that the image is a figure
|
||
blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
|
||
capt <- inlineListToRST txt
|
||
let fig = "figure:: " <> text src
|
||
let alt = ":alt: " <> if null tit then capt else text tit
|
||
return $ hang 3 ".. " $ fig $$ alt $+$ capt $$ blankline
|
||
blockToRST (Para inlines)
|
||
| LineBreak `elem` inlines = do -- use line block if LineBreaks
|
||
lns <- mapM inlineListToRST $ splitBy (==LineBreak) inlines
|
||
return $ (vcat $ map (text "| " <>) lns) <> blankline
|
||
| otherwise = do
|
||
contents <- inlineListToRST inlines
|
||
return $ contents <> blankline
|
||
blockToRST (RawBlock f@(Format f') str)
|
||
| f == "rst" = return $ text str
|
||
| otherwise = return $ blankline <> ".. raw:: " <>
|
||
text (map toLower f') $+$
|
||
(nest 3 $ text str) $$ blankline
|
||
blockToRST HorizontalRule =
|
||
return $ blankline $$ "--------------" $$ blankline
|
||
blockToRST (Header level _ inlines) = do
|
||
contents <- inlineListToRST inlines
|
||
let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
|
||
let border = text $ replicate (offset contents) headerChar
|
||
return $ nowrap $ contents $$ border $$ blankline
|
||
blockToRST (CodeBlock (_,classes,kvs) str) = do
|
||
opts <- stOptions <$> get
|
||
let tabstop = writerTabStop opts
|
||
let startnum = maybe "" (\x -> " " <> text x) $ lookup "startFrom" kvs
|
||
let numberlines = if "numberLines" `elem` classes
|
||
then " :number-lines:" <> startnum
|
||
else empty
|
||
if "haskell" `elem` classes && "literate" `elem` classes &&
|
||
isEnabled Ext_literate_haskell opts
|
||
then return $ prefixed "> " (text str) $$ blankline
|
||
else return $
|
||
(case [c | c <- classes,
|
||
c `notElem` ["sourceCode","literate","numberLines"]] of
|
||
[] -> "::"
|
||
(lang:_) -> (".. code:: " <> text lang) $$ numberlines)
|
||
$+$ nest tabstop (text str) $$ blankline
|
||
blockToRST (BlockQuote blocks) = do
|
||
tabstop <- get >>= (return . writerTabStop . stOptions)
|
||
contents <- blockListToRST blocks
|
||
return $ (nest tabstop contents) <> blankline
|
||
blockToRST (Table caption _ widths headers rows) = do
|
||
caption' <- inlineListToRST caption
|
||
let caption'' = if null caption
|
||
then empty
|
||
else blankline <> text "Table: " <> caption'
|
||
headers' <- mapM blockListToRST headers
|
||
rawRows <- mapM (mapM blockListToRST) rows
|
||
-- let isSimpleCell [Plain _] = True
|
||
-- isSimpleCell [Para _] = True
|
||
-- isSimpleCell [] = True
|
||
-- isSimpleCell _ = False
|
||
-- let isSimple = all (==0) widths && all (all isSimpleCell) rows
|
||
let numChars = maximum . map offset
|
||
opts <- get >>= return . stOptions
|
||
let widthsInChars =
|
||
if all (== 0) widths
|
||
then map ((+2) . numChars) $ transpose (headers' : rawRows)
|
||
else map (floor . (fromIntegral (writerColumns opts) *)) widths
|
||
let hpipeBlocks blocks = hcat [beg, middle, end]
|
||
where h = maximum (map height blocks)
|
||
sep' = lblock 3 $ vcat (map text $ replicate h " | ")
|
||
beg = lblock 2 $ vcat (map text $ replicate h "| ")
|
||
end = lblock 2 $ vcat (map text $ replicate h " |")
|
||
middle = hcat $ intersperse sep' blocks
|
||
let makeRow = hpipeBlocks . zipWith lblock widthsInChars
|
||
let head' = makeRow headers'
|
||
rows' <- mapM (\row -> do cols <- mapM blockListToRST row
|
||
return $ makeRow cols) rows
|
||
let border ch = char '+' <> char ch <>
|
||
(hcat $ intersperse (char ch <> char '+' <> char ch) $
|
||
map (\l -> text $ replicate l ch) widthsInChars) <>
|
||
char ch <> char '+'
|
||
let body = vcat $ intersperse (border '-') rows'
|
||
let head'' = if all null headers
|
||
then empty
|
||
else head' $$ border '='
|
||
return $ border '-' $$ head'' $$ body $$ border '-' $$ caption'' $$ blankline
|
||
blockToRST (BulletList items) = do
|
||
contents <- mapM bulletListItemToRST items
|
||
-- ensure that sublists have preceding blank line
|
||
return $ blankline $$ vcat contents $$ blankline
|
||
blockToRST (OrderedList (start, style', delim) items) = do
|
||
let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim
|
||
then take (length items) $ repeat "#."
|
||
else take (length items) $ orderedListMarkers
|
||
(start, style', delim)
|
||
let maxMarkerLength = maximum $ map length markers
|
||
let markers' = map (\m -> let s = maxMarkerLength - length m
|
||
in m ++ replicate s ' ') markers
|
||
contents <- mapM (\(item, num) -> orderedListItemToRST item num) $
|
||
zip markers' items
|
||
-- ensure that sublists have preceding blank line
|
||
return $ blankline $$ vcat contents $$ blankline
|
||
blockToRST (DefinitionList items) = do
|
||
contents <- mapM definitionListItemToRST items
|
||
-- ensure that sublists have preceding blank line
|
||
return $ blankline $$ vcat contents $$ blankline
|
||
|
||
-- | Convert bullet list item (list of blocks) to RST.
|
||
bulletListItemToRST :: [Block] -> State WriterState Doc
|
||
bulletListItemToRST items = do
|
||
contents <- blockListToRST items
|
||
return $ hang 3 "- " $ contents <> cr
|
||
|
||
-- | Convert ordered list item (a list of blocks) to RST.
|
||
orderedListItemToRST :: String -- ^ marker for list item
|
||
-> [Block] -- ^ list item (list of blocks)
|
||
-> State WriterState Doc
|
||
orderedListItemToRST marker items = do
|
||
contents <- blockListToRST items
|
||
let marker' = marker ++ " "
|
||
return $ hang (length marker') (text marker') $ contents <> cr
|
||
|
||
-- | Convert defintion list item (label, list of blocks) to RST.
|
||
definitionListItemToRST :: ([Inline], [[Block]]) -> State WriterState Doc
|
||
definitionListItemToRST (label, defs) = do
|
||
label' <- inlineListToRST label
|
||
contents <- liftM vcat $ mapM blockListToRST defs
|
||
tabstop <- get >>= (return . writerTabStop . stOptions)
|
||
return $ label' $$ nest tabstop (nestle contents <> cr)
|
||
|
||
-- | Convert list of Pandoc block elements to RST.
|
||
blockListToRST :: [Block] -- ^ List of block elements
|
||
-> State WriterState Doc
|
||
blockListToRST blocks = mapM blockToRST blocks >>= return . vcat
|
||
|
||
-- | Convert list of Pandoc inline elements to RST.
|
||
inlineListToRST :: [Inline] -> State WriterState Doc
|
||
inlineListToRST lst =
|
||
mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . hcat
|
||
where -- remove spaces after displaymath, as they screw up indentation:
|
||
removeSpaceAfterDisplayMath (Math DisplayMath x : zs) =
|
||
Math DisplayMath x : dropWhile (==Space) zs
|
||
removeSpaceAfterDisplayMath (x:xs) = x : removeSpaceAfterDisplayMath xs
|
||
removeSpaceAfterDisplayMath [] = []
|
||
insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed
|
||
insertBS (x:y:z:zs)
|
||
| isComplex y && surroundComplex x z =
|
||
x : y : RawInline "rst" "\\ " : insertBS (z:zs)
|
||
insertBS (x:y:zs)
|
||
| isComplex x && not (okAfterComplex y) =
|
||
x : RawInline "rst" "\\ " : insertBS (y : zs)
|
||
| isComplex y && not (okBeforeComplex x) =
|
||
x : RawInline "rst" "\\ " : insertBS (y : zs)
|
||
| otherwise =
|
||
x : insertBS (y : zs)
|
||
insertBS (x:ys) = x : insertBS ys
|
||
insertBS [] = []
|
||
surroundComplex :: Inline -> Inline -> Bool
|
||
surroundComplex (Str s@(_:_)) (Str s'@(_:_)) =
|
||
case (last s, head s') of
|
||
('\'','\'') -> True
|
||
('"','"') -> True
|
||
('<','>') -> True
|
||
('[',']') -> True
|
||
('{','}') -> True
|
||
_ -> False
|
||
surroundComplex _ _ = False
|
||
okAfterComplex :: Inline -> Bool
|
||
okAfterComplex Space = True
|
||
okAfterComplex LineBreak = True
|
||
okAfterComplex (Str (c:_)) = isSpace c || c `elem` "-.,:;!?\\/'\")]}>–—"
|
||
okAfterComplex _ = False
|
||
okBeforeComplex :: Inline -> Bool
|
||
okBeforeComplex Space = True
|
||
okBeforeComplex LineBreak = True
|
||
okBeforeComplex (Str (c:_)) = isSpace c || c `elem` "-:/'\"<([{–—"
|
||
okBeforeComplex _ = False
|
||
isComplex :: Inline -> Bool
|
||
isComplex (Emph _) = True
|
||
isComplex (Strong _) = True
|
||
isComplex (SmallCaps _) = True
|
||
isComplex (Strikeout _) = True
|
||
isComplex (Superscript _) = True
|
||
isComplex (Subscript _) = True
|
||
isComplex (Link _ _) = True
|
||
isComplex (Image _ _) = True
|
||
isComplex (Code _ _) = True
|
||
isComplex (Math _ _) = True
|
||
isComplex _ = False
|
||
|
||
-- | Convert Pandoc inline element to RST.
|
||
inlineToRST :: Inline -> State WriterState Doc
|
||
inlineToRST (Span _ ils) = inlineListToRST ils
|
||
inlineToRST (Emph lst) = do
|
||
contents <- inlineListToRST lst
|
||
return $ "*" <> contents <> "*"
|
||
inlineToRST (Strong lst) = do
|
||
contents <- inlineListToRST lst
|
||
return $ "**" <> contents <> "**"
|
||
inlineToRST (Strikeout lst) = do
|
||
contents <- inlineListToRST lst
|
||
return $ "[STRIKEOUT:" <> contents <> "]"
|
||
inlineToRST (Superscript lst) = do
|
||
contents <- inlineListToRST lst
|
||
return $ ":sup:`" <> contents <> "`"
|
||
inlineToRST (Subscript lst) = do
|
||
contents <- inlineListToRST lst
|
||
return $ ":sub:`" <> contents <> "`"
|
||
inlineToRST (SmallCaps lst) = inlineListToRST lst
|
||
inlineToRST (Quoted SingleQuote lst) = do
|
||
contents <- inlineListToRST lst
|
||
return $ "‘" <> contents <> "’"
|
||
inlineToRST (Quoted DoubleQuote lst) = do
|
||
contents <- inlineListToRST lst
|
||
return $ "“" <> contents <> "”"
|
||
inlineToRST (Cite _ lst) =
|
||
inlineListToRST lst
|
||
inlineToRST (Code _ str) = return $ "``" <> text str <> "``"
|
||
inlineToRST (Str str) = return $ text $ escapeString str
|
||
inlineToRST (Math t str) = do
|
||
modify $ \st -> st{ stHasMath = True }
|
||
return $ if t == InlineMath
|
||
then ":math:`" <> text str <> "`"
|
||
else if '\n' `elem` str
|
||
then blankline $$ ".. math::" $$
|
||
blankline $$ nest 3 (text str) $$ blankline
|
||
else blankline $$ (".. math:: " <> text str) $$ blankline
|
||
inlineToRST (RawInline f x)
|
||
| f == "rst" = return $ text x
|
||
| otherwise = return empty
|
||
inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para)
|
||
inlineToRST Space = return space
|
||
-- autolink
|
||
inlineToRST (Link [Str str] (src, _))
|
||
| isURI src &&
|
||
if "mailto:" `isPrefixOf` src
|
||
then src == escapeURI ("mailto:" ++ str)
|
||
else src == escapeURI str = do
|
||
let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
|
||
return $ text srcSuffix
|
||
inlineToRST (Link [Image alt (imgsrc,imgtit)] (src, _tit)) = do
|
||
label <- registerImage alt (imgsrc,imgtit) (Just src)
|
||
return $ "|" <> label <> "|"
|
||
inlineToRST (Link txt (src, tit)) = do
|
||
useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
|
||
linktext <- inlineListToRST $ normalizeSpaces txt
|
||
if useReferenceLinks
|
||
then do refs <- get >>= return . stLinks
|
||
case lookup txt refs of
|
||
Just (src',tit') ->
|
||
if src == src' && tit == tit'
|
||
then return $ "`" <> linktext <> "`_"
|
||
else do -- duplicate label, use non-reference link
|
||
return $ "`" <> linktext <> " <" <> text src <> ">`__"
|
||
Nothing -> do
|
||
modify $ \st -> st { stLinks = (txt,(src,tit)):refs }
|
||
return $ "`" <> linktext <> "`_"
|
||
else return $ "`" <> linktext <> " <" <> text src <> ">`__"
|
||
inlineToRST (Image alternate (source, tit)) = do
|
||
label <- registerImage alternate (source,tit) Nothing
|
||
return $ "|" <> label <> "|"
|
||
inlineToRST (Note contents) = do
|
||
-- add to notes in state
|
||
notes <- get >>= return . stNotes
|
||
modify $ \st -> st { stNotes = contents:notes }
|
||
let ref = show $ (length notes) + 1
|
||
return $ " [" <> text ref <> "]_"
|
||
|
||
registerImage :: [Inline] -> Target -> Maybe String -> State WriterState Doc
|
||
registerImage alt (src,tit) mbtarget = do
|
||
pics <- get >>= return . stImages
|
||
txt <- case lookup alt pics of
|
||
Just (s,t,mbt) | (s,t,mbt) == (src,tit,mbtarget) -> return alt
|
||
_ -> do
|
||
let alt' = if null alt || alt == [Str ""]
|
||
then [Str $ "image" ++ show (length pics)]
|
||
else alt
|
||
modify $ \st -> st { stImages =
|
||
(alt', (src,tit, mbtarget)):stImages st }
|
||
return alt'
|
||
inlineListToRST txt
|