Major code cleanup in all modules. (Removed unneeded imports,

reformatted, etc.)  More major changes are documented below:

+ Removed Text.Pandoc.ParserCombinators and moved all its definitions
  to Text.Pandoc.Shared.
+ In Text.Pandoc.Shared:
  - Removed unneeded 'try' in blanklines.
  - Removed endsWith function and rewrote functions to use isSuffixOf instead.
  - Added >>~ combinator.
  - Rewrote stripTrailingNewlines, removeLeadingSpaces.
+ Moved Text.Pandoc.Entities -> Text.Pandoc.CharacterReferences.
  - Removed unneeded functions charToEntity, charToNumericalEntity.
  - Renamed functions using proper terminology (character references,
    not entities).  decodeEntities -> decodeCharacterReferences,
    characterEntity -> characterReference.
  - Moved escapeStringToXML to Docbook writer, which is the only thing
    that uses it.
  - Removed old entity parser in HTML and Markdown readers; replaced with
    new charRef parser in Text.Pandoc.Shared.
+ Fixed accent bug in Text.Pandoc.Readers.LaTeX:  \^{} now correctly
  parses as a '^' character.
+ Text.Pandoc.ASCIIMathML is no longer an exported module.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@835 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2007-08-15 06:00:58 +00:00
parent e814a3f6d2
commit a8e2199034
21 changed files with 2042 additions and 2311 deletions

View file

@ -34,11 +34,9 @@ Description: Pandoc is a Haskell library for converting from one markup
Build-Depends: base, parsec, xhtml, mtl, regex-compat
Hs-Source-Dirs: src
Exposed-Modules: Text.Pandoc,
Text.Pandoc.ASCIIMathML,
Text.Pandoc.Blocks,
Text.Pandoc.Definition,
Text.Pandoc.Entities,
Text.Pandoc.ParserCombinators,
Text.Pandoc.CharacterReferences,
Text.Pandoc.Shared,
Text.Pandoc.UTF8,
Text.Pandoc.Readers.HTML,

View file

@ -39,8 +39,8 @@ inline links:
>
> markdownToRST :: String -> String
> markdownToRST = toUTF8 .
> (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
> (readMarkdown defaultParserState) . fromUTF8
> (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
> (readMarkdown defaultParserState) . fromUTF8
>
> main = interact markdownToRST

View file

@ -43,9 +43,8 @@ module Text.Pandoc.Blocks
rightAlignBlock
)
where
import Text.PrettyPrint
import Data.List (transpose, intersperse)
import Data.List ( intersperse )
-- | A fixed-width block of text. Parameters are width of block,
-- height of block, and list of lines.
@ -53,6 +52,17 @@ data TextBlock = TextBlock Int Int [String]
instance Show TextBlock where
show x = show $ blockToDoc x
-- | Break lines in a list of lines so that none are greater than
-- a given width.
breakLines :: Int -- ^ Maximum length of lines.
-> [String] -- ^ List of lines.
-> [String]
breakLines width [] = []
breakLines width (l:ls) =
if length l > width
then (take width l):(breakLines width ((drop width l):ls))
else l:(breakLines width ls)
-- | Convert a @Doc@ element into a @TextBlock@ with a specified width.
docToBlock :: Int -- ^ Width of text block.
-> Doc -- ^ @Doc@ to convert.
@ -60,13 +70,8 @@ docToBlock :: Int -- ^ Width of text block.
docToBlock width doc =
let rendered = renderStyle (style {lineLength = width,
ribbonsPerLine = 1}) doc
lns = lines rendered
chop [] = []
chop (l:ls) = if length l > width
then (take width l):(chop ((drop width l):ls))
else l:(chop ls)
lns' = chop lns
in TextBlock width (length lns') lns'
lns = breakLines width $ lines rendered
in TextBlock width (length lns) lns
-- | Convert a @TextBlock@ to a @Doc@ element.
blockToDoc :: TextBlock -> Doc
@ -116,8 +121,7 @@ isWhitespace x = x `elem` " \t"
-- | Left-aligns the contents of a @TextBlock@ within the block.
leftAlignBlock :: TextBlock -> TextBlock
leftAlignBlock (TextBlock width height lns) =
TextBlock width height $
map (dropWhile isWhitespace) lns
TextBlock width height $ map (dropWhile isWhitespace) lns
-- | Right-aligns the contents of a @TextBlock@ within the block.
rightAlignBlock :: TextBlock -> TextBlock

View file

@ -17,7 +17,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Entities
Module : Text.Pandoc.CharacterReferences
Copyright : Copyright (C) 2006-7 John MacFarlane
License : GNU GPL, version 2 or above
@ -25,37 +25,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Stability : alpha
Portability : portable
Functions for encoding unicode characters as entity references,
and vice versa.
Functions for parsing character references.
-}
module Text.Pandoc.Entities (
charToEntity,
charToNumericalEntity,
decodeEntities,
escapeCharForXML,
escapeStringForXML,
characterEntity
module Text.Pandoc.CharacterReferences (
characterReference,
decodeCharacterReferences,
) where
import Data.Char ( chr, ord )
import Data.Char ( chr )
import Text.ParserCombinators.Parsec
import Data.Maybe ( fromMaybe )
import qualified Data.Map as Map
-- | Returns a string containing an entity reference for the character.
charToEntity :: Char -> String
charToEntity char = Map.findWithDefault (charToNumericalEntity char) char reverseEntityTable
-- | Returns a string containing a numerical entity reference for the char.
charToNumericalEntity :: Char -> String
charToNumericalEntity ch = "&#" ++ show (ord ch) ++ ";"
-- | Parse character entity.
characterReference :: GenParser Char st Char
characterReference = characterEntity <|>
hexadecimalCharacterReference <|>
decimalCharacterReference <?>
"character entity"
-- | Parse character entity.
characterEntity :: GenParser Char st Char
characterEntity = namedEntity <|> hexEntity <|> decimalEntity <?> "character entity"
-- | Parse character entity.
namedEntity :: GenParser Char st Char
namedEntity = try $ do
characterEntity = try $ do
st <- char '&'
body <- many1 alphaNum
end <- char ';'
@ -63,8 +52,8 @@ namedEntity = try $ do
return $ Map.findWithDefault '?' entity entityTable
-- | Parse hexadecimal entity.
hexEntity :: GenParser Char st Char
hexEntity = try $ do
hexadecimalCharacterReference :: GenParser Char st Char
hexadecimalCharacterReference = try $ do
st <- string "&#"
hex <- oneOf "Xx"
body <- many1 (oneOf "0123456789ABCDEFabcdef")
@ -72,49 +61,23 @@ hexEntity = try $ do
return $ chr $ read ('0':'x':body)
-- | Parse decimal entity.
decimalEntity :: GenParser Char st Char
decimalEntity = try $ do
decimalCharacterReference :: GenParser Char st Char
decimalCharacterReference = try $ do
st <- string "&#"
body <- many1 digit
end <- char ';'
return $ chr $ read body
-- | Escape one character as needed for XML.
escapeCharForXML :: Char -> String
escapeCharForXML x =
case x of
'&' -> "&amp;"
'<' -> "&lt;"
'>' -> "&gt;"
'"' -> "&quot;"
'\160' -> "&nbsp;"
c -> [c]
-- | True if the character needs to be escaped.
needsEscaping :: Char -> Bool
needsEscaping c = c `elem` "&<>\"\160"
-- | Escape string as needed for XML. Entity references are not preserved.
escapeStringForXML :: String -> String
escapeStringForXML "" = ""
escapeStringForXML str =
case break needsEscaping str of
(okay, "") -> okay
(okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
-- | Convert entities in a string to characters.
decodeEntities :: String -> String
decodeEntities str =
case parse (many (characterEntity <|> anyChar)) str str of
decodeCharacterReferences :: String -> String
decodeCharacterReferences str =
case parse (many (characterReference <|> anyChar)) str str of
Left err -> error $ "\nError: " ++ show err
Right result -> result
entityTable :: Map.Map String Char
entityTable = Map.fromList entityTableList
reverseEntityTable :: Map.Map Char String
reverseEntityTable = Map.fromList $ map (\(a,b) -> (b,a)) entityTableList
entityTableList :: [(String, Char)]
entityTableList = [
("&quot;", chr 34),

View file

@ -34,10 +34,10 @@ data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show)
-- | Bibliographic information for the document: title (list of 'Inline'),
-- authors (list of strings), date (string).
data Meta = Meta [Inline] -- title
[String] -- authors
String -- date
deriving (Eq, Show, Read)
data Meta = Meta [Inline] -- title
[String] -- authors
String -- date
deriving (Eq, Show, Read)
-- | Alignment of a table column.
data Alignment = AlignLeft
@ -65,12 +65,11 @@ data ListNumberDelim = DefaultDelim
-- | Block element.
data Block
= Plain [Inline] -- ^ Plain text, not a paragraph
| Null -- ^ Nothing
| Para [Inline] -- ^ Paragraph
| CodeBlock String -- ^ Code block (literal)
| RawHtml String -- ^ Raw HTML block (literal)
| BlockQuote [Block] -- ^ Block quote (list of blocks)
| OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes,
| OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes
-- and a list of items, each a list of blocks)
| BulletList [[Block]] -- ^ Bullet list (list of items, each
-- a list of blocks)
@ -84,6 +83,7 @@ data Block
-- relative column widths, column headers
-- (each a list of blocks), and rows
-- (each a list of lists of blocks)
| Null -- ^ Nothing
deriving (Eq, Read, Show)
-- | Type of quotation marks to use in Quoted inline.
@ -112,6 +112,5 @@ data Inline
| Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target
| Image [Inline] Target -- ^ Image: alt text (list of inlines), target
-- and target
| Note [Block] -- ^ Footnote or endnote - reference (string),
-- text (list of blocks)
| Note [Block] -- ^ Footnote or endnote
deriving (Show, Eq, Read)

View file

@ -1,198 +0,0 @@
{-
Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.ParserCombinators
Copyright : Copyright (C) 2006-7 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Parser combinators used in Pandoc readers.
-}
module Text.Pandoc.ParserCombinators (
anyLine,
many1Till,
notFollowedBy',
oneOfStrings,
spaceChar,
skipSpaces,
blankline,
blanklines,
enclosed,
stringAnyCase,
parseFromString,
lineClump,
charsInBalanced,
charsInBalanced',
romanNumeral,
withHorizDisplacement
) where
import Text.ParserCombinators.Parsec
import Data.Char ( toUpper, toLower )
--- | Parse any line of text
anyLine :: GenParser Char st [Char]
anyLine = try (manyTill anyChar newline) <|> many1 anyChar
-- second alternative is for a line ending with eof
-- | Parses a space or tab.
spaceChar :: CharParser st Char
spaceChar = oneOf " \t"
-- | Skips zero or more spaces or tabs.
skipSpaces :: GenParser Char st ()
skipSpaces = skipMany spaceChar
-- | Skips zero or more spaces or tabs, then reads a newline.
blankline :: GenParser Char st Char
blankline = try (do
skipSpaces
newline)
-- | Parses one or more blank lines and returns a string of newlines.
blanklines :: GenParser Char st [Char]
blanklines = try (many1 blankline)
-- | Parses material enclosed between start and end parsers.
enclosed :: GenParser Char st t -- ^ start parser
-> GenParser Char st end -- ^ end parser
-> GenParser Char st a -- ^ content parser (to be used repeatedly)
-> GenParser Char st [a]
enclosed start end parser = try (do
start
notFollowedBy space
result <- many1Till parser (try end)
return result)
-- | Like @manyTill@, but reads at least one item.
many1Till :: GenParser tok st a
-> GenParser tok st end
-> GenParser tok st [a]
many1Till p end = try (do
first <- p
rest <- manyTill p end
return (first:rest))
-- | A more general form of @notFollowedBy@. This one allows any
-- type of parser to be specified, and succeeds only if that parser fails.
-- It does not consume any input.
notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
notFollowedBy' parser = try (do { c <- try parser; unexpected (show c) }
<|> return ())
-- | Parses one of a list of strings (tried in order).
oneOfStrings :: [String] -> GenParser Char st String
oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
-- | Parse string, case insensitive.
stringAnyCase :: [Char] -> CharParser st String
stringAnyCase [] = string ""
stringAnyCase (x:xs) = try (do
firstChar <- choice [ char (toUpper x), char (toLower x) ]
rest <- stringAnyCase xs
return (firstChar:rest))
-- | Parse contents of 'str' using 'parser' and return result.
parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
parseFromString parser str = try $ do
oldInput <- getInput
setInput str
result <- parser
setInput oldInput
return result
-- | Parse raw line block up to and including blank lines.
lineClump :: GenParser Char st String
lineClump = do
lines <- many1 (do{notFollowedBy blankline; anyLine})
blanks <- blanklines <|> (do{eof; return "\n"})
return ((unlines lines) ++ blanks)
-- | Parse a string of characters between an open character
-- and a close character, including text between balanced
-- pairs of open and close. For example,
-- @charsInBalanced '(' ')'@ will parse "(hello (there))"
-- and return "hello (there)". Stop if a blank line is
-- encountered.
charsInBalanced :: Char -> Char -> GenParser Char st String
charsInBalanced open close = try $ do
char open
raw <- manyTill ( (do res <- charsInBalanced open close
return $ [open] ++ res ++ [close])
<|> (do notFollowedBy' (blankline >> blanklines)
count 1 anyChar))
(char close)
return $ concat raw
-- | Like charsInBalanced, but allow blank lines in the content.
charsInBalanced' :: Char -> Char -> GenParser Char st String
charsInBalanced' open close = try $ do
char open
raw <- manyTill ( (do res <- charsInBalanced open close
return $ [open] ++ res ++ [close])
<|> count 1 anyChar)
(char close)
return $ concat raw
-- | Parses a roman numeral (uppercase or lowercase), returns number.
romanNumeral :: Bool -> -- ^ Uppercase if true
GenParser Char st Int
romanNumeral upper = try $ do
let char' c = char (if upper then toUpper c else c)
let one = char' 'i'
let five = char' 'v'
let ten = char' 'x'
let fifty = char' 'l'
let hundred = char' 'c'
let fivehundred = char' 'd'
let thousand = char' 'm'
thousands <- many thousand >>= (return . (1000 *) . length)
ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
fivehundreds <- many fivehundred >>= (return . (500 *) . length)
fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
hundreds <- many hundred >>= (return . (100 *) . length)
nineties <- option 0 $ try $ ten >> hundred >> return 90
fifties <- many fifty >>= (return . (50 *) . length)
forties <- option 0 $ try $ ten >> fifty >> return 40
tens <- many ten >>= (return . (10 *) . length)
nines <- option 0 $ try $ one >> ten >> return 9
fives <- many five >>= (return . (5*) . length)
fours <- option 0 $ try $ one >> five >> return 4
ones <- many one >>= (return . length)
let total = thousands + ninehundreds + fivehundreds + fourhundreds +
hundreds + nineties + fifties + forties + tens + nines +
fives + fours + ones
if total == 0
then fail "not a roman numeral"
else return total
-- | Applies a parser, returns tuple of its results and its horizontal
-- displacement (the difference between the source column at the end
-- and the source column at the beginning). Vertical displacement
-- (source row) is ignored.
withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
-> GenParser Char st (a, Int) -- ^ (result, displacement)
withHorizDisplacement parser = do
pos1 <- getPosition
result <- parser
pos2 <- getPosition
return (result, sourceColumn pos2 - sourceColumn pos1)

View file

@ -41,12 +41,12 @@ module Text.Pandoc.Readers.HTML (
) where
import Text.ParserCombinators.Parsec
import Text.Pandoc.ParserCombinators
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Entities ( characterEntity, decodeEntities )
import Text.Pandoc.CharacterReferences ( characterReference,
decodeCharacterReferences )
import Data.Maybe ( fromMaybe )
import Data.List ( intersect, takeWhile, dropWhile )
import Data.List ( takeWhile, dropWhile )
import Data.Char ( toUpper, toLower, isAlphaNum )
-- | Convert HTML-formatted string to 'Pandoc' document.
@ -55,10 +55,6 @@ readHtml :: ParserState -- ^ Parser state
-> Pandoc
readHtml = readWith parseHtml
-- for testing
testString :: String -> IO ()
testString = testStringWith parseHtml
--
-- Constants
--
@ -74,26 +70,18 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
--
-- | Read blocks until end tag.
blocksTilEnd tag = try (do
blocks <- manyTill (do {b <- block; spaces; return b}) (htmlEndTag tag)
return $ filter (/= Null) blocks)
blocksTilEnd tag = do
blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
return $ filter (/= Null) blocks
-- | Read inlines until end tag.
inlinesTilEnd tag = try (do
inlines <- manyTill inline (htmlEndTag tag)
return inlines)
inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
-- | Parse blocks between open and close tag.
blocksIn tag = try $ do
htmlTag tag
spaces
blocksTilEnd tag
blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
-- | Parse inlines between open and close tag.
inlinesIn tag = try $ do
htmlTag tag
spaces
inlinesTilEnd tag
inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
-- | Extract type from a tag: e.g. @br@ from @\<br\>@
extractTagType :: String -> String
@ -103,19 +91,19 @@ extractTagType ('<':rest) =
extractTagType _ = ""
-- | Parse any HTML tag (closing or opening) and return text of tag
anyHtmlTag = try (do
anyHtmlTag = try $ do
char '<'
spaces
tag <- many1 alphaNum
attribs <- htmlAttributes
spaces
ender <- option "" (string "/")
let ender' = if (null ender) then "" else " /"
let ender' = if null ender then "" else " /"
spaces
char '>'
return ("<" ++ tag ++ attribs ++ ender' ++ ">"))
return $ "<" ++ tag ++ attribs ++ ender' ++ ">"
anyHtmlEndTag = try (do
anyHtmlEndTag = try $ do
char '<'
spaces
char '/'
@ -123,19 +111,19 @@ anyHtmlEndTag = try (do
tagType <- many1 alphaNum
spaces
char '>'
return ("</" ++ tagType ++ ">"))
return $ "</" ++ tagType ++ ">"
htmlTag :: String -> GenParser Char st (String, [(String, String)])
htmlTag tag = try (do
htmlTag tag = try $ do
char '<'
spaces
stringAnyCase tag
attribs <- many htmlAttribute
spaces
option "" (string "/")
optional (string "/")
spaces
char '>'
return (tag, (map (\(name, content, raw) -> (name, content)) attribs)))
return (tag, (map (\(name, content, raw) -> (name, content)) attribs))
-- parses a quoted html attribute value
quoted quoteChar = do
@ -145,20 +133,20 @@ quoted quoteChar = do
htmlAttributes = do
attrList <- many htmlAttribute
return (concatMap (\(name, content, raw) -> raw) attrList)
return $ concatMap (\(name, content, raw) -> raw) attrList
htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute
-- minimized boolean attribute (no = and value)
htmlMinimizedAttribute = try (do
-- minimized boolean attribute
htmlMinimizedAttribute = try $ do
many1 space
name <- many1 (choice [letter, oneOf ".-_:"])
spaces
notFollowedBy (char '=')
let content = name
return (name, content, (" " ++ name)))
return (name, content, (" " ++ name))
htmlRegularAttribute = try (do
htmlRegularAttribute = try $ do
many1 space
name <- many1 (choice [letter, oneOf ".-_:"])
spaces
@ -170,10 +158,10 @@ htmlRegularAttribute = try (do
a <- many (alphaNum <|> (oneOf "-._:"))
return (a,"")) ]
return (name, content,
(" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr)))
(" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))
-- | Parse an end tag of type 'tag'
htmlEndTag tag = try (do
htmlEndTag tag = try $ do
char '<'
spaces
char '/'
@ -181,87 +169,83 @@ htmlEndTag tag = try (do
stringAnyCase tag
spaces
char '>'
return ("</" ++ tag ++ ">"))
return $ "</" ++ tag ++ ">"
-- | Returns @True@ if the tag is an inline tag.
isInline tag = (extractTagType tag) `elem` inlineHtmlTags
anyHtmlBlockTag = try (do
tag <- choice [anyHtmlTag, anyHtmlEndTag]
if isInline tag then fail "inline tag" else return tag)
anyHtmlBlockTag = try $ do
tag <- anyHtmlTag <|> anyHtmlEndTag
if isInline tag then fail "inline tag" else return tag
anyHtmlInlineTag = try (do
tag <- choice [ anyHtmlTag, anyHtmlEndTag ]
if isInline tag then return tag else fail "not an inline tag")
anyHtmlInlineTag = try $ do
tag <- anyHtmlTag <|> anyHtmlEndTag
if isInline tag then return tag else fail "not an inline tag"
-- | Parses material between script tags.
-- Scripts must be treated differently, because they can contain '<>' etc.
htmlScript = try (do
htmlScript = try $ do
open <- string "<script"
rest <- manyTill anyChar (htmlEndTag "script")
return (open ++ rest ++ "</script>"))
return $ open ++ rest ++ "</script>"
htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ]
rawHtmlBlock = try (do
notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"])
rawHtmlBlock = try $ do
notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
body <- htmlBlockElement <|> anyHtmlBlockTag
sp <- (many space)
sp <- many space
state <- getState
if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null)
if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null
-- | Parses an HTML comment.
htmlComment = try (do
htmlComment = try $ do
string "<!--"
comment <- manyTill anyChar (try (string "-->"))
return ("<!--" ++ comment ++ "-->"))
return $ "<!--" ++ comment ++ "-->"
--
-- parsing documents
--
xmlDec = try (do
xmlDec = try $ do
string "<?"
rest <- manyTill anyChar (char '>')
return ("<?" ++ rest ++ ">"))
return $ "<?" ++ rest ++ ">"
definition = try (do
definition = try $ do
string "<!"
rest <- manyTill anyChar (char '>')
return ("<!" ++ rest ++ ">"))
return $ "<!" ++ rest ++ ">"
nonTitleNonHead = try (do
notFollowedBy' (htmlTag "title")
notFollowedBy' (htmlTag "/head")
result <- choice [do {rawHtmlBlock; return ' '}, anyChar]
return result)
nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >>
((rawHtmlBlock >> return ' ') <|> anyChar)
parseTitle = try (do
(tag, attribs) <- htmlTag "title"
parseTitle = try $ do
(tag, _) <- htmlTag "title"
contents <- inlinesTilEnd tag
spaces
return contents)
return contents
-- parse header and return meta-information (for now, just title)
parseHead = try (do
parseHead = try $ do
htmlTag "head"
spaces
skipMany nonTitleNonHead
contents <- option [] parseTitle
skipMany nonTitleNonHead
htmlTag "/head"
return (contents, [], ""))
return (contents, [], "")
skipHtmlTag tag = option ("",[]) (htmlTag tag)
skipHtmlTag tag = optional (htmlTag tag)
-- h1 class="title" representation of title in body
bodyTitle = try (do
bodyTitle = try $ do
(tag, attribs) <- htmlTag "h1"
cl <- case (extractAttribute "class" attribs) of
Just "title" -> do {return ""}
Just "title" -> return ""
otherwise -> fail "not title"
inlinesTilEnd "h1"
return "")
parseHtml = do
sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
@ -271,27 +255,30 @@ parseHtml = do
spaces
skipHtmlTag "body"
spaces
option "" bodyTitle -- skip title in body, because it's represented in meta
optional bodyTitle -- skip title in body, because it's represented in meta
blocks <- parseBlocks
spaces
option "" (htmlEndTag "body")
optional (htmlEndTag "body")
spaces
option "" (htmlEndTag "html")
optional (htmlEndTag "html")
many anyChar -- ignore anything after </html>
eof
return (Pandoc (Meta title authors date) blocks)
return $ Pandoc (Meta title authors date) blocks
--
-- parsing blocks
--
parseBlocks = do
spaces
result <- sepEndBy block spaces
return $ filter (/= Null) result
parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain,
rawHtmlBlock ] <?> "block"
block = choice [ codeBlock
, header
, hrule
, list
, blockQuote
, para
, plain
, rawHtmlBlock ] <?> "block"
--
-- header blocks
@ -299,53 +286,49 @@ block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain,
header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
headerLevel n = try (do
headerLevel n = try $ do
let level = "h" ++ show n
(tag, attribs) <- htmlTag level
contents <- inlinesTilEnd level
return (Header n (normalizeSpaces contents)))
return $ Header n (normalizeSpaces contents)
--
-- hrule block
--
hrule = try (do
hrule = try $ do
(tag, attribs) <- htmlTag "hr"
state <- getState
if (not (null attribs)) && (stateParseRaw state)
then -- in this case we want to parse it as raw html
unexpected "attributes in hr"
else return HorizontalRule)
if not (null attribs) && stateParseRaw state
then unexpected "attributes in hr" -- parse as raw in this case
else return HorizontalRule
--
-- code blocks
--
codeBlock = choice [ preCodeBlock, bareCodeBlock ] <?> "code block"
codeBlock = preCodeBlock <|> bareCodeBlock <?> "code block"
preCodeBlock = try (do
preCodeBlock = try $ do
htmlTag "pre"
spaces
htmlTag "code"
result <- manyTill anyChar (htmlEndTag "code")
result <- bareCodeBlock
spaces
htmlEndTag "pre"
return (CodeBlock (stripTrailingNewlines (decodeEntities result))))
return result
bareCodeBlock = try (do
bareCodeBlock = try $ do
htmlTag "code"
result <- manyTill anyChar (htmlEndTag "code")
return (CodeBlock (stripTrailingNewlines (decodeEntities result))))
return $ CodeBlock $ stripTrailingNewlines $
decodeCharacterReferences result
--
-- block quotes
--
blockQuote = try (do
tag <- htmlTag "blockquote"
spaces
blocks <- blocksTilEnd "blockquote"
return (BlockQuote blocks))
blockQuote = try $ htmlTag "blockquote" >> spaces >>
blocksTilEnd "blockquote" >>= (return . BlockQuote)
--
-- list blocks
@ -354,119 +337,105 @@ blockQuote = try (do
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
orderedList = try $ do
(_, attribs) <- htmlTag "ol"
(start, style) <- option (1, DefaultStyle) $
do failIfStrict
let sta = fromMaybe "1" $
lookup "start" attribs
let sty = fromMaybe (fromMaybe "" $
lookup "style" attribs) $
lookup "class" attribs
let sty' = case sty of
"lower-roman" -> LowerRoman
"upper-roman" -> UpperRoman
"lower-alpha" -> LowerAlpha
"upper-alpha" -> UpperAlpha
"decimal" -> Decimal
_ -> DefaultStyle
return (read sta, sty')
spaces
items <- sepEndBy1 (blocksIn "li") spaces
htmlEndTag "ol"
return (OrderedList (start, style, DefaultDelim) items)
(_, attribs) <- htmlTag "ol"
(start, style) <- option (1, DefaultStyle) $
do failIfStrict
let sta = fromMaybe "1" $
lookup "start" attribs
let sty = fromMaybe (fromMaybe "" $
lookup "style" attribs) $
lookup "class" attribs
let sty' = case sty of
"lower-roman" -> LowerRoman
"upper-roman" -> UpperRoman
"lower-alpha" -> LowerAlpha
"upper-alpha" -> UpperAlpha
"decimal" -> Decimal
_ -> DefaultStyle
return (read sta, sty')
spaces
items <- sepEndBy1 (blocksIn "li") spaces
htmlEndTag "ol"
return $ OrderedList (start, style, DefaultDelim) items
bulletList = try $ do
htmlTag "ul"
spaces
items <- sepEndBy1 (blocksIn "li") spaces
htmlEndTag "ul"
return (BulletList items)
htmlTag "ul"
spaces
items <- sepEndBy1 (blocksIn "li") spaces
htmlEndTag "ul"
return $ BulletList items
definitionList = try $ do
failIfStrict -- def lists not part of standard markdown
tag <- htmlTag "dl"
spaces
items <- sepEndBy1 definitionListItem spaces
htmlEndTag "dl"
return (DefinitionList items)
failIfStrict -- def lists not part of standard markdown
tag <- htmlTag "dl"
spaces
items <- sepEndBy1 definitionListItem spaces
htmlEndTag "dl"
return $ DefinitionList items
definitionListItem = try $ do
terms <- sepEndBy1 (inlinesIn "dt") spaces
defs <- sepEndBy1 (blocksIn "dd") spaces
let term = joinWithSep [LineBreak] terms
return (term, concat defs)
terms <- sepEndBy1 (inlinesIn "dt") spaces
defs <- sepEndBy1 (blocksIn "dd") spaces
let term = joinWithSep [LineBreak] terms
return (term, concat defs)
--
-- paragraph block
--
para = try (do
tag <- htmlTag "p"
result <- inlinesTilEnd "p"
return (Para (normalizeSpaces result)))
para = htmlTag "p" >> inlinesTilEnd "p" >>= return . Para . normalizeSpaces
--
-- plain block
--
plain = do
result <- many1 inline
return (Plain (normalizeSpaces result))
plain = many1 inline >>= return . Plain . normalizeSpaces
--
-- inline
--
inline = choice [ text, special ] <?> "inline"
inline = choice [ charRef
, strong
, emph
, superscript
, subscript
, strikeout
, spanStrikeout
, code
, str
, linebreak
, whitespace
, link
, image
, rawHtmlInline ] <?> "inline"
text = choice [ entity, strong, emph, superscript, subscript,
strikeout, spanStrikeout, code, str,
linebreak, whitespace ] <?> "text"
special = choice [ link, image, rawHtmlInline ] <?>
"link, inline html, or image"
entity = do
ent <- characterEntity
return $ Str [ent]
code = try (do
code = try $ do
htmlTag "code"
result <- manyTill anyChar (htmlEndTag "code")
-- remove internal line breaks, leading and trailing space,
-- and decode entities
let result' = decodeEntities $ removeLeadingTrailingSpace $
joinWithSep " " $ lines result
return (Code result'))
-- and decode character references
return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
joinWithSep " " $ lines result
rawHtmlInline = do
result <- choice [htmlScript, anyHtmlInlineTag]
result <- htmlScript <|> anyHtmlInlineTag
state <- getState
if stateParseRaw state then return (HtmlInline result) else return (Str "")
betweenTags tag = try (do
htmlTag tag
result <- inlinesTilEnd tag
return (normalizeSpaces result))
betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=
return . normalizeSpaces
emph = try (do
result <- choice [betweenTags "em", betweenTags "it"]
return (Emph result))
emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph
superscript = try $ do
failIfStrict -- strict markdown has no superscript, so treat as raw HTML
result <- betweenTags "sup"
return (Superscript result)
strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
subscript = try $ do
failIfStrict -- strict markdown has no subscript, so treat as raw HTML
result <- betweenTags "sub"
return (Subscript result)
superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
strikeout = try $ do
failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
result <- choice [betweenTags "s", betweenTags "strike"]
return (Strikeout result)
subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
return . Strikeout
spanStrikeout = try $ do
failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
@ -474,25 +443,14 @@ spanStrikeout = try $ do
result <- case (extractAttribute "class" attributes) of
Just "strikeout" -> inlinesTilEnd "span"
_ -> fail "not a strikeout"
return (Strikeout result)
return $ Strikeout result
strong = try (do
result <- choice [betweenTags "b", betweenTags "strong"]
return (Strong result))
whitespace = do
many1 space
return Space
whitespace = many1 space >> return Space
-- hard line break
linebreak = do
htmlTag "br"
option ' ' newline
return LineBreak
linebreak = htmlTag "br" >> optional newline >> return LineBreak
str = do
result <- many1 (noneOf "<& \t\n")
return (Str result)
str = many1 (noneOf "<& \t\n") >>= return . Str
--
-- links and images
@ -501,27 +459,27 @@ str = do
-- extract contents of attribute (attribute names are case-insensitive)
extractAttribute name [] = Nothing
extractAttribute name ((attrName, contents):rest) =
let name' = map toLower name
attrName' = map toLower attrName in
if (attrName' == name')
then Just (decodeEntities contents)
else extractAttribute name rest
let name' = map toLower name
attrName' = map toLower attrName
in if attrName' == name'
then Just (decodeCharacterReferences contents)
else extractAttribute name rest
link = try $ do
(tag, attributes) <- htmlTag "a"
url <- case (extractAttribute "href" attributes) of
Just url -> do {return url}
Just url -> return url
Nothing -> fail "no href"
let title = fromMaybe "" (extractAttribute "title" attributes)
let title = fromMaybe "" $ extractAttribute "title" attributes
label <- inlinesTilEnd "a"
return $ Link (normalizeSpaces label) (url, title)
image = try $ do
(tag, attributes) <- htmlTag "img"
url <- case (extractAttribute "src" attributes) of
Just url -> do {return url}
Just url -> return url
Nothing -> fail "no src"
let title = fromMaybe "" (extractAttribute "title" attributes)
let title = fromMaybe "" $ extractAttribute "title" attributes
let alt = fromMaybe "" (extractAttribute "alt" attributes)
return $ Image [Str alt] (url, title)

View file

@ -34,7 +34,6 @@ module Text.Pandoc.Readers.LaTeX (
) where
import Text.ParserCombinators.Parsec
import Text.Pandoc.ParserCombinators
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Data.Maybe ( fromMaybe )
@ -47,9 +46,6 @@ readLaTeX :: ParserState -- ^ Parser state, including options for parser
-> Pandoc
readLaTeX = readWith parseLaTeX
-- for testing
testString = testStringWith parseLaTeX
-- characters with special meaning
specialChars = "\\$%&^&_~#{}\n \t|<>'\"-"
@ -58,12 +54,12 @@ specialChars = "\\$%&^&_~#{}\n \t|<>'\"-"
--
-- | Returns text between brackets and its matching pair.
bracketedText openB closeB = try (do
bracketedText openB closeB = do
result <- charsInBalanced' openB closeB
return ([openB] ++ result ++ [closeB]))
return $ [openB] ++ result ++ [closeB]
-- | Returns an option or argument of a LaTeX command.
optOrArg = choice [ (bracketedText '{' '}'), (bracketedText '[' ']') ]
optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
-- | True if the string begins with '{'.
isArg ('{':rest) = True
@ -73,62 +69,55 @@ isArg other = False
commandArgs = many optOrArg
-- | Parses LaTeX command, returns (name, star, list of options or arguments).
command = try (do
command = try $ do
char '\\'
name <- many1 alphaNum
star <- option "" (string "*") -- some commands have starred versions
args <- commandArgs
return (name, star, args))
return (name, star, args)
begin name = try (do
string "\\begin{"
string name
char '}'
option [] commandArgs
begin name = try $ do
string $ "\\begin{" ++ name ++ "}"
optional commandArgs
spaces
return name)
return name
end name = try (do
string "\\end{"
string name
char '}'
end name = try $ do
string $ "\\end{" ++ name ++ "}"
spaces
return name)
return name
-- | Returns a list of block elements containing the contents of an
-- environment.
environment name = try (do
begin name
spaces
contents <- manyTill block (end name)
return contents)
environment name = try $ begin name >> spaces >> manyTill block (end name)
anyEnvironment = try (do
anyEnvironment = try $ do
string "\\begin{"
name <- many alphaNum
star <- option "" (string "*") -- some environments have starred variants
char '}'
option [] commandArgs
optional commandArgs
spaces
contents <- manyTill block (end (name ++ star))
return (BlockQuote contents))
return $ BlockQuote contents
--
-- parsing documents
--
-- | Process LaTeX preamble, extracting metadata.
processLaTeXPreamble = try (do
manyTill (choice [bibliographic, comment, unknownCommand, nullBlock])
(try (string "\\begin{document}"))
spaces)
processLaTeXPreamble = try $ manyTill
(choice [bibliographic, comment, unknownCommand, nullBlock])
(try (string "\\begin{document}")) >>
spaces
-- | Parse LaTeX and return 'Pandoc'.
parseLaTeX = do
option () processLaTeXPreamble -- preamble might not be present (fragment)
optional processLaTeXPreamble -- preamble might not be present (fragment)
spaces
blocks <- parseBlocks
spaces
option "" (try (string "\\end{document}")) -- might not be present (in fragment)
optional $ try (string "\\end{document}") -- might not be present (fragment)
spaces
eof
state <- getState
@ -136,21 +125,27 @@ parseLaTeX = do
let title' = stateTitle state
let authors' = stateAuthors state
let date' = stateDate state
return (Pandoc (Meta title' authors' date') blocks')
return $ Pandoc (Meta title' authors' date') blocks'
--
-- parsing blocks
--
parseBlocks = do
spaces
result <- many block
return result
parseBlocks = spaces >> many block
block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock,
comment, bibliographic, para, specialEnvironment,
itemBlock, unknownEnvironment, unknownCommand ] <?>
"block"
block = choice [ hrule
, codeBlock
, header
, list
, blockQuote
, mathBlock
, comment
, bibliographic
, para
, specialEnvironment
, itemBlock
, unknownEnvironment
, unknownCommand ] <?> "block"
--
-- header blocks
@ -158,24 +153,21 @@ block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock,
header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
headerLevel n = try (do
headerLevel n = try $ do
let subs = concat $ replicate (n - 1) "sub"
string ("\\" ++ subs ++ "section")
option ' ' (char '*')
optional (char '*')
char '{'
title <- manyTill inline (char '}')
spaces
return (Header n (normalizeSpaces title)))
return $ Header n (normalizeSpaces title)
--
-- hrule block
--
hrule = try (do
oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
"\\newpage" ]
spaces
return HorizontalRule)
hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
"\\newpage" ] >> spaces >> return HorizontalRule
--
-- code blocks
@ -183,37 +175,28 @@ hrule = try (do
codeBlock = codeBlock1 <|> codeBlock2
codeBlock1 = try (do
codeBlock1 = try $ do
string "\\begin{verbatim}" -- don't use begin function because it
-- gobbles whitespace
option "" blanklines -- we want to gobble blank lines, but not
optional blanklines -- we want to gobble blank lines, but not
-- leading space
contents <- manyTill anyChar (try (string "\\end{verbatim}"))
spaces
return (CodeBlock (stripTrailingNewlines contents)))
return $ CodeBlock (stripTrailingNewlines contents)
codeBlock2 = try (do
string "\\begin{Verbatim}" -- used by fancyverb package
codeBlock2 = try $ do
string "\\begin{Verbatim}" -- used by fancyvrb package
option "" blanklines
contents <- manyTill anyChar (try (string "\\end{Verbatim}"))
spaces
return (CodeBlock (stripTrailingNewlines contents)))
return $ CodeBlock (stripTrailingNewlines contents)
--
-- block quotes
--
blockQuote = choice [ blockQuote1, blockQuote2 ] <?> "blockquote"
blockQuote1 = try (do
blocks <- environment "quote"
spaces
return (BlockQuote blocks))
blockQuote2 = try (do
blocks <- environment "quotation"
spaces
return (BlockQuote blocks))
blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
return . BlockQuote
--
-- math block
@ -223,12 +206,12 @@ mathBlock = mathBlockWith (begin "equation") (end "equation") <|>
mathBlockWith (begin "displaymath") (end "displaymath") <|>
mathBlockWith (string "\\[") (string "\\]") <?> "math block"
mathBlockWith start end = try (do
mathBlockWith start end = try $ do
start
spaces
result <- manyTill anyChar end
spaces
return (BlockQuote [Para [TeX ("$" ++ result ++ "$")]]))
return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]]
--
-- list blocks
@ -237,69 +220,66 @@ mathBlockWith start end = try (do
list = bulletList <|> orderedList <|> definitionList <?> "list"
listItem = try $ do
("item", _, args) <- command
spaces
state <- getState
let oldParserContext = stateParserContext state
updateState (\state -> state {stateParserContext = ListItemState})
blocks <- many block
updateState (\state -> state {stateParserContext = oldParserContext})
opt <- case args of
([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
parseFromString (many inline) $ tail $ init x
_ -> return []
return (opt, blocks)
("item", _, args) <- command
spaces
state <- getState
let oldParserContext = stateParserContext state
updateState (\state -> state {stateParserContext = ListItemState})
blocks <- many block
updateState (\state -> state {stateParserContext = oldParserContext})
opt <- case args of
([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
parseFromString (many inline) $ tail $ init x
_ -> return []
return (opt, blocks)
orderedList = try $ do
string "\\begin{enumerate}"
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
try $ do failIfStrict
char '['
res <- anyOrderedListMarker
char ']'
return res
spaces
option "" $ try $ do string "\\setlength{\\itemindent}"
char '{'
manyTill anyChar (char '}')
spaces
start <- option 1 $ try $ do failIfStrict
string "\\setcounter{enum"
many1 (char 'i')
string "}{"
num <- many1 digit
char '}'
spaces
return $ (read num) + 1
items <- many listItem
end "enumerate"
spaces
return $ OrderedList (start, style, delim) $ map snd items
string "\\begin{enumerate}"
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
try $ do failIfStrict
char '['
res <- anyOrderedListMarker
char ']'
return res
spaces
option "" $ try $ do string "\\setlength{\\itemindent}"
char '{'
manyTill anyChar (char '}')
spaces
start <- option 1 $ try $ do failIfStrict
string "\\setcounter{enum"
many1 (char 'i')
string "}{"
num <- many1 digit
char '}'
spaces
return $ (read num) + 1
items <- many listItem
end "enumerate"
spaces
return $ OrderedList (start, style, delim) $ map snd items
bulletList = try $ do
begin "itemize"
spaces
items <- many listItem
end "itemize"
spaces
return (BulletList $ map snd items)
begin "itemize"
spaces
items <- many listItem
end "itemize"
spaces
return (BulletList $ map snd items)
definitionList = try $ do
begin "description"
spaces
items <- many listItem
end "description"
spaces
return (DefinitionList items)
begin "description"
spaces
items <- many listItem
end "description"
spaces
return (DefinitionList items)
--
-- paragraph block
--
para = try (do
result <- many1 inline
spaces
return (Para (normalizeSpaces result)))
para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces
--
-- title authors date
@ -307,33 +287,30 @@ para = try (do
bibliographic = choice [ maketitle, title, authors, date ]
maketitle = try (do
string "\\maketitle"
spaces
return Null)
maketitle = try (string "\\maketitle") >> spaces >> return Null
title = try (do
title = try $ do
string "\\title{"
tit <- manyTill inline (char '}')
spaces
updateState (\state -> state { stateTitle = tit })
return Null)
return Null
authors = try (do
authors = try $ do
string "\\author{"
authors <- manyTill anyChar (char '}')
spaces
let authors' = map removeLeadingTrailingSpace $ lines $
substitute "\\\\" "\n" authors
updateState (\state -> state { stateAuthors = authors' })
return Null)
return Null
date = try (do
date = try $ do
string "\\date{"
date' <- manyTill anyChar (char '}')
spaces
updateState (\state -> state { stateDate = date' })
return Null)
return Null
--
-- item block
@ -341,14 +318,14 @@ date = try (do
--
-- this forces items to be parsed in different blocks
itemBlock = try (do
itemBlock = try $ do
("item", _, args) <- command
state <- getState
if (stateParserContext state == ListItemState)
then fail "item should be handled by list block"
else if null args
then return Null
else return (Plain [Str (stripFirstAndLast (head args))]))
else return $ Plain [Str (stripFirstAndLast (head args))]
--
-- raw LaTeX
@ -362,77 +339,93 @@ specialEnvironment = do -- these are always parsed as raw
-- | Parse any LaTeX environment and return a Para block containing
-- the whole literal environment as raw TeX.
rawLaTeXEnvironment :: GenParser Char st Block
rawLaTeXEnvironment = try (do
string "\\begin"
char '{'
rawLaTeXEnvironment = try $ do
string "\\begin{"
name <- many1 alphaNum
star <- option "" (string "*") -- for starred variants
let name' = name ++ star
char '}'
args <- option [] commandArgs
let argStr = concat args
contents <- manyTill (choice [(many1 (noneOf "\\")),
contents <- manyTill (choice [ (many1 (noneOf "\\")),
(do
(Para [TeX str]) <- rawLaTeXEnvironment
return str),
string "\\" ])
(end name')
spaces
return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++
(concat contents) ++ "\\end{" ++ name' ++ "}")]))
return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++
concat contents ++ "\\end{" ++ name' ++ "}"]
unknownEnvironment = try (do
unknownEnvironment = try $ do
state <- getState
result <- if stateParseRaw state -- check whether we should include raw TeX
then rawLaTeXEnvironment -- if so, get whole raw environment
else anyEnvironment -- otherwise just the contents
return result)
return result
unknownCommand = try (do
notFollowedBy' $ choice $ map end
["itemize", "enumerate", "description", "document"]
unknownCommand = try $ do
notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
"document"]
(name, star, args) <- command
spaces
let argStr = concat args
state <- getState
if (name == "item") && ((stateParserContext state) == ListItemState)
if name == "item" && (stateParserContext state) == ListItemState
then fail "should not be parsed as raw"
else string ""
if stateParseRaw state
then return (Plain [TeX ("\\" ++ name ++ star ++ argStr)])
else return (Plain [Str (joinWithSep " " args)]))
then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)]
else return $ Plain [Str (joinWithSep " " args)]
-- latex comment
comment = try (do
char '%'
result <- manyTill anyChar newline
spaces
return Null)
comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
--
-- inline
--
inline = choice [ strong, emph, strikeout, superscript, subscript,
ref, lab, code, linebreak, spacer,
math, ellipses, emDash, enDash, hyphen, quoted, apostrophe,
accentedChar, specialChar, specialInline, escapedChar,
unescapedChar, str, endline, whitespace ] <?> "inline"
specialInline = choice [ url, link, image, footnote, rawLaTeXInline ]
<?> "link, raw TeX, note, or image"
inline = choice [ strong
, emph
, strikeout
, superscript
, subscript
, ref
, lab
, code
, linebreak
, spacer
, math
, ellipses
, emDash
, enDash
, hyphen
, quoted
, apostrophe
, accentedChar
, specialChar
, url
, link
, image
, footnote
, rawLaTeXInline
, escapedChar
, unescapedChar
, str
, endline
, whitespace ] <?> "inline"
accentedChar = normalAccentedChar <|> specialAccentedChar
normalAccentedChar = try (do
normalAccentedChar = try $ do
char '\\'
accent <- oneOf "'`^\"~"
character <- choice [ between (char '{') (char '}') anyChar, anyChar ]
character <- (try $ char '{' >> alphaNum >>~ char '}') <|> alphaNum
let table = fromMaybe [] $ lookup character accentTable
let result = case lookup accent table of
Just num -> chr num
Nothing -> '?'
return (Str [result]))
return $ Str [result]
-- an association list of letters and association list of accents
-- and decimal character numbers.
@ -451,245 +444,179 @@ accentTable =
('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
oslash, pound, euro, copyright, sect ]
oslash, pound, euro, copyright, sect ]
ccedil = try (do
ccedil = try $ do
char '\\'
letter <- oneOfStrings ["cc", "cC"]
let num = if letter == "cc" then 231 else 199
return (Str [chr num]))
return $ Str [chr num]
aring = try (do
aring = try $ do
char '\\'
letter <- oneOfStrings ["aa", "AA"]
let num = if letter == "aa" then 229 else 197
return (Str [chr num]))
return $ Str [chr num]
iuml = try (do
string "\\\""
oneOfStrings ["\\i", "{\\i}"]
return (Str [chr 239]))
iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
return (Str [chr 239])
icirc = try (do
string "\\^"
oneOfStrings ["\\i", "{\\i}"]
return (Str [chr 238]))
icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >>
return (Str [chr 238])
szlig = try (do
string "\\ss"
return (Str [chr 223]))
szlig = try (string "\\ss") >> return (Str [chr 223])
oslash = try (do
oslash = try $ do
char '\\'
letter <- choice [char 'o', char 'O']
let num = if letter == 'o' then 248 else 216
return (Str [chr num]))
return $ Str [chr num]
aelig = try (do
aelig = try $ do
char '\\'
letter <- oneOfStrings ["ae", "AE"]
let num = if letter == "ae" then 230 else 198
return (Str [chr num]))
return $ Str [chr num]
pound = try (do
string "\\pounds"
return (Str [chr 163]))
pound = try (string "\\pounds") >> return (Str [chr 163])
euro = try (do
string "\\euro"
return (Str [chr 8364]))
euro = try (string "\\euro") >> return (Str [chr 8364])
copyright = try (do
string "\\copyright"
return (Str [chr 169]))
copyright = try (string "\\copyright") >> return (Str [chr 169])
sect = try (do
string "\\S"
return (Str [chr 167]))
sect = try (string "\\S") >> return (Str [chr 167])
escapedChar = do
result <- escaped (oneOf " $%&_#{}\n")
return (if result == Str "\n" then Str " " else result)
return $ if result == Str "\n" then Str " " else result
unescapedChar = do -- ignore standalone, nonescaped special characters
oneOf "$^&_#{}|<>"
return (Str "")
-- ignore standalone, nonescaped special characters
unescapedChar = oneOf "$^&_#{}|<>" >> return (Str "")
specialChar = choice [ backslash, tilde, caret, bar, lt, gt ]
backslash = try (do
string "\\textbackslash"
return (Str "\\"))
backslash = try (string "\\textbackslash") >> return (Str "\\")
tilde = try (do
string "\\ensuremath{\\sim}"
return (Str "~"))
tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
caret = try (do
string "\\^{}"
return (Str "^"))
caret = try (string "\\^{}") >> return (Str "^")
bar = try (do
string "\\textbar"
return (Str "\\"))
bar = try (string "\\textbar") >> return (Str "\\")
lt = try (do
string "\\textless"
return (Str "<"))
lt = try (string "\\textless") >> return (Str "<")
gt = try (do
string "\\textgreater"
return (Str ">"))
gt = try (string "\\textgreater") >> return (Str ">")
code = try (do
code = try $ do
string "\\verb"
marker <- anyChar
result <- manyTill anyChar (char marker)
let result' = removeLeadingTrailingSpace result
return (Code result'))
return $ Code $ removeLeadingTrailingSpace result
emph = try (do
oneOfStrings [ "\\emph{", "\\textit{" ]
result <- manyTill inline (char '}')
return (Emph result))
emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
manyTill inline (char '}') >>= return . Emph
strikeout = try $ do
string "\\sout{"
result <- manyTill inline (char '}')
return (Strikeout result)
strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
return . Strikeout
superscript = try $ do
string "\\textsuperscript{"
result <- manyTill inline (char '}')
return (Superscript result)
superscript = try $ string "\\textsuperscript{" >>
manyTill inline (char '}') >>= return . Superscript
-- note: \textsubscript isn't a standard latex command, but we use
-- a defined version in pandoc.
subscript = try $ do
string "\\textsubscript{"
result <- manyTill inline (char '}')
return (Subscript result)
subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
return . Subscript
apostrophe = do
char '\''
return Apostrophe
apostrophe = char '\'' >> return Apostrophe
quoted = do
doubleQuoted <|> singleQuoted
quoted = doubleQuoted <|> singleQuoted
singleQuoted = try (do
result <- enclosed singleQuoteStart singleQuoteEnd inline
return $ Quoted SingleQuote $ normalizeSpaces result)
singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
return . Quoted SingleQuote . normalizeSpaces
doubleQuoted = try (do
result <- enclosed doubleQuoteStart doubleQuoteEnd inline
return $ Quoted DoubleQuote $ normalizeSpaces result)
doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
return . Quoted DoubleQuote . normalizeSpaces
singleQuoteStart = char '`'
singleQuoteEnd = char '\'' >> notFollowedBy alphaNum
singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
doubleQuoteStart = string "``"
doubleQuoteEnd = string "''"
ellipses = try (do
string "\\ldots"
option "" (try (string "{}"))
return Ellipses)
ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >>
return Ellipses
enDash = try (do
string "--"
notFollowedBy (char '-')
return EnDash)
enDash = try (string "--") >> return EnDash
emDash = try (do
string "---"
return EmDash)
emDash = try (string "---") >> return EmDash
hyphen = do
char '-'
return (Str "-")
hyphen = char '-' >> return (Str "-")
lab = try (do
lab = try $ do
string "\\label{"
result <- manyTill anyChar (char '}')
return (Str ("(" ++ result ++ ")")))
return $ Str $ "(" ++ result ++ ")"
ref = try (do
string "\\ref{"
result <- manyTill anyChar (char '}')
return (Str (result)))
ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
strong = try (do
string "\\textbf{"
result <- manyTill inline (char '}')
return (Strong result))
strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
return . Strong
whitespace = do
many1 (oneOf "~ \t")
return Space
whitespace = many1 (oneOf "~ \t") >> return Space
-- hard line break
linebreak = try (do
string "\\\\"
return LineBreak)
linebreak = try (string "\\\\") >> return LineBreak
spacer = try $ do
string "\\,"
return (Str "")
spacer = try (string "\\,") >> return (Str "")
str = do
result <- many1 (noneOf specialChars)
return (Str result)
str = many1 (noneOf specialChars) >>= return . Str
-- endline internal to paragraph
endline = try (do
newline
notFollowedBy blankline
return Space)
endline = try $ newline >> notFollowedBy blankline >> return Space
-- math
math = math1 <|> math2 <?> "math"
math1 = try (do
math1 = try $ do
char '$'
result <- many (noneOf "$")
char '$'
return (TeX ("$" ++ result ++ "$")))
return $ TeX ("$" ++ result ++ "$")
math2 = try (do
math2 = try $ do
string "\\("
result <- many (noneOf "$")
string "\\)"
return (TeX ("$" ++ result ++ "$")))
return $ TeX ("$" ++ result ++ "$")
--
-- links and images
--
url = try (do
url = try $ do
string "\\url"
url <- charsInBalanced '{' '}'
return (Link [Code url] (url, "")))
return $ Link [Code url] (url, "")
link = try (do
link = try $ do
string "\\href{"
url <- manyTill anyChar (char '}')
char '{'
label <- manyTill inline (char '}')
return (Link (normalizeSpaces label) (url, "")))
return $ Link (normalizeSpaces label) (url, "")
image = try (do
image = try $ do
("includegraphics", _, args) <- command
let args' = filter isArg args -- filter out options
let src = if null args' then
("", "")
else
(stripFirstAndLast (head args'), "")
return (Image [Str "image"] src))
return $ Image [Str "image"] src
footnote = try (do
footnote = try $ do
(name, _, (contents:[])) <- command
if ((name == "footnote") || (name == "thanks"))
then string ""
@ -700,16 +627,15 @@ footnote = try (do
setInput $ contents'
blocks <- parseBlocks
setInput rest
return (Note blocks))
return $ Note blocks
-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try (do
rawLaTeXInline = try $ do
(name, star, args) <- command
let argStr = concat args
state <- getState
if ((name == "begin") || (name == "end") || (name == "item"))
then fail "not an inline command"
else string ""
return (TeX ("\\" ++ name ++ star ++ argStr)))
return $ TeX ("\\" ++ name ++ star ++ concat args)

File diff suppressed because it is too large Load diff

View file

@ -31,23 +31,14 @@ module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
import Text.Pandoc.ParserCombinators
import Text.Pandoc.Shared
import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag )
import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec
import Data.Maybe ( fromMaybe )
import Data.List ( findIndex, delete )
import Data.Char ( toUpper )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -> String -> Pandoc
readRST state str = (readWith parseRST) state (str ++ "\n\n")
-- | Parse a string and print result (for testing).
testString :: String -> IO ()
testString = testStringWith parseRST
--
-- Constants and data structure definitions
---
@ -62,15 +53,11 @@ specialChars = "\\`|*_<>$:[-"
-- parsing documents
--
isAnonKey (ref, src) = (ref == [Str "_"])
isAnonKey (ref, src) = ref == [Str "_"]
isHeader1 :: Block -> Bool
isHeader1 (Header 1 _) = True
isHeader1 _ = False
isHeader2 :: Block -> Bool
isHeader2 (Header 2 _) = True
isHeader2 _ = False
isHeader :: Int -> Block -> Bool
isHeader n (Header x _) = x == n
isHeader _ _ = False
-- | Promote all headers in a list of blocks. (Part of
-- title transformation for RST.)
@ -86,23 +73,23 @@ promoteHeaders num [] = []
titleTransform :: [Block] -- ^ list of blocks
-> ([Block], [Inline]) -- ^ modified list of blocks, title
titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
if (any isHeader1 rest) || (any isHeader2 rest)
if (any (isHeader 1) rest) || (any (isHeader 2) rest)
then ((Header 1 head1):(Header 2 head2):rest, [])
else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
titleTransform ((Header 1 head1):rest) = -- title, no subtitle
if (any isHeader1 rest)
if (any (isHeader 1) rest)
then ((Header 1 head1):rest, [])
else ((promoteHeaders 1 rest), head1)
titleTransform blocks = (blocks, [])
parseRST = do
-- first pass: get anonymous keys
refs <- manyTill (referenceKey <|> (do l <- lineClump
return (LineClump l))) eof
-- first pass: get keys
refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof
let keys = map (\(KeyBlock label target) -> (label, target)) $
filter isKeyBlock refs
-- second pass, with keys stripped out
let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
setInput $ concat rawlines -- with keys stripped out
setInput $ concat rawlines
updateState (\state -> state { stateKeys = keys })
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
@ -113,7 +100,7 @@ parseRST = do
let authors = stateAuthors state
let date = stateDate state
let title' = if (null title) then (stateTitle state) else title
return (Pandoc (Meta title' authors date) blocks'')
return $ Pandoc (Meta title' authors date) blocks''
--
-- parsing blocks
@ -121,32 +108,39 @@ parseRST = do
parseBlocks = manyTill block eof
block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote,
imageBlock, unknownDirective, header,
hrule, list, fieldList, lineBlock, para, plain,
nullBlock ] <?> "block"
block = choice [ codeBlock
, rawHtmlBlock
, rawLaTeXBlock
, blockQuote
, imageBlock
, unknownDirective
, header
, hrule
, list
, fieldList
, lineBlock
, para
, plain
, nullBlock ] <?> "block"
--
-- field list
--
fieldListItem = try (do
fieldListItem = try $ do
char ':'
name <- many1 alphaNum
string ": "
skipSpaces
first <- manyTill anyChar newline
rest <- many (do
notFollowedBy (char ':')
notFollowedBy blankline
skipSpaces
manyTill anyChar newline )
return (name, (joinWithSep " " (first:rest))))
rest <- many (notFollowedBy ((char ':') <|> blankline) >>
skipSpaces >> manyTill anyChar newline)
return $ (name, (joinWithSep " " (first:rest)))
fieldList = try (do
fieldList = try $ do
items <- many1 fieldListItem
blanklines
let authors = case (lookup "Authors" items) of
let authors = case lookup "Authors" items of
Just auth -> [auth]
Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
let date = case (lookup "Date" items) of
@ -162,82 +156,74 @@ fieldList = try (do
updateState (\st -> st { stateAuthors = authors,
stateDate = date,
stateTitle = title })
return (BlockQuote result))
return $ BlockQuote result
--
-- line block
--
lineBlockLine = try (do
lineBlockLine = try $ do
string "| "
white <- many (oneOf " \t")
line <- manyTill inline newline
let line' = (if null white then [] else [Str white]) ++ line ++ [LineBreak]
return line')
return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
lineBlock = try (do
lineBlock = try $ do
lines <- many1 lineBlockLine
blanklines
return $ Para (concat lines))
return $ Para (concat lines)
--
-- paragraph block
--
para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph"
para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
codeBlockStart = try (do
string "::"
blankline
blankline)
codeBlockStart = try $ string "::" >> blankline >> blankline
-- paragraph that ends in a :: starting a code block
paraBeforeCodeBlock = try (do
result <- many1 (do {notFollowedBy' codeBlockStart; inline})
paraBeforeCodeBlock = try $ do
result <- many1 (notFollowedBy' codeBlockStart >> inline)
lookAhead (string "::")
return (Para (if (last result == Space)
then normalizeSpaces result
else (normalizeSpaces result) ++ [Str ":"])))
return $ Para $ if last result == Space
then normalizeSpaces result
else (normalizeSpaces result) ++ [Str ":"]
-- regular paragraph
paraNormal = try (do
paraNormal = try $ do
result <- many1 inline
newline
blanklines
let result' = normalizeSpaces result
return (Para result'))
return $ Para $ normalizeSpaces result
plain = do
result <- many1 inline
let result' = normalizeSpaces result
return (Plain result')
plain = many1 inline >>= return . Plain . normalizeSpaces
--
-- image block
--
imageBlock = try (do
imageBlock = try $ do
string ".. image:: "
src <- manyTill anyChar newline
return (Plain [Image [Str "image"] (src, "")]))
return $ Plain [Image [Str "image"] (src, "")]
--
-- header blocks
--
header = choice [ doubleHeader, singleHeader ] <?> "header"
header = doubleHeader <|> singleHeader <?> "header"
-- a header with lines on top and bottom
doubleHeader = try (do
doubleHeader = try $ do
c <- oneOf underlineChars
rest <- many (char c) -- the top line
let lenTop = length (c:rest)
skipSpaces
newline
txt <- many1 (do {notFollowedBy blankline; inline})
pos <- getPosition
txt <- many1 (notFollowedBy blankline >> inline)
pos <- getPosition
let len = (sourceColumn pos) - 1
if (len > lenTop) then fail "title longer than border" else (do {return ()})
if (len > lenTop) then fail "title longer than border" else return ()
blankline -- spaces and newline
count lenTop (char c) -- the bottom line
blanklines
@ -249,10 +235,10 @@ doubleHeader = try (do
Just ind -> (headerTable, ind + 1)
Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
setState (state { stateHeaderTable = headerTable' })
return (Header level (normalizeSpaces txt)))
return $ Header level (normalizeSpaces txt)
-- a header with line on the bottom only
singleHeader = try (do
singleHeader = try $ do
notFollowedBy' whitespace
txt <- many1 (do {notFollowedBy blankline; inline})
pos <- getPosition
@ -268,19 +254,19 @@ singleHeader = try (do
Just ind -> (headerTable, ind + 1)
Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
setState (state { stateHeaderTable = headerTable' })
return (Header level (normalizeSpaces txt)))
return $ Header level (normalizeSpaces txt)
--
-- hrule block
--
hruleWith chr = try (do
hruleWith chr = try $ do
count 4 (char chr)
skipMany (char chr)
skipSpaces
newline
blanklines
return HorizontalRule)
return HorizontalRule
hrule = choice (map hruleWith underlineChars) <?> "hrule"
@ -289,15 +275,16 @@ hrule = choice (map hruleWith underlineChars) <?> "hrule"
--
-- read a line indented by a given string
indentedLine indents = try (do
indentedLine indents = try $ do
string indents
result <- manyTill anyChar newline
return (result ++ "\n"))
return $ result ++ "\n"
-- two or more indented lines, possibly separated by blank lines
-- if variable = True, then any indent will work, but it must be consistent through the block
-- if variable = False, indent should be one tab or equivalent in spaces
indentedBlock variable = try (do
-- two or more indented lines, possibly separated by blank lines.
-- if variable = True, then any indent will work, but it must be
-- consistent through the block.
-- if variable = False, indent should be one tab or equivalent in spaces.
indentedBlock variable = try $ do
state <- getState
let tabStop = stateTabStop state
indents <- if variable
@ -305,51 +292,47 @@ indentedBlock variable = try (do
else oneOfStrings ["\t", (replicate tabStop ' ')]
firstline <- manyTill anyChar newline
rest <- many (choice [ indentedLine indents,
try (do
b <- blanklines
l <- indentedLine indents
return (b ++ l))])
option "" blanklines
return (firstline ++ "\n" ++ (concat rest)))
try (do b <- blanklines
l <- indentedLine indents
return (b ++ l))])
optional blanklines
return $ firstline ++ "\n" ++ concat rest
codeBlock = try (do
codeBlock = try $ do
codeBlockStart
result <- indentedBlock False
-- the False means we want one tab stop indent on each line
return (CodeBlock (stripTrailingNewlines result)))
return $ CodeBlock $ stripTrailingNewlines result
--
-- raw html
--
rawHtmlBlock = try (do
string ".. raw:: html"
blanklines
result <- indentedBlock True
return (RawHtml result))
rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
indentedBlock True >>= return . RawHtml
--
-- raw latex
--
rawLaTeXBlock = try (do
rawLaTeXBlock = try $ do
string ".. raw:: latex"
blanklines
result <- indentedBlock True
return (Para [(TeX result)]))
return $ Para [(TeX result)]
--
-- block quotes
--
blockQuote = try (do
blockQuote = try $ do
raw <- indentedBlock True
-- parse the extracted block, which may contain various block elements:
rest <- getInput
setInput $ raw ++ "\n\n"
contents <- parseBlocks
setInput rest
return (BlockQuote contents))
return $ BlockQuote contents
--
-- list blocks
@ -369,15 +352,14 @@ definitionListItem = try $ do
definitionList = try $ do
items <- many1 definitionListItem
return (DefinitionList items)
return $ DefinitionList items
-- parses bullet list start and returns its length (inc. following whitespace)
bulletListStart = try (do
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
white <- many1 spaceChar
let len = length (marker:white)
return len)
return $ length (marker:white)
-- parses ordered list start and returns its length (inc following whitespace)
orderedListStart style delim = try $ do
@ -386,11 +368,11 @@ orderedListStart style delim = try $ do
return $ markerLen + length white
-- parse a line of a list item
listLine markerLength = try (do
listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
line <- manyTill anyChar newline
return (line ++ "\n"))
return $ line ++ "\n"
-- indent by specified number of spaces (or equiv. tabs)
indentWith num = do
@ -399,7 +381,7 @@ indentWith num = do
if (num < tabStop)
then count num (char ' ')
else choice [ try (count num (char ' ')),
(try (do {char '\t'; count (num - tabStop) (char ' ')})) ]
(try (char '\t' >> count (num - tabStop) (char ' '))) ]
-- parse raw text for one list item, excluding start marker and continuations
rawListItem start = try $ do
@ -411,19 +393,16 @@ rawListItem start = try $ do
-- continuation of a list item - indented and separated by blankline or
-- (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
listContinuation markerLength = try (do
listContinuation markerLength = try $ do
blanks <- many1 blankline
result <- many1 (listLine markerLength)
return (blanks ++ (concat result)))
return $ blanks ++ concat result
listItem start = try (do
listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
blanks <- choice [ try (do
b <- many blankline
lookAhead start
return b),
many1 blankline ] -- whole list must end with blank
blanks <- choice [ try (many blankline >>~ lookAhead start),
many1 blankline ] -- whole list must end with blank.
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
-- see definition of "endline"
@ -436,52 +415,44 @@ listItem start = try (do
parsed <- parseBlocks
setInput remaining
updateState (\st -> st {stateParserContext = oldContext})
return parsed)
return parsed
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListMarker
items <- many1 (listItem (orderedListStart style delim))
let items' = compactify items
return (OrderedList (start, style, delim) items')
return $ OrderedList (start, style, delim) items'
bulletList = try (do
bulletList = try $ do
items <- many1 (listItem bulletListStart)
let items' = compactify items
return (BulletList items'))
return $ BulletList items'
--
-- unknown directive (e.g. comment)
--
unknownDirective = try (do
unknownDirective = try $ do
string ".. "
manyTill anyChar newline
many (do
string " "
char ':'
many1 (noneOf "\n:")
char ':'
many1 (noneOf "\n")
newline)
option "" blanklines
return Null)
many (string " :" >> many1 (noneOf "\n:") >> char ':' >>
many1 (noneOf "\n") >> newline)
optional blanklines
return Null
--
-- reference key
--
referenceKey = do
result <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
option "" blanklines
return result
referenceKey =
choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] >>~
optional blanklines
targetURI = try $ do
skipSpaces
option ' ' newline
contents <- many1 (try (do many spaceChar
newline
many1 spaceChar
noneOf " \t\n") <|> noneOf "\n")
optional newline
contents <- many1 (try (many spaceChar >> newline >>
many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
blanklines
return contents
@ -516,71 +487,73 @@ regularKey = try $ do
-- inline
--
inline = choice [ superscript, subscript,
escapedChar, link, image, hyphens, strong, emph, code,
str, tabchar, whitespace, endline, symbol ] <?> "inline"
inline = choice [ superscript
, subscript
, escapedChar
, link
, image
, hyphens
, strong
, emph
, code
, str
, tabchar
, whitespace
, endline
, symbol ] <?> "inline"
hyphens = try (do
hyphens = try $ do
result <- many1 (char '-')
option Space endline
-- don't want to treat endline after hyphen or dash as a space
return (Str result))
return $ Str result
escapedChar = escaped anyChar
symbol = do
result <- oneOf specialChars
return (Str [result])
return $ Str [result]
-- parses inline code, between codeStart and codeEnd
code = try (do
code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
return (Code result'))
return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
emph = do
result <- enclosed (char '*') (char '*') inline
return (Emph (normalizeSpaces result))
emph = enclosed (char '*') (char '*') inline >>=
return . Emph . normalizeSpaces
strong = do
result <- enclosed (string "**") (string "**") inline
return (Strong (normalizeSpaces result))
strong = enclosed (string "**") (string "**") inline >>=
return . Strong . normalizeSpaces
interpreted role = try $ do
option "" (try $ string "\\ ")
optional $ try $ string "\\ "
result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
nextChar <- lookAhead anyChar
try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
return [Str result]
superscript = interpreted "sup" >>= (return . Superscript)
subscript = interpreted "sub" >>= (return . Subscript)
whitespace = do
many1 spaceChar <?> "whitespace"
return Space
whitespace = many1 spaceChar >> return Space <?> "whitespace"
tabchar = do
tab
return (Str "\t")
tabchar = tab >> return (Str "\t")
str = do
notFollowedBy' oneWordReference
result <- many1 (noneOf (specialChars ++ "\t\n "))
return (Str result)
str = notFollowedBy' oneWordReference >>
many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
-- an endline character that can be treated as a space, not a structural break
endline = try (do
endline = try $ do
newline
notFollowedBy blankline
-- parse potential list-starts at beginning of line differently in a list:
st <- getState
if ((stateParserContext st) == ListItemState)
then do notFollowedBy' anyOrderedListMarker
notFollowedBy' bulletListStart
else option () pzero
return Space)
then notFollowedBy' anyOrderedListMarker >> notFollowedBy' bulletListStart
else return ()
return Space
--
-- links
@ -628,10 +601,10 @@ referenceLink = try $ do
uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
"mailto:", "news:", "telnet:" ]
uri = try (do
uri = try $ do
scheme <- uriScheme
identifier <- many1 (noneOf " \t\n")
return (scheme ++ identifier))
return $ scheme ++ identifier
autoURI = try $ do
src <- uri
@ -639,20 +612,20 @@ autoURI = try $ do
emailChar = alphaNum <|> oneOf "-+_."
emailAddress = try (do
emailAddress = try $ do
firstLetter <- alphaNum
restAddr <- many emailChar
let addr = firstLetter:restAddr
char '@'
dom <- domain
return (addr ++ '@':dom))
return $ addr ++ '@':dom
domainChar = alphaNum <|> char '-'
domain = try (do
domain = try $ do
first <- many1 domainChar
dom <- many1 (try (do{ char '.'; many1 domainChar }))
return (joinWithSep "." (first:dom)))
return $ joinWithSep "." (first:dom)
autoEmail = try $ do
src <- emailAddress
@ -669,5 +642,5 @@ image = try $ do
src <- case lookupKeySrc keyTable ref of
Nothing -> fail "no corresponding key"
Just target -> return target
return (Image (normalizeSpaces ref) src)
return $ Image (normalizeSpaces ref) src

File diff suppressed because it is too large Load diff

View file

@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.ConTeXt
Copyright : Copyright (C) 2006-7 John MacFarlane
Copyright : Copyright (C) 2007 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@ -27,9 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' format into ConTeXt.
-}
module Text.Pandoc.Writers.ConTeXt (
writeConTeXt
) where
module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Printf ( printf )
@ -40,8 +38,7 @@ type WriterState = Int -- number of next URL reference
-- | Convert Pandoc to ConTeXt.
writeConTeXt :: WriterOptions -> Pandoc -> String
writeConTeXt options document =
evalState (pandocToConTeXt options document) 1
writeConTeXt options document = evalState (pandocToConTeXt options document) 1
pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String
pandocToConTeXt options (Pandoc meta blocks) = do
@ -111,8 +108,8 @@ stringToConTeXt = concatMap escapeCharForConTeXt
-- | Convert Pandoc block element to ConTeXt.
blockToConTeXt :: Block -> State WriterState String
blockToConTeXt Null = return ""
blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= (return . (++ "\n"))
blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= (return . (++ "\n\n"))
blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= return . (++ "\n")
blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= return . (++ "\n\n")
blockToConTeXt (BlockQuote lst) = do
contents <- blockListToConTeXt lst
return $ "\\startblockquote\n" ++ contents ++ "\\stopblockquote\n\n"
@ -137,12 +134,12 @@ blockToConTeXt (OrderedList attribs lst) = case attribs of
return $ "\\startitemize" ++ markerWidth' ++ "\n" ++ concat contents ++
"\\stopitemize\n"
blockToConTeXt (DefinitionList lst) =
mapM defListItemToConTeXt lst >>= (return . (++ "\n") . concat)
mapM defListItemToConTeXt lst >>= return . (++ "\n") . concat
blockToConTeXt HorizontalRule = return "\\thinrule\n\n"
blockToConTeXt (Header level lst) = do
contents <- inlineListToConTeXt lst
return $ if (level > 0) && (level <= 3)
then "\\" ++ (concat (replicate (level - 1) "sub")) ++
return $ if level > 0 && level <= 3
then "\\" ++ concat (replicate (level - 1) "sub") ++
"section{" ++ contents ++ "}\n\n"
else contents ++ "\n\n"
blockToConTeXt (Table caption aligns widths heads rows) = do
@ -186,12 +183,12 @@ defListItemToConTeXt (term, def) = do
-- | Convert list of block elements to ConTeXt.
blockListToConTeXt :: [Block] -> State WriterState String
blockListToConTeXt lst = mapM blockToConTeXt lst >>= (return . concat)
blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . concat
-- | Convert list of inline elements to ConTeXt.
inlineListToConTeXt :: [Inline] -- ^ Inlines to convert
-> State WriterState String
inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= (return . concat)
inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . concat
isQuoted :: Inline -> Bool
isQuoted (Quoted _ _) = True

View file

@ -30,16 +30,35 @@ Conversion of 'Pandoc' documents to Docbook XML.
module Text.Pandoc.Writers.Docbook ( writeDocbook) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Entities ( escapeStringForXML )
import Data.Char ( toLower, ord )
import Data.List ( isPrefixOf, partition, drop )
import Data.List ( isPrefixOf, drop )
import Text.PrettyPrint.HughesPJ hiding ( Str )
--
-- code to format XML
--
-- | Escape one character as needed for XML.
escapeCharForXML :: Char -> String
escapeCharForXML x = case x of
'&' -> "&amp;"
'<' -> "&lt;"
'>' -> "&gt;"
'"' -> "&quot;"
'\160' -> "&nbsp;"
c -> [c]
-- | True if the character needs to be escaped.
needsEscaping :: Char -> Bool
needsEscaping c = c `elem` "&<>\"\160"
-- | Escape string as needed for XML. Entity references are not preserved.
escapeStringForXML :: String -> String
escapeStringForXML "" = ""
escapeStringForXML str =
case break needsEscaping str of
(okay, "") -> okay
(okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
-- | Return a text object with a string of formatted XML attributes.
attributeList :: [(String, String)] -> Doc
attributeList = text . concatMap
@ -52,10 +71,10 @@ inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
inTags isIndented tagType attribs contents =
let openTag = char '<' <> text tagType <> attributeList attribs <>
char '>'
closeTag = text "</" <> text tagType <> char '>' in
if isIndented
then openTag $$ nest 2 contents $$ closeTag
else openTag <> contents <> closeTag
closeTag = text "</" <> text tagType <> char '>'
in if isIndented
then openTag $$ nest 2 contents $$ closeTag
else openTag <> contents <> closeTag
-- | Return a self-closing tag of tagType with specified attributes
selfClosingTag :: String -> [(String, String)] -> Doc
@ -79,42 +98,42 @@ authorToDocbook :: [Char] -> Doc
authorToDocbook name = inTagsIndented "author" $
if ',' `elem` name
then -- last name first
let (lastname, rest) = break (==',') name
firstname = removeLeadingSpace rest in
inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
inTagsSimple "surname" (text $ escapeStringForXML lastname)
let (lastname, rest) = break (==',') name
firstname = removeLeadingSpace rest in
inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
inTagsSimple "surname" (text $ escapeStringForXML lastname)
else -- last name last
let namewords = words name
lengthname = length namewords
(firstname, lastname) = case lengthname of
0 -> ("","")
1 -> ("", name)
n -> (joinWithSep " " (take (n-1) namewords), last namewords) in
inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
inTagsSimple "surname" (text $ escapeStringForXML lastname)
let namewords = words name
lengthname = length namewords
(firstname, lastname) = case lengthname of
0 -> ("","")
1 -> ("", name)
n -> (joinWithSep " " (take (n-1) namewords), last namewords)
in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
inTagsSimple "surname" (text $ escapeStringForXML lastname)
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: WriterOptions -> Pandoc -> String
writeDocbook opts (Pandoc (Meta title authors date) blocks) =
let head = if (writerStandalone opts)
then text (writerHeader opts)
else empty
meta = if (writerStandalone opts)
then inTagsIndented "articleinfo" $
(inTagsSimple "title" (wrap opts title)) $$
(vcat (map authorToDocbook authors)) $$
(inTagsSimple "date" (text $ escapeStringForXML date))
else empty
let head = if writerStandalone opts
then text (writerHeader opts)
else empty
meta = if writerStandalone opts
then inTagsIndented "articleinfo" $
(inTagsSimple "title" (wrap opts title)) $$
(vcat (map authorToDocbook authors)) $$
(inTagsSimple "date" (text $ escapeStringForXML date))
else empty
elements = hierarchicalize blocks
before = writerIncludeBefore opts
after = writerIncludeAfter opts
body = (if null before then empty else text before) $$
vcat (map (elementToDocbook opts) elements) $$
(if null after then empty else text after)
body' = if writerStandalone opts
then inTagsIndented "article" (meta $$ body)
else body in
render $ head $$ body' $$ text ""
before = writerIncludeBefore opts
after = writerIncludeAfter opts
body = (if null before then empty else text before) $$
vcat (map (elementToDocbook opts) elements) $$
(if null after then empty else text after)
body' = if writerStandalone opts
then inTagsIndented "article" (meta $$ body)
else body
in render $ head $$ body' $$ text ""
-- | Convert an Element to Docbook.
elementToDocbook :: WriterOptions -> Element -> Doc
@ -123,10 +142,10 @@ elementToDocbook opts (Sec title elements) =
-- Docbook doesn't allow sections with no content, so insert some if needed
let elements' = if null elements
then [Blk (Para [])]
else elements in
inTagsIndented "section" $
inTagsSimple "title" (wrap opts title) $$
vcat (map (elementToDocbook opts) elements')
else elements
in inTagsIndented "section" $
inTagsSimple "title" (wrap opts title) $$
vcat (map (elementToDocbook opts) elements')
-- | Convert a list of Pandoc blocks to Docbook.
blocksToDocbook :: WriterOptions -> [Block] -> Doc
@ -145,30 +164,27 @@ deflistItemsToDocbook opts items =
-- | Convert a term and a list of blocks into a Docbook varlistentry.
deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc
deflistItemToDocbook opts term def =
let def' = map plainToPara def in
inTagsIndented "varlistentry" $
inTagsIndented "term" (inlinesToDocbook opts term) $$
inTagsIndented "listitem" (blocksToDocbook opts def')
let def' = map plainToPara def
in inTagsIndented "varlistentry" $
inTagsIndented "term" (inlinesToDocbook opts term) $$
inTagsIndented "listitem" (blocksToDocbook opts def')
-- | Convert a list of lists of blocks to a list of Docbook list items.
listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc
listItemsToDocbook opts items =
vcat $ map (listItemToDocbook opts) items
listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items
-- | Convert a list of blocks into a Docbook list item.
listItemToDocbook :: WriterOptions -> [Block] -> Doc
listItemToDocbook opts item =
let item' = map plainToPara item in
inTagsIndented "listitem" (blocksToDocbook opts item')
inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item
-- | Convert a Pandoc block element to Docbook.
blockToDocbook :: WriterOptions -> Block -> Doc
blockToDocbook opts Null = empty
blockToDocbook opts (Plain lst) = wrap opts lst
blockToDocbook opts (Para lst) =
inTagsIndented "para" (wrap opts lst)
blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst
blockToDocbook opts (BlockQuote blocks) =
inTagsIndented "blockquote" (blocksToDocbook opts blocks)
inTagsIndented "blockquote" $ blocksToDocbook opts blocks
blockToDocbook opts (CodeBlock str) =
text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>"
blockToDocbook opts (BulletList lst) =
@ -198,16 +214,16 @@ blockToDocbook opts (Table caption aligns widths headers rows) =
then empty
else inTagsIndented "caption"
(inlinesToDocbook opts caption)
tableType = if isEmpty captionDoc then "informaltable" else "table" in
inTagsIndented tableType $ captionDoc $$
(colHeadsToDocbook opts alignStrings widths headers) $$
(vcat $ map (tableRowToDocbook opts alignStrings) rows)
tableType = if isEmpty captionDoc then "informaltable" else "table"
in inTagsIndented tableType $ captionDoc $$
(colHeadsToDocbook opts alignStrings widths headers) $$
(vcat $ map (tableRowToDocbook opts alignStrings) rows)
colHeadsToDocbook opts alignStrings widths headers =
let heads = zipWith3
(\align width item -> tableItemToDocbook opts "th" align width item)
alignStrings widths headers in
inTagsIndented "tr" $ vcat heads
let heads = zipWith3 (\align width item ->
tableItemToDocbook opts "th" align width item)
alignStrings widths headers
in inTagsIndented "tr" $ vcat heads
alignmentToString alignment = case alignment of
AlignLeft -> "left"
@ -215,20 +231,16 @@ alignmentToString alignment = case alignment of
AlignCenter -> "center"
AlignDefault -> "left"
tableRowToDocbook opts aligns cols =
inTagsIndented "tr" $ vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
tableRowToDocbook opts aligns cols = inTagsIndented "tr" $
vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
tableItemToDocbook opts tag align width item =
let attrib = [("align", align)] ++
if (width /= 0)
then [("style", "{width: " ++
show (truncate (100*width)) ++ "%;}")]
else [] in
inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
-- | Put string in CDATA section
cdata :: String -> Doc
cdata str = text $ "<![CDATA[" ++ str ++ "]]>"
if width /= 0
then [("style", "{width: " ++
show (truncate (100*width)) ++ "%;}")]
else []
in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
-- | Take list of inline elements and return wrapped doc.
wrap :: WriterOptions -> [Inline] -> Doc
@ -236,25 +248,24 @@ wrap opts lst = fsep $ map (inlinesToDocbook opts) (splitBy Space lst)
-- | Convert a list of inline elements to Docbook.
inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst)
inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
-- | Convert an inline element to Docbook.
inlineToDocbook :: WriterOptions -> Inline -> Doc
inlineToDocbook opts (Str str) = text $ escapeStringForXML str
inlineToDocbook opts (Emph lst) =
inTagsSimple "emphasis" (inlinesToDocbook opts lst)
inTagsSimple "emphasis" $ inlinesToDocbook opts lst
inlineToDocbook opts (Strong lst) =
inTags False "emphasis" [("role", "strong")]
(inlinesToDocbook opts lst)
inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst
inlineToDocbook opts (Strikeout lst) =
inTags False "emphasis" [("role", "strikethrough")]
(inlinesToDocbook opts lst)
inTags False "emphasis" [("role", "strikethrough")] $
inlinesToDocbook opts lst
inlineToDocbook opts (Superscript lst) =
inTagsSimple "superscript" (inlinesToDocbook opts lst)
inTagsSimple "superscript" $ inlinesToDocbook opts lst
inlineToDocbook opts (Subscript lst) =
inTagsSimple "subscript" (inlinesToDocbook opts lst)
inTagsSimple "subscript" $ inlinesToDocbook opts lst
inlineToDocbook opts (Quoted _ lst) =
inTagsSimple "quote" (inlinesToDocbook opts lst)
inTagsSimple "quote" $ inlinesToDocbook opts lst
inlineToDocbook opts Apostrophe = char '\''
inlineToDocbook opts Ellipses = text "&#8230;"
inlineToDocbook opts EmDash = text "&#8212;"
@ -263,26 +274,24 @@ inlineToDocbook opts (Code str) =
inTagsSimple "literal" $ text (escapeStringForXML str)
inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
inlineToDocbook opts (HtmlInline str) = empty
inlineToDocbook opts LineBreak =
text $ "<literallayout></literallayout>"
inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>"
inlineToDocbook opts Space = char ' '
inlineToDocbook opts (Link txt (src, tit)) =
if isPrefixOf "mailto:" src
then let src' = drop 7 src
emailLink = inTagsSimple "email" $ text (escapeStringForXML $ src')
in if txt == [Code src']
then emailLink
else inlinesToDocbook opts txt <+> char '(' <> emailLink <>
char ')'
else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
then let src' = drop 7 src
emailLink = inTagsSimple "email" $ text $
escapeStringForXML $ src'
in if txt == [Code src']
then emailLink
else inlinesToDocbook opts txt <+> char '(' <> emailLink <>
char ')'
else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
inlineToDocbook opts (Image alt (src, tit)) =
let titleDoc = if null tit
then empty
else inTagsIndented "objectinfo" $
inTagsIndented "title"
(text $ escapeStringForXML tit) in
inTagsIndented "inlinemediaobject" $
inTagsIndented "imageobject" $
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
inTagsIndented "title" (text $ escapeStringForXML tit)
in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
inlineToDocbook opts (Note contents) =
inTagsIndented "footnote" $ blocksToDocbook opts contents

View file

@ -27,15 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to HTML.
-}
module Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString ) where
module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
import Text.Pandoc.Definition
import Text.Pandoc.ASCIIMathML
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.Pandoc.Shared
import Text.Pandoc.Entities (decodeEntities)
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
import Data.List ( isPrefixOf, partition, intersperse )
import Data.List ( isPrefixOf, intersperse )
import qualified Data.Set as S
import Control.Monad.State
import Text.XHtml.Transitional
@ -55,8 +55,8 @@ defaultWriterState = WriterState {stNotes= [], stIds = [],
writeHtmlString :: WriterOptions -> Pandoc -> String
writeHtmlString opts =
if writerStandalone opts
then renderHtml . (writeHtml opts)
else renderHtmlFragment . (writeHtml opts)
then renderHtml . writeHtml opts
else renderHtmlFragment . writeHtml opts
-- | Convert Pandoc document to Html structure.
writeHtml :: WriterOptions -> Pandoc -> Html
@ -74,49 +74,51 @@ writeHtml opts (Pandoc (Meta tit authors date) blocks) =
map (\a -> meta ! [name "author", content a]) authors) +++
(if null date
then noHtml
else meta ! [name "date", content date])
titleHeader = if (writerStandalone opts) && (not (null tit)) &&
(not (writerS5 opts))
else meta ! [name "date", content date])
titleHeader = if writerStandalone opts && not (null tit) &&
not (writerS5 opts)
then h1 ! [theclass "title"] $ topTitle
else noHtml
headerBlocks = filter isHeaderBlock blocks
ids = uniqueIdentifiers $ map (\(Header _ lst) -> lst) headerBlocks
toc = if writerTableOfContents opts
then tableOfContents opts headerBlocks ids
else noHtml
ids = uniqueIdentifiers $
map (\(Header _ lst) -> lst) headerBlocks
toc = if writerTableOfContents opts
then tableOfContents opts headerBlocks ids
else noHtml
(blocks', newstate) =
runState (blockListToHtml opts blocks)
(defaultWriterState {stIds = ids})
cssLines = stCSS newstate
css = if S.null cssLines
then noHtml
else style ! [thetype "text/css"] $ primHtml $
'\n':(unlines $ S.toList cssLines)
math = if stMath newstate
then case writerASCIIMathMLURL opts of
Just path -> script ! [src path,
thetype "text/javascript"] $ noHtml
Nothing -> primHtml asciiMathMLScript
else noHtml
head = header $ metadata +++ math +++ css +++
primHtml (writerHeader opts)
notes = reverse (stNotes newstate)
before = primHtml $ writerIncludeBefore opts
after = primHtml $ writerIncludeAfter opts
thebody = before +++ titleHeader +++ toc +++ blocks' +++
footnoteSection opts notes +++ after
runState (blockListToHtml opts blocks)
(defaultWriterState {stIds = ids})
cssLines = stCSS newstate
css = if S.null cssLines
then noHtml
else style ! [thetype "text/css"] $ primHtml $
'\n':(unlines $ S.toList cssLines)
math = if stMath newstate
then case writerASCIIMathMLURL opts of
Just path -> script ! [src path,
thetype "text/javascript"] $
noHtml
Nothing -> primHtml asciiMathMLScript
else noHtml
head = header $ metadata +++ math +++ css +++
primHtml (writerHeader opts)
notes = reverse (stNotes newstate)
before = primHtml $ writerIncludeBefore opts
after = primHtml $ writerIncludeAfter opts
thebody = before +++ titleHeader +++ toc +++ blocks' +++
footnoteSection opts notes +++ after
in if writerStandalone opts
then head +++ (body thebody)
then head +++ body thebody
else thebody
-- | Construct table of contents from list of header blocks and identifiers.
-- Assumes there are as many identifiers as header blocks.
tableOfContents :: WriterOptions -> [Block] -> [String] -> Html
tableOfContents opts headers ids =
let opts' = opts { writerIgnoreNotes = True }
let opts' = opts { writerIgnoreNotes = True }
contentsTree = hierarchicalize headers
contents = evalState (mapM (elementToListItem opts') contentsTree)
(defaultWriterState {stIds = ids})
contents = evalState (mapM (elementToListItem opts') contentsTree)
(defaultWriterState {stIds = ids})
in thediv ! [identifier "toc"] $ unordList contents
-- | Converts an Element to a list item for a table of contents,
@ -135,7 +137,8 @@ elementToListItem opts (Sec headerText subsecs) = do
let subList = if null subHeads
then noHtml
else unordList subHeads
return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++ subList
return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++
subList
-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
@ -143,62 +146,61 @@ footnoteSection :: WriterOptions -> [Html] -> Html
footnoteSection opts notes =
if null notes
then noHtml
else thediv ! [theclass "footnotes"] $
hr +++ (olist << notes)
else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes)
-- | Obfuscate a "mailto:" link using Javascript.
obfuscateLink :: WriterOptions -> String -> String -> Html
obfuscateLink opts text src =
let emailRegex = mkRegex "^mailto:([^@]*)@(.*)$"
src' = map toLower src in
case (matchRegex emailRegex src') of
(Just [name, domain]) ->
let domain' = substitute "." " dot " domain
at' = obfuscateChar '@'
(linkText, altText) =
if text == drop 7 src' -- autolink
then ("'<code>'+e+'</code>'", name ++ " at " ++ domain')
else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++
domain' ++ ")") in
if writerStrictMarkdown opts
then -- need to use primHtml or &'s are escaped to &amp; in URL
primHtml $ "<a href=\"" ++ (obfuscateString src')
++ "\">" ++ (obfuscateString text) ++ "</a>"
else (script ! [thetype "text/javascript"] $
primHtml ("\n<!--\nh='" ++
obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
obfuscateString name ++ "';e=n+a+h;\n" ++
"document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
noscript (primHtml $ obfuscateString altText)
_ -> anchor ! [href src] $ primHtml text -- malformed email
src' = map toLower src
in case (matchRegex emailRegex src') of
(Just [name, domain]) ->
let domain' = substitute "." " dot " domain
at' = obfuscateChar '@'
(linkText, altText) =
if text == drop 7 src' -- autolink
then ("'<code>'+e+'</code>'", name ++ " at " ++ domain')
else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++
domain' ++ ")")
in if writerStrictMarkdown opts
then -- need to use primHtml or &'s are escaped to &amp; in URL
primHtml $ "<a href=\"" ++ (obfuscateString src')
++ "\">" ++ (obfuscateString text) ++ "</a>"
else (script ! [thetype "text/javascript"] $
primHtml ("\n<!--\nh='" ++
obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
obfuscateString name ++ "';e=n+a+h;\n" ++
"document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
noscript (primHtml $ obfuscateString altText)
_ -> anchor ! [href src] $ primHtml text -- malformed email
-- | Obfuscate character as entity.
obfuscateChar :: Char -> String
obfuscateChar char =
let num = ord char in
let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in
"&#" ++ numstr ++ ";"
let num = ord char
numstr = if even num then show num else "x" ++ showHex num ""
in "&#" ++ numstr ++ ";"
-- | Obfuscate string using entities.
obfuscateString :: String -> String
obfuscateString = (concatMap obfuscateChar) . decodeEntities
obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
-- | True if character is a punctuation character (unicode).
isPunctuation :: Char -> Bool
isPunctuation c =
let c' = ord c in
if (c `elem` "!\"'()*,-./:;<>?[\\]`{|}~") || (c' >= 0x2000 && c' <= 0x206F) ||
(c' >= 0xE000 && c' <= 0xE0FF)
then True
else False
let c' = ord c
in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
c' >= 0xE000 && c' <= 0xE0FF
then True
else False
-- | Add CSS for document header.
addToCSS :: String -> State WriterState ()
addToCSS item = do
st <- get
let current = stCSS st
put $ st {stCSS = (S.insert item current)}
put $ st {stCSS = S.insert item current}
-- | Convert Pandoc inline list to plain text identifier.
inlineListToIdentifier :: [Inline] -> String
@ -206,27 +208,26 @@ inlineListToIdentifier [] = ""
inlineListToIdentifier (x:xs) =
xAsText ++ inlineListToIdentifier xs
where xAsText = case x of
Str s -> filter
(\c -> (c == '-') || not (isPunctuation c)) $
concat $ intersperse "-" $ words $ map toLower s
Emph lst -> inlineListToIdentifier lst
Strikeout lst -> inlineListToIdentifier lst
Superscript lst -> inlineListToIdentifier lst
Subscript lst -> inlineListToIdentifier lst
Strong lst -> inlineListToIdentifier lst
Quoted _ lst -> inlineListToIdentifier lst
Code s -> s
Space -> "-"
EmDash -> "-"
EnDash -> "-"
Apostrophe -> ""
Ellipses -> ""
LineBreak -> "-"
TeX _ -> ""
HtmlInline _ -> ""
Link lst _ -> inlineListToIdentifier lst
Image lst _ -> inlineListToIdentifier lst
Note _ -> ""
Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
concat $ intersperse "-" $ words $ map toLower s
Emph lst -> inlineListToIdentifier lst
Strikeout lst -> inlineListToIdentifier lst
Superscript lst -> inlineListToIdentifier lst
Subscript lst -> inlineListToIdentifier lst
Strong lst -> inlineListToIdentifier lst
Quoted _ lst -> inlineListToIdentifier lst
Code s -> s
Space -> "-"
EmDash -> "-"
EnDash -> "-"
Apostrophe -> ""
Ellipses -> ""
LineBreak -> "-"
TeX _ -> ""
HtmlInline _ -> ""
Link lst _ -> inlineListToIdentifier lst
Image lst _ -> inlineListToIdentifier lst
Note _ -> ""
-- | Return unique identifiers for list of inline lists.
uniqueIdentifiers :: [[Inline]] -> [String]
@ -236,102 +237,99 @@ uniqueIdentifiers ls =
matches = length $ filter (== new) nonuniqueIds
new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
in (new:nonuniqueIds, new':uniqueIds)
in reverse $ snd (foldl addIdentifier ([],[]) $ ls)
in reverse $ snd $ foldl addIdentifier ([],[]) ls
-- | Convert Pandoc block element to HTML.
blockToHtml :: WriterOptions -> Block -> State WriterState Html
blockToHtml opts block =
case block of
(Null) -> return $ noHtml
(Plain lst) -> inlineListToHtml opts lst
(Para lst) -> inlineListToHtml opts lst >>= (return . paragraph)
(RawHtml str) -> return $ primHtml str
(HorizontalRule) -> return $ hr
(CodeBlock str) -> return $ pre $ thecode << (str ++ "\n")
blockToHtml opts Null = return $ noHtml
blockToHtml opts (Plain lst) = inlineListToHtml opts lst
blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph)
blockToHtml opts (RawHtml str) = return $ primHtml str
blockToHtml opts (HorizontalRule) = return $ hr
blockToHtml opts (CodeBlock str) = return $ pre $ thecode << (str ++ "\n")
-- the final \n for consistency with Markdown.pl
(BlockQuote blocks) -> -- in S5, treat list in blockquote specially
-- if default is incremental, make it nonincremental;
-- otherwise incremental
if writerS5 opts
then let inc = not (writerIncremental opts) in
case blocks of
[BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
(BulletList lst)
[OrderedList attribs lst] ->
blockToHtml (opts {writerIncremental = inc})
(OrderedList attribs lst)
otherwise -> blockListToHtml opts blocks >>=
(return . blockquote)
else blockListToHtml opts blocks >>= (return . blockquote)
(Header level lst) -> do contents <- inlineListToHtml opts lst
st <- get
let ids = stIds st
let (id, rest) = if null ids
then ("", [])
else (head ids, tail ids)
put $ st {stIds = rest}
let attribs = [identifier id]
let headerHtml = case level of
1 -> h1 contents ! attribs
2 -> h2 contents ! attribs
3 -> h3 contents ! attribs
4 -> h4 contents ! attribs
5 -> h5 contents ! attribs
6 -> h6 contents ! attribs
_ -> paragraph contents ! attribs
let headerHtml' = if writerTableOfContents opts
then anchor ! [href ("#TOC-" ++ id)] $
headerHtml
else headerHtml
return headerHtml'
(BulletList lst) -> do contents <- mapM (blockListToHtml opts) lst
let attribs = if writerIncremental opts
then [theclass "incremental"]
else []
return $ unordList ! attribs $ contents
(OrderedList (startnum, numstyle, _) lst) -> do
contents <- mapM (blockListToHtml opts) lst
let numstyle' = camelCaseToHyphenated $ show numstyle
let attribs = (if writerIncremental opts
then [theclass "incremental"]
else []) ++
(if startnum /= 1
then [start startnum]
else []) ++
(if numstyle /= DefaultStyle
then [theclass numstyle']
else [])
if numstyle /= DefaultStyle
then addToCSS $ "ol." ++ numstyle' ++
" { list-style-type: " ++
numstyle' ++ "; }"
else return ()
return $ ordList ! attribs $ contents
(DefinitionList lst) -> do contents <- mapM (\(term, def) ->
do term' <- inlineListToHtml opts term
def' <- blockListToHtml opts def
return $ (term', def'))
lst
let attribs = if writerIncremental opts
then [theclass "incremental"]
else []
return $ defList ! attribs $ contents
(Table capt aligns widths headers rows) ->
do let alignStrings = map alignmentToString aligns
captionDoc <- if null capt
then return noHtml
else inlineListToHtml opts capt >>=
(return . caption)
colHeads <- colHeadsToHtml opts alignStrings
widths headers
rows' <- mapM (tableRowToHtml opts alignStrings) rows
return $ table $ captionDoc +++ colHeads +++ rows'
blockToHtml opts (BlockQuote blocks) =
-- in S5, treat list in blockquote specially
-- if default is incremental, make it nonincremental;
-- otherwise incremental
if writerS5 opts
then let inc = not (writerIncremental opts) in
case blocks of
[BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
(BulletList lst)
[OrderedList attribs lst] ->
blockToHtml (opts {writerIncremental = inc})
(OrderedList attribs lst)
otherwise -> blockListToHtml opts blocks >>=
(return . blockquote)
else blockListToHtml opts blocks >>= (return . blockquote)
blockToHtml opts (Header level lst) = do
contents <- inlineListToHtml opts lst
st <- get
let ids = stIds st
let (id, rest) = if null ids
then ("", [])
else (head ids, tail ids)
put $ st {stIds = rest}
let attribs = [identifier id]
let headerHtml = case level of
1 -> h1 contents ! attribs
2 -> h2 contents ! attribs
3 -> h3 contents ! attribs
4 -> h4 contents ! attribs
5 -> h5 contents ! attribs
6 -> h6 contents ! attribs
_ -> paragraph contents ! attribs
return $ if writerTableOfContents opts
then anchor ! [href ("#TOC-" ++ id)] $ headerHtml
else headerHtml
blockToHtml opts (BulletList lst) = do
contents <- mapM (blockListToHtml opts) lst
let attribs = if writerIncremental opts
then [theclass "incremental"]
else []
return $ unordList ! attribs $ contents
blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
contents <- mapM (blockListToHtml opts) lst
let numstyle' = camelCaseToHyphenated $ show numstyle
let attribs = (if writerIncremental opts
then [theclass "incremental"]
else []) ++
(if startnum /= 1
then [start startnum]
else []) ++
(if numstyle /= DefaultStyle
then [theclass numstyle']
else [])
if numstyle /= DefaultStyle
then addToCSS $ "ol." ++ numstyle' ++
" { list-style-type: " ++
numstyle' ++ "; }"
else return ()
return $ ordList ! attribs $ contents
blockToHtml opts (DefinitionList lst) = do
contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term
def' <- blockListToHtml opts def
return $ (term', def')) lst
let attribs = if writerIncremental opts
then [theclass "incremental"]
else []
return $ defList ! attribs $ contents
blockToHtml opts (Table capt aligns widths headers rows) = do
let alignStrings = map alignmentToString aligns
captionDoc <- if null capt
then return noHtml
else inlineListToHtml opts capt >>= return . caption
colHeads <- colHeadsToHtml opts alignStrings
widths headers
rows' <- mapM (tableRowToHtml opts alignStrings) rows
return $ table $ captionDoc +++ colHeads +++ rows'
colHeadsToHtml opts alignStrings widths headers =
do heads <- sequence $ zipWith3
(\align width item -> tableItemToHtml opts th align width item)
alignStrings widths headers
return $ tr $ toHtmlFromList heads
colHeadsToHtml opts alignStrings widths headers = do
heads <- sequence $ zipWith3
(\align width item -> tableItemToHtml opts th align width item)
alignStrings widths headers
return $ tr $ toHtmlFromList heads
alignmentToString alignment = case alignment of
AlignLeft -> "left"
@ -339,24 +337,27 @@ alignmentToString alignment = case alignment of
AlignCenter -> "center"
AlignDefault -> "left"
tableRowToHtml opts aligns cols =
do contents <- sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols
return $ tr $ toHtmlFromList contents
tableRowToHtml opts aligns cols =
(sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols) >>=
return . tr . toHtmlFromList
tableItemToHtml opts tag align' width item =
do contents <- blockListToHtml opts item
let attrib = [align align'] ++
if (width /= 0)
then [thestyle ("{width: " ++ show (truncate (100*width)) ++ "%;}")]
else []
return $ tag ! attrib $ contents
tableItemToHtml opts tag align' width item = do
contents <- blockListToHtml opts item
let attrib = [align align'] ++
if width /= 0
then [thestyle ("{width: " ++ show (truncate (100*width)) ++
"%;}")]
else []
return $ tag ! attrib $ contents
blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
blockListToHtml opts lst = mapM (blockToHtml opts) lst >>= (return . toHtmlFromList)
blockListToHtml opts lst =
mapM (blockToHtml opts) lst >>= return . toHtmlFromList
-- | Convert list of Pandoc inline elements to HTML.
inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= (return . toHtmlFromList)
inlineListToHtml opts lst =
mapM (inlineToHtml opts) lst >>= return . toHtmlFromList
-- | Convert Pandoc inline element to HTML.
inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
@ -369,52 +370,58 @@ inlineToHtml opts inline =
(EnDash) -> return $ primHtmlChar "ndash"
(Ellipses) -> return $ primHtmlChar "hellip"
(Apostrophe) -> return $ primHtmlChar "rsquo"
(Emph lst) -> inlineListToHtml opts lst >>= (return . emphasize)
(Strong lst) -> inlineListToHtml opts lst >>= (return . strong)
(Emph lst) -> inlineListToHtml opts lst >>= return . emphasize
(Strong lst) -> inlineListToHtml opts lst >>= return . strong
(Code str) -> return $ thecode << str
(Strikeout lst) -> addToCSS ".strikeout { text-decoration: line-through; }" >>
(Strikeout lst) -> addToCSS
".strikeout { text-decoration: line-through; }" >>
inlineListToHtml opts lst >>=
(return . (thespan ! [theclass "strikeout"]))
(Superscript lst) -> inlineListToHtml opts lst >>= (return . sup)
(Subscript lst) -> inlineListToHtml opts lst >>= (return . sub)
return . (thespan ! [theclass "strikeout"])
(Superscript lst) -> inlineListToHtml opts lst >>= return . sup
(Subscript lst) -> inlineListToHtml opts lst >>= return . sub
(Quoted quoteType lst) ->
let (leftQuote, rightQuote) = case quoteType of
SingleQuote -> (primHtmlChar "lsquo",
primHtmlChar "rsquo")
DoubleQuote -> (primHtmlChar "ldquo",
primHtmlChar "rdquo") in
do contents <- inlineListToHtml opts lst
return $ leftQuote +++ contents +++ rightQuote
(TeX str) -> do if writerUseASCIIMathML opts
then modify (\st -> st {stMath = True})
else return ()
return $ stringToHtml str
primHtmlChar "rdquo")
in do contents <- inlineListToHtml opts lst
return $ leftQuote +++ contents +++ rightQuote
(TeX str) -> (if writerUseASCIIMathML opts
then modify (\st -> st {stMath = True})
else return ()) >> return (stringToHtml str)
(HtmlInline str) -> return $ primHtml str
(Link [Code str] (src,tit)) | "mailto:" `isPrefixOf` src ->
do return $ obfuscateLink opts str src
(Link txt (src,tit)) | "mailto:" `isPrefixOf` src ->
do linkText <- inlineListToHtml opts txt
return $ obfuscateLink opts (show linkText) src
(Link txt (src,tit)) ->
do linkText <- inlineListToHtml opts txt
return $ anchor ! ([href src] ++
if null tit then [] else [title tit]) $ linkText
(Image txt (source,tit)) ->
do alternate <- inlineListToHtml opts txt
let alternate' = renderHtmlFragment alternate
let attributes = [src source, title tit] ++
if null txt then [] else [alt alternate']
return $ image ! attributes
-- note: null title included, as in Markdown.pl
(Note contents) -> do st <- get
let notes = stNotes st
let number = (length notes) + 1
let ref = show number
htmlContents <- blockListToNote opts ref contents
put $ st {stNotes = (htmlContents:notes)} -- push contents onto front of notes
return $ anchor ! [href ("#fn" ++ ref),
theclass "footnoteRef",
identifier ("fnref" ++ ref)] << sup << ref
return $ obfuscateLink opts str src
(Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> do
linkText <- inlineListToHtml opts txt
return $ obfuscateLink opts (show linkText) src
(Link txt (src,tit)) -> do
linkText <- inlineListToHtml opts txt
return $ anchor ! ([href src] ++
if null tit then [] else [title tit]) $
linkText
(Image txt (source,tit)) -> do
alternate <- inlineListToHtml opts txt
let alternate' = renderHtmlFragment alternate
let attributes = [src source, title tit] ++
if null txt
then []
else [alt alternate']
return $ image ! attributes
-- note: null title included, as in Markdown.pl
(Note contents) -> do
st <- get
let notes = stNotes st
let number = (length notes) + 1
let ref = show number
htmlContents <- blockListToNote opts ref contents
-- push contents onto front of notes
put $ st {stNotes = (htmlContents:notes)}
return $ anchor ! [href ("#fn" ++ ref),
theclass "footnoteRef",
identifier ("fnref" ++ ref)] <<
sup << ref
blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
blockListToNote opts ref blocks =
@ -434,6 +441,6 @@ blockListToNote opts ref blocks =
[Plain (lst ++ backlink)]
_ -> otherBlocks ++ [lastBlock,
Plain backlink]
in do contents <- blockListToHtml opts blocks'
return $ li ! [identifier ("fn" ++ ref)] $ contents
in do contents <- blockListToHtml opts blocks'
return $ li ! [identifier ("fn" ++ ref)] $ contents

View file

@ -27,16 +27,14 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' format into LaTeX.
-}
module Text.Pandoc.Writers.LaTeX (
writeLaTeX
) where
module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Printf ( printf )
import Data.List ( (\\), isInfixOf )
import Data.Char ( toLower )
import qualified Data.Set as S
import Control.Monad.State
import Data.Char ( toLower )
data WriterState =
WriterState { stIncludes :: S.Set String -- strings to include in header
@ -77,16 +75,16 @@ latexHeader :: WriterOptions -- ^ Options, including LaTeX header
-> Meta -- ^ Meta with bibliographic information
-> State WriterState String
latexHeader options (Meta title authors date) = do
titletext <- if null title
then return ""
else do title' <- inlineListToLaTeX title
return $ "\\title{" ++ title' ++ "}\n"
extras <- get >>= (return . unlines . S.toList. stIncludes)
titletext <- if null title
then return ""
else do title' <- inlineListToLaTeX title
return $ "\\title{" ++ title' ++ "}\n"
extras <- get >>= (return . unlines . S.toList. stIncludes)
let verbatim = if "\\usepackage{fancyvrb}" `isInfixOf` extras
then "\\VerbatimFootnotes % allows verbatim text in footnotes\n"
else ""
let authorstext = "\\author{" ++ (joinWithSep "\\\\"
(map stringToLaTeX authors)) ++ "}\n"
let authorstext = "\\author{" ++
joinWithSep "\\\\" (map stringToLaTeX authors) ++ "}\n"
let datetext = if date == ""
then ""
else "\\date{" ++ stringToLaTeX date ++ "}\n"
@ -124,8 +122,8 @@ deVerb (other:rest) = other:(deVerb rest)
blockToLaTeX :: Block -- ^ Block to convert
-> State WriterState String
blockToLaTeX Null = return ""
blockToLaTeX (Plain lst) = (inlineListToLaTeX lst) >>= (return . (++ "\n"))
blockToLaTeX (Para lst) = (inlineListToLaTeX lst) >>= (return . (++ "\n\n"))
blockToLaTeX (Plain lst) = inlineListToLaTeX lst >>= return . (++ "\n")
blockToLaTeX (Para lst) = inlineListToLaTeX lst >>= return . (++ "\n\n")
blockToLaTeX (BlockQuote lst) = do
contents <- blockListToLaTeX lst
return $ "\\begin{quote}\n" ++ contents ++ "\\end{quote}\n"
@ -184,22 +182,22 @@ blockToLaTeX (Table caption aligns widths heads rows) = do
colWidths aligns
let tableBody = "\\begin{tabular}{" ++ colDescriptors ++ "}\n" ++
headers ++ "\\hline\n" ++ concat rows' ++ "\\end{tabular}\n"
let centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n"
let centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n"
addToHeader "\\usepackage{array}\n\
\% This is needed because raggedright in table elements redefines \\\\:\n\
\\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\
\\\let\\PBS=\\PreserveBackslash"
\% This is needed because raggedright in table elements redefines \\\\:\n\
\\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\
\\\let\\PBS=\\PreserveBackslash"
return $ if null captionText
then centered tableBody ++ "\n"
else "\\begin{table}[h]\n" ++ centered tableBody ++ "\\caption{" ++
captionText ++ "}\n" ++ "\\end{table}\n\n"
else "\\begin{table}[h]\n" ++ centered tableBody ++
"\\caption{" ++ captionText ++ "}\n" ++ "\\end{table}\n\n"
blockListToLaTeX lst = mapM blockToLaTeX lst >>= (return . concat)
blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . concat
tableRowToLaTeX cols =
mapM blockListToLaTeX cols >>= (return . (++ "\\\\\n") . (joinWithSep " & "))
mapM blockListToLaTeX cols >>= return . (++ "\\\\\n") . (joinWithSep " & ")
listItemToLaTeX lst = blockListToLaTeX lst >>= (return . ("\\item "++))
listItemToLaTeX lst = blockListToLaTeX lst >>= return . ("\\item "++)
defListItemToLaTeX (term, def) = do
term' <- inlineListToLaTeX $ deVerb term
@ -209,8 +207,7 @@ defListItemToLaTeX (term, def) = do
-- | Convert list of inline elements to LaTeX.
inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
-> State WriterState String
inlineListToLaTeX lst =
mapM inlineToLaTeX lst >>= (return . concat)
inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . concat
isQuoted :: Inline -> Bool
isQuoted (Quoted _ _) = True

View file

@ -28,14 +28,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to groff man page format.
-}
module Text.Pandoc.Writers.Man (
writeMan
) where
module Text.Pandoc.Writers.Man ( writeMan) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Printf ( printf )
import Data.Char ( toUpper )
import Data.List ( group, isPrefixOf, drop, find, nub, intersperse )
import Data.List ( isPrefixOf, drop, nub, intersperse )
import Text.PrettyPrint.HughesPJ hiding ( Str )
import Control.Monad.State
@ -45,16 +42,15 @@ type WriterState = (Notes, Preprocessors)
-- | Convert Pandoc to Man.
writeMan :: WriterOptions -> Pandoc -> String
writeMan opts document =
render $ evalState (pandocToMan opts document) ([],[])
writeMan opts document = render $ evalState (pandocToMan opts document) ([],[])
-- | Return groff man representation of document.
pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc
pandocToMan opts (Pandoc meta blocks) = do
let before = writerIncludeBefore opts
let after = writerIncludeAfter opts
before' = if null before then empty else text before
after' = if null after then empty else text after
let before' = if null before then empty else text before
let after' = if null after then empty else text after
(head, foot) <- metaToMan opts meta
body <- blockListToMan opts blocks
(notes, preprocessors) <- get
@ -84,8 +80,8 @@ metaToMan options (Meta title authors date) = do
1 -> text ".SH AUTHOR" $$ (text $ joinWithSep ", " authors)
2 -> text ".SH AUTHORS" $$ (text $ joinWithSep ", " authors)
return $ if writerStandalone options
then (head, foot)
else (empty, empty)
then (head, foot)
else (empty, empty)
-- | Return man representation of notes.
notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc
@ -93,7 +89,7 @@ notesToMan opts notes =
if null notes
then return empty
else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
(return . (text ".SH NOTES" $$) . vcat)
return . (text ".SH NOTES" $$) . vcat
-- | Return man representation of a note.
noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc
@ -110,8 +106,7 @@ wrappedMan opts sect = do
-- | Association list of characters to escape.
manEscapes :: [(Char, String)]
manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++
backslashEscapes "\".@\\"
manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "\".@\\"
-- | Escape special characters for Man.
escapeString :: String -> String
@ -140,8 +135,7 @@ blockToMan opts (Header level inlines) = do
return $ text heading <> contents
blockToMan opts (CodeBlock str) = return $
text ".PP" $$ text "\\f[CR]" $$
text ((unlines . map (" " ++) . lines) (escapeCode str)) <>
text "\\f[]"
text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]"
blockToMan opts (BlockQuote blocks) = do
contents <- blockListToMan opts blocks
return $ text ".RS" $$ contents $$ text ".RE"

View file

@ -29,9 +29,7 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text.
Markdown: <http://daringfireball.net/projects/markdown/>
-}
module Text.Pandoc.Writers.Markdown (
writeMarkdown
) where
module Text.Pandoc.Writers.Markdown ( writeMarkdown) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Blocks
@ -53,10 +51,10 @@ pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc
pandocToMarkdown opts (Pandoc meta blocks) = do
let before = writerIncludeBefore opts
let after = writerIncludeAfter opts
before' = if null before then empty else text before
after' = if null after then empty else text after
let before' = if null before then empty else text before
let after' = if null after then empty else text after
metaBlock <- metaToMarkdown opts meta
let head = if (writerStandalone opts)
let head = if writerStandalone opts
then metaBlock $+$ text (writerHeader opts)
else empty
let headerBlocks = filter isHeaderBlock blocks
@ -73,8 +71,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
-- | Return markdown representation of reference key table.
keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc
keyTableToMarkdown opts refs =
mapM (keyToMarkdown opts) refs >>= (return . vcat)
keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
-- | Return markdown representation of a reference key.
keyToMarkdown :: WriterOptions
@ -90,7 +87,7 @@ keyToMarkdown opts (label, (src, tit)) = do
notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc
notesToMarkdown opts notes =
mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=
(return . vcat)
return . vcat
-- | Return markdown representation of a note.
noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc
@ -143,8 +140,7 @@ tableOfContents opts headers =
-- | Converts an Element to a list item for a table of contents,
elementToListItem :: Element -> [Block]
elementToListItem (Blk _) = []
elementToListItem (Sec headerText subsecs) =
[Plain headerText] ++
elementToListItem (Sec headerText subsecs) = [Plain headerText] ++
if null subsecs
then []
else [BulletList $ map elementToListItem subsecs]
@ -184,9 +180,8 @@ blockToMarkdown opts (Table caption aligns widths headers rows) = do
let makeRow = hsepBlocks . (zipWith alignHeader aligns) .
(zipWith docToBlock widthsInChars)
let head = makeRow headers'
rows' <- mapM (\row -> do
cols <- mapM (blockListToMarkdown opts) row
return $ makeRow cols) rows
rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row
return $ makeRow cols) rows
let tableWidth = sum widthsInChars
let maxRowHeight = maximum $ map heightOfBlock (head:rows')
let isMultilineTable = maxRowHeight > 1
@ -208,8 +203,7 @@ blockToMarkdown opts (OrderedList attribs items) = do
let markers = orderedListMarkers attribs
let markers' = map (\m -> if length m < 3
then m ++ replicate (3 - length m) ' '
else m)
markers
else m) markers
contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
zip markers' items
return $ (vcat contents) <> text "\n"
@ -241,8 +235,8 @@ definitionListItemToMarkdown opts (label, items) = do
let tabStop = writerTabStop opts
let leader = char ':'
contents <- mapM (\item -> blockToMarkdown opts item >>=
(\txt -> return (leader $$ nest tabStop txt)))
items >>= (return . vcat)
(\txt -> return (leader $$ nest tabStop txt)))
items >>= return . vcat
return $ labelText $+$ contents
-- | Convert list of Pandoc block elements to markdown.
@ -250,29 +244,30 @@ blockListToMarkdown :: WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
-> State WriterState Doc
blockListToMarkdown opts blocks =
mapM (blockToMarkdown opts) blocks >>= (return . vcat)
mapM (blockToMarkdown opts) blocks >>= return . vcat
-- | Get reference for target; if none exists, create unique one and return.
-- Prefer label if possible; otherwise, generate a unique key.
getReference :: [Inline] -> Target -> State WriterState [Inline]
getReference label (src, tit) = do
(_,refs) <- get
case find ((== (src, tit)) . snd) refs of
Just (ref, _) -> return ref
Nothing -> do
let label' = case find ((== label) . fst) refs of
Just _ -> -- label is used; generate numerical label
case find (\n -> not (any (== [Str (show n)])
(map fst refs))) [1..10000] of
Just x -> [Str (show x)]
Nothing -> error "no unique label"
Nothing -> label
modify (\(notes, refs) -> (notes, (label', (src,tit)):refs))
return label'
(_,refs) <- get
case find ((== (src, tit)) . snd) refs of
Just (ref, _) -> return ref
Nothing -> do
let label' = case find ((== label) . fst) refs of
Just _ -> -- label is used; generate numerical label
case find (\n -> not (any (== [Str (show n)])
(map fst refs))) [1..10000] of
Just x -> [Str (show x)]
Nothing -> error "no unique label"
Nothing -> label
modify (\(notes, refs) -> (notes, (label', (src,tit)):refs))
return label'
-- | Convert list of Pandoc inline elements to markdown.
inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
inlineListToMarkdown opts lst = mapM (inlineToMarkdown opts) lst >>= (return . hcat)
inlineListToMarkdown opts lst =
mapM (inlineToMarkdown opts) lst >>= return . hcat
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
@ -327,13 +322,13 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
return $ if useAuto
then char '<' <> text srcSuffix <> char '>'
else if useRefLinks
then let first = char '[' <> linktext <> char ']'
second = if txt == ref
then text "[]"
else char '[' <> reftext <> char ']'
in first <> second
else char '[' <> linktext <> char ']' <>
char '(' <> text src <> linktitle <> char ')'
then let first = char '[' <> linktext <> char ']'
second = if txt == ref
then text "[]"
else char '[' <> reftext <> char ']'
in first <> second
else char '[' <> linktext <> char ']' <>
char '(' <> text src <> linktitle <> char ')'
inlineToMarkdown opts (Image alternate (source, tit)) = do
let txt = if (null alternate) || (alternate == [Str ""]) ||
(alternate == [Str source]) -- to prevent autolinks

View file

@ -29,13 +29,11 @@ Conversion of 'Pandoc' documents to reStructuredText.
reStructuredText: <http://docutils.sourceforge.net/rst.html>
-}
module Text.Pandoc.Writers.RST (
writeRST
) where
module Text.Pandoc.Writers.RST ( writeRST) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Blocks
import Data.List ( group, isPrefixOf, drop, find, intersperse )
import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse )
import Text.PrettyPrint.HughesPJ hiding ( Str )
import Control.Monad.State
@ -70,8 +68,7 @@ pandocToRST opts (Pandoc meta blocks) = do
-- | Return RST representation of reference key table.
keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
keyTableToRST opts refs =
mapM (keyToRST opts) refs >>= (return . vcat)
keyTableToRST opts refs = mapM (keyToRST opts) refs >>= return . vcat
-- | Return RST representation of a reference key.
keyToRST :: WriterOptions
@ -85,7 +82,7 @@ keyToRST opts (label, (src, tit)) = do
notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc
notesToRST opts notes =
mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>=
(return . vcat)
return . vcat
-- | Return RST representation of a note.
noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc
@ -96,8 +93,7 @@ noteToRST opts num note = do
-- | Return RST representation of picture reference table.
pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
pictTableToRST opts refs =
mapM (pictToRST opts) refs >>= (return . vcat)
pictTableToRST opts refs = mapM (pictToRST opts) refs >>= return . vcat
-- | Return RST representation of a picture substitution reference.
pictToRST :: WriterOptions
@ -112,7 +108,7 @@ pictToRST opts (label, (src, _)) = do
wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc
wrappedRST opts inlines =
mapM (wrappedRSTSection opts) (splitBy LineBreak inlines) >>=
(return . vcat)
return . vcat
wrappedRSTSection :: WriterOptions -> [Inline] -> State WriterState Doc
wrappedRSTSection opts sect = do
@ -160,21 +156,19 @@ blockToRST :: WriterOptions -- ^ Options
blockToRST opts Null = return empty
blockToRST opts (Plain inlines) = wrappedRST opts inlines
blockToRST opts (Para [TeX str]) =
let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in
return $ hang (text "\n.. raw:: latex\n") 3
(vcat $ map text (lines str'))
let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
return $ hang (text "\n.. raw:: latex\n") 3 $ vcat $ map text (lines str')
blockToRST opts (Para inlines) = do
contents <- wrappedRST opts inlines
return $ contents <> text "\n"
blockToRST opts (RawHtml str) =
let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in
return $ hang (text "\n.. raw:: html\n") 3
(vcat $ map text (lines str'))
let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
return $ hang (text "\n.. raw:: html\n") 3 $ vcat $ map text (lines str')
blockToRST opts HorizontalRule = return $ text "--------------\n"
blockToRST opts (Header level inlines) = do
contents <- inlineListToRST opts inlines
let headerLength = length $ render contents
let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1)
let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
let border = text $ replicate headerLength headerChar
return $ contents $+$ border <> text "\n"
blockToRST opts (CodeBlock str) = return $ (text "::\n") $+$
@ -200,11 +194,10 @@ blockToRST opts (Table caption aligns widths headers rows) = do
beg = TextBlock 2 height (replicate height "| ")
end = TextBlock 2 height (replicate height " |")
middle = hcatBlocks $ intersperse sep blocks
let makeRow = hpipeBlocks . (zipWith docToBlock widthsInChars)
let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars
let head = makeRow headers'
rows' <- mapM (\row -> do
cols <- mapM (blockListToRST opts) row
return $ makeRow cols) rows
rows' <- mapM (\row -> do cols <- mapM (blockListToRST opts) row
return $ makeRow cols) rows
let tableWidth = sum widthsInChars
let maxRowHeight = maximum $ map heightOfBlock (head:rows')
let border ch = char '+' <> char ch <>
@ -225,8 +218,7 @@ blockToRST opts (OrderedList (start, style, delim) items) = do
(start, style, delim)
let maxMarkerLength = maximum $ map length markers
let markers' = map (\m -> let s = maxMarkerLength - length m
in m ++ replicate s ' ')
markers
in m ++ replicate s ' ') markers
contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $
zip markers' items
-- ensure that sublists have preceding blank line
@ -262,11 +254,11 @@ blockListToRST :: WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
-> State WriterState Doc
blockListToRST opts blocks =
mapM (blockToRST opts) blocks >>= (return . vcat)
mapM (blockToRST opts) blocks >>= return . vcat
-- | Convert list of Pandoc inline elements to RST.
inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc
inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= (return . hcat)
inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= return . hcat
-- | Convert Pandoc inline element to RST.
inlineToRST :: WriterOptions -> Inline -> State WriterState Doc
@ -319,8 +311,8 @@ inlineToRST opts (Link txt (src, tit)) = do
inlineToRST opts (Image alternate (source, tit)) = do
(notes, refs, pics) <- get
let labelsUsed = map fst pics
let txt = if (null alternate) || (alternate == [Str ""]) ||
(alternate `elem` labelsUsed)
let txt = if null alternate || alternate == [Str ""] ||
alternate `elem` labelsUsed
then [Str $ "image" ++ show (length refs)]
else alternate
let pics' = if (txt, (source, tit)) `elem` pics

View file

@ -27,12 +27,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to RTF (rich text format).
-}
module Text.Pandoc.Writers.RTF ( writeRTF) where
module Text.Pandoc.Writers.RTF ( writeRTF ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Regex ( matchRegexAll, mkRegex )
import Data.List ( isSuffixOf )
import Data.Char ( ord, chr )
import Data.Char ( ord )
-- | Convert Pandoc to a string in rich text format.
writeRTF :: WriterOptions -> Pandoc -> String
@ -44,22 +44,22 @@ writeRTF options (Pandoc meta blocks) =
then tableOfContents $ filter isHeaderBlock blocks
else ""
foot = if writerStandalone options then "\n}\n" else ""
body = (writerIncludeBefore options) ++
body = writerIncludeBefore options ++
concatMap (blockToRTF 0 AlignDefault) blocks ++
(writerIncludeAfter options) in
head ++ toc ++ body ++ foot
writerIncludeAfter options
in head ++ toc ++ body ++ foot
-- | Construct table of contents from list of header blocks.
tableOfContents :: [Block] -> String
tableOfContents headers =
let contentsTree = hierarchicalize headers
in concatMap (blockToRTF 0 AlignDefault) $ [Header 1 [Str "Contents"],
BulletList (map elementToListItem contentsTree)]
in concatMap (blockToRTF 0 AlignDefault) $
[Header 1 [Str "Contents"],
BulletList (map elementToListItem contentsTree)]
elementToListItem :: Element -> [Block]
elementToListItem (Blk _) = []
elementToListItem (Sec sectext subsecs) =
[Plain sectext] ++
elementToListItem (Sec sectext subsecs) = [Plain sectext] ++
if null subsecs
then []
else [BulletList (map elementToListItem subsecs)]
@ -67,10 +67,10 @@ elementToListItem (Sec sectext subsecs) =
-- | Convert unicode characters (> 127) into rich text format representation.
handleUnicode :: String -> String
handleUnicode [] = []
handleUnicode (c:cs) = if (ord c) > 127
then '\\':'u':(show (ord c)) ++ "?" ++
(handleUnicode cs)
else c:(handleUnicode cs)
handleUnicode (c:cs) =
if ord c > 127
then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs
else c:(handleUnicode cs)
-- | Escape special characters.
escapeSpecial :: String -> String
@ -127,7 +127,7 @@ listIncrement = 360
-- | Returns appropriate bullet list marker for indent level.
bulletMarker :: Int -> String
bulletMarker indent = case (indent `mod` 720) of
bulletMarker indent = case indent `mod` 720 of
0 -> "\\bullet "
otherwise -> "\\endash "
@ -135,7 +135,7 @@ bulletMarker indent = case (indent `mod` 720) of
orderedMarkers :: Int -> ListAttributes -> [String]
orderedMarkers indent (start, style, delim) =
if style == DefaultStyle && delim == DefaultDelim
then case (indent `mod` 720) of
then case indent `mod` 720 of
0 -> orderedListMarkers (start, Decimal, Period)
otherwise -> orderedListMarkers (start, LowerAlpha, Period)
else orderedListMarkers (start, style, delim)
@ -145,21 +145,21 @@ rtfHeader :: String -- ^ header text
-> Meta -- ^ bibliographic information
-> String
rtfHeader headerText (Meta title authors date) =
let titletext = if null title
let titletext = if null title
then ""
else rtfPar 0 0 AlignCenter $
"\\b \\fs36 " ++ inlineListToRTF title
authorstext = if null authors
then ""
else rtfPar 0 0 AlignCenter ("\\b \\fs36 " ++
inlineListToRTF title)
authorstext = if null authors
then ""
else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\"
(map stringToRTF authors)))
datetext = if date == ""
then ""
else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in
let spacer = if null (titletext ++ authorstext ++ datetext)
else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\" $
map stringToRTF authors))
datetext = if date == ""
then ""
else rtfPar 0 0 AlignDefault "" in
headerText ++ titletext ++ authorstext ++ datetext ++ spacer
else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in
let spacer = if null (titletext ++ authorstext ++ datetext)
then ""
else rtfPar 0 0 AlignDefault "" in
headerText ++ titletext ++ authorstext ++ datetext ++ spacer
-- | Convert Pandoc block element to RTF.
blockToRTF :: Int -- ^ indent level
@ -168,31 +168,27 @@ blockToRTF :: Int -- ^ indent level
-> String
blockToRTF _ _ Null = ""
blockToRTF indent alignment (Plain lst) =
rtfCompact indent 0 alignment (inlineListToRTF lst)
rtfCompact indent 0 alignment $ inlineListToRTF lst
blockToRTF indent alignment (Para lst) =
rtfPar indent 0 alignment (inlineListToRTF lst)
rtfPar indent 0 alignment $ inlineListToRTF lst
blockToRTF indent alignment (BlockQuote lst) =
concatMap (blockToRTF (indent + indentIncrement) alignment) lst
blockToRTF indent _ (CodeBlock str) =
rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
blockToRTF _ _ (RawHtml str) = ""
blockToRTF indent alignment (BulletList lst) =
spaceAtEnd $
blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst
blockToRTF indent alignment (OrderedList attribs lst) =
spaceAtEnd $ concat $
blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst
blockToRTF indent alignment (DefinitionList lst) =
spaceAtEnd $
blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $
concatMap (definitionListItemToRTF alignment indent) lst
blockToRTF indent _ HorizontalRule =
rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash"
blockToRTF indent alignment (Header level lst) =
rtfPar indent 0 alignment ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++
(inlineListToRTF lst))
blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $
"\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst
blockToRTF indent alignment (Table caption aligns sizes headers rows) =
(tableRowToRTF True indent aligns sizes headers) ++ (concatMap
(tableRowToRTF False indent aligns sizes) rows) ++
tableRowToRTF True indent aligns sizes headers ++
concatMap (tableRowToRTF False indent aligns sizes) rows ++
rtfPar indent 0 alignment (inlineListToRTF caption)
tableRowToRTF :: Bool -> Int -> [Alignment] -> [Float] -> [[Block]] -> String
@ -201,8 +197,10 @@ tableRowToRTF header indent aligns sizes cols =
totalTwips = 6 * 1440 -- 6 inches
rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips))
0 sizes
cellDefs = map (\edge -> (if header then "\\clbrdrb\\brdrs"
else "") ++ "\\cellx" ++ show edge) rightEdges
cellDefs = map (\edge -> (if header
then "\\clbrdrb\\brdrs"
else "") ++ "\\cellx" ++ show edge)
rightEdges
start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++
"\\trkeep\\intbl\n{\n"
end = "}\n\\intbl\\row}\n"
@ -234,11 +232,12 @@ listItemToRTF alignment indent marker list =
let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list in
-- insert the list marker into the (processed) first block
let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of
Just (before, matched, after, _) -> before ++ "\\fi" ++
show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++
show listIncrement ++ "\\tab" ++ after
Just (before, matched, after, _) ->
before ++ "\\fi" ++ show (0 - listIncrement) ++
" " ++ marker ++ "\\tx" ++
show listIncrement ++ "\\tab" ++ after
Nothing -> first in
modFirst ++ (concat rest)
modFirst ++ concat rest
-- | Convert definition list item (label, list of blocks) to RTF.
definitionListItemToRTF :: Alignment -- ^ alignment
@ -285,4 +284,3 @@ inlineToRTF (Image alternate (source, tit)) =
inlineToRTF (Note contents) =
"{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
(concatMap (blockToRTF 0 AlignDefault) contents) ++ "}"

View file

@ -1,4 +1,32 @@
-- | Default headers for Pandoc writers.
{-
Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Writers.DefaultHeaders
Copyright : Copyright (C) 2006-7 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Default headers for Pandoc writers.
-}
module Text.Pandoc.Writers.DefaultHeaders (
defaultLaTeXHeader,
defaultConTeXtHeader,

View file

@ -1,5 +1,33 @@
-- | Definitions for creation of S5 powerpoint-like HTML.
-- (See <http://meyerweb.com/eric/tools/s5/>.)
{-
Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Writers.S5
Copyright : Copyright (C) 2006-7 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Definitions for creation of S5 powerpoint-like HTML.
(See <http://meyerweb.com/eric/tools/s5/>.)
-}
module Text.Pandoc.Writers.S5 (
-- * Strings
s5Meta,
@ -60,13 +88,13 @@ layoutDiv :: [Inline] -- ^ Title of document (for header or footer)
-> [Block] -- ^ List of block elements returned
layoutDiv title date = [(RawHtml "<div class=\"layout\">\n<div id=\"controls\"></div>\n<div id=\"currentSlide\"></div>\n<div id=\"header\"></div>\n<div id=\"footer\">\n"), (Header 1 [Str date]), (Header 2 title), (RawHtml "</div>\n</div>\n")]
presentationStart = (RawHtml "<div class=\"presentation\">\n\n")
presentationStart = RawHtml "<div class=\"presentation\">\n\n"
presentationEnd = (RawHtml "</div>\n")
presentationEnd = RawHtml "</div>\n"
slideStart = (RawHtml "<div class=\"slide\">\n")
slideStart = RawHtml "<div class=\"slide\">\n"
slideEnd = (RawHtml "</div>\n")
slideEnd = RawHtml "</div>\n"
-- | Returns 'True' if block is a Header 1.
isH1 :: Block -> Bool
@ -84,15 +112,22 @@ insertSlides beginning blocks =
beforeHead ++ [slideEnd]
else
if beginning then
beforeHead ++ slideStart:(head rest):(insertSlides False (tail rest))
beforeHead ++
slideStart:(head rest):(insertSlides False (tail rest))
else
beforeHead ++ slideEnd:slideStart:(head rest):(insertSlides False (tail rest))
beforeHead ++
slideEnd:slideStart:(head rest):(insertSlides False (tail rest))
-- | Insert blocks into 'Pandoc' for slide structure.
insertS5Structure :: Pandoc -> Pandoc
insertS5Structure (Pandoc meta []) = Pandoc meta []
insertS5Structure (Pandoc (Meta title authors date) blocks) =
let slides = insertSlides True blocks
firstSlide = if (not (null title)) then [slideStart, (Header 1 title), (Header 3 [Str (joinWithSep ", " authors)]), (Header 4 [Str date]), slideEnd] else [] in
let newBlocks = (layoutDiv title date) ++ presentationStart:firstSlide ++ slides ++ [presentationEnd] in
Pandoc (Meta title authors date) newBlocks
let slides = insertSlides True blocks
firstSlide = if not (null title)
then [slideStart, (Header 1 title),
(Header 3 [Str (joinWithSep ", " authors)]),
(Header 4 [Str date]), slideEnd]
else []
newBlocks = (layoutDiv title date) ++ presentationStart:firstSlide ++
slides ++ [presentationEnd]
in Pandoc (Meta title authors date) newBlocks