Major code cleanup in all modules. (Removed unneeded imports,
reformatted, etc.) More major changes are documented below: + Removed Text.Pandoc.ParserCombinators and moved all its definitions to Text.Pandoc.Shared. + In Text.Pandoc.Shared: - Removed unneeded 'try' in blanklines. - Removed endsWith function and rewrote functions to use isSuffixOf instead. - Added >>~ combinator. - Rewrote stripTrailingNewlines, removeLeadingSpaces. + Moved Text.Pandoc.Entities -> Text.Pandoc.CharacterReferences. - Removed unneeded functions charToEntity, charToNumericalEntity. - Renamed functions using proper terminology (character references, not entities). decodeEntities -> decodeCharacterReferences, characterEntity -> characterReference. - Moved escapeStringToXML to Docbook writer, which is the only thing that uses it. - Removed old entity parser in HTML and Markdown readers; replaced with new charRef parser in Text.Pandoc.Shared. + Fixed accent bug in Text.Pandoc.Readers.LaTeX: \^{} now correctly parses as a '^' character. + Text.Pandoc.ASCIIMathML is no longer an exported module. git-svn-id: https://pandoc.googlecode.com/svn/trunk@835 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
e814a3f6d2
commit
a8e2199034
21 changed files with 2042 additions and 2311 deletions
|
@ -34,11 +34,9 @@ Description: Pandoc is a Haskell library for converting from one markup
|
|||
Build-Depends: base, parsec, xhtml, mtl, regex-compat
|
||||
Hs-Source-Dirs: src
|
||||
Exposed-Modules: Text.Pandoc,
|
||||
Text.Pandoc.ASCIIMathML,
|
||||
Text.Pandoc.Blocks,
|
||||
Text.Pandoc.Definition,
|
||||
Text.Pandoc.Entities,
|
||||
Text.Pandoc.ParserCombinators,
|
||||
Text.Pandoc.CharacterReferences,
|
||||
Text.Pandoc.Shared,
|
||||
Text.Pandoc.UTF8,
|
||||
Text.Pandoc.Readers.HTML,
|
||||
|
|
|
@ -39,8 +39,8 @@ inline links:
|
|||
>
|
||||
> markdownToRST :: String -> String
|
||||
> markdownToRST = toUTF8 .
|
||||
> (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
|
||||
> (readMarkdown defaultParserState) . fromUTF8
|
||||
> (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
|
||||
> (readMarkdown defaultParserState) . fromUTF8
|
||||
>
|
||||
> main = interact markdownToRST
|
||||
|
||||
|
|
|
@ -43,9 +43,8 @@ module Text.Pandoc.Blocks
|
|||
rightAlignBlock
|
||||
)
|
||||
where
|
||||
|
||||
import Text.PrettyPrint
|
||||
import Data.List (transpose, intersperse)
|
||||
import Data.List ( intersperse )
|
||||
|
||||
-- | A fixed-width block of text. Parameters are width of block,
|
||||
-- height of block, and list of lines.
|
||||
|
@ -53,6 +52,17 @@ data TextBlock = TextBlock Int Int [String]
|
|||
instance Show TextBlock where
|
||||
show x = show $ blockToDoc x
|
||||
|
||||
-- | Break lines in a list of lines so that none are greater than
|
||||
-- a given width.
|
||||
breakLines :: Int -- ^ Maximum length of lines.
|
||||
-> [String] -- ^ List of lines.
|
||||
-> [String]
|
||||
breakLines width [] = []
|
||||
breakLines width (l:ls) =
|
||||
if length l > width
|
||||
then (take width l):(breakLines width ((drop width l):ls))
|
||||
else l:(breakLines width ls)
|
||||
|
||||
-- | Convert a @Doc@ element into a @TextBlock@ with a specified width.
|
||||
docToBlock :: Int -- ^ Width of text block.
|
||||
-> Doc -- ^ @Doc@ to convert.
|
||||
|
@ -60,13 +70,8 @@ docToBlock :: Int -- ^ Width of text block.
|
|||
docToBlock width doc =
|
||||
let rendered = renderStyle (style {lineLength = width,
|
||||
ribbonsPerLine = 1}) doc
|
||||
lns = lines rendered
|
||||
chop [] = []
|
||||
chop (l:ls) = if length l > width
|
||||
then (take width l):(chop ((drop width l):ls))
|
||||
else l:(chop ls)
|
||||
lns' = chop lns
|
||||
in TextBlock width (length lns') lns'
|
||||
lns = breakLines width $ lines rendered
|
||||
in TextBlock width (length lns) lns
|
||||
|
||||
-- | Convert a @TextBlock@ to a @Doc@ element.
|
||||
blockToDoc :: TextBlock -> Doc
|
||||
|
@ -116,8 +121,7 @@ isWhitespace x = x `elem` " \t"
|
|||
-- | Left-aligns the contents of a @TextBlock@ within the block.
|
||||
leftAlignBlock :: TextBlock -> TextBlock
|
||||
leftAlignBlock (TextBlock width height lns) =
|
||||
TextBlock width height $
|
||||
map (dropWhile isWhitespace) lns
|
||||
TextBlock width height $ map (dropWhile isWhitespace) lns
|
||||
|
||||
-- | Right-aligns the contents of a @TextBlock@ within the block.
|
||||
rightAlignBlock :: TextBlock -> TextBlock
|
||||
|
|
|
@ -17,7 +17,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.Entities
|
||||
Module : Text.Pandoc.CharacterReferences
|
||||
Copyright : Copyright (C) 2006-7 John MacFarlane
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
|
@ -25,37 +25,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Functions for encoding unicode characters as entity references,
|
||||
and vice versa.
|
||||
Functions for parsing character references.
|
||||
-}
|
||||
module Text.Pandoc.Entities (
|
||||
charToEntity,
|
||||
charToNumericalEntity,
|
||||
decodeEntities,
|
||||
escapeCharForXML,
|
||||
escapeStringForXML,
|
||||
characterEntity
|
||||
module Text.Pandoc.CharacterReferences (
|
||||
characterReference,
|
||||
decodeCharacterReferences,
|
||||
) where
|
||||
import Data.Char ( chr, ord )
|
||||
import Data.Char ( chr )
|
||||
import Text.ParserCombinators.Parsec
|
||||
import Data.Maybe ( fromMaybe )
|
||||
import qualified Data.Map as Map
|
||||
|
||||
-- | Returns a string containing an entity reference for the character.
|
||||
charToEntity :: Char -> String
|
||||
charToEntity char = Map.findWithDefault (charToNumericalEntity char) char reverseEntityTable
|
||||
|
||||
-- | Returns a string containing a numerical entity reference for the char.
|
||||
charToNumericalEntity :: Char -> String
|
||||
charToNumericalEntity ch = "&#" ++ show (ord ch) ++ ";"
|
||||
-- | Parse character entity.
|
||||
characterReference :: GenParser Char st Char
|
||||
characterReference = characterEntity <|>
|
||||
hexadecimalCharacterReference <|>
|
||||
decimalCharacterReference <?>
|
||||
"character entity"
|
||||
|
||||
-- | Parse character entity.
|
||||
characterEntity :: GenParser Char st Char
|
||||
characterEntity = namedEntity <|> hexEntity <|> decimalEntity <?> "character entity"
|
||||
|
||||
-- | Parse character entity.
|
||||
namedEntity :: GenParser Char st Char
|
||||
namedEntity = try $ do
|
||||
characterEntity = try $ do
|
||||
st <- char '&'
|
||||
body <- many1 alphaNum
|
||||
end <- char ';'
|
||||
|
@ -63,8 +52,8 @@ namedEntity = try $ do
|
|||
return $ Map.findWithDefault '?' entity entityTable
|
||||
|
||||
-- | Parse hexadecimal entity.
|
||||
hexEntity :: GenParser Char st Char
|
||||
hexEntity = try $ do
|
||||
hexadecimalCharacterReference :: GenParser Char st Char
|
||||
hexadecimalCharacterReference = try $ do
|
||||
st <- string "&#"
|
||||
hex <- oneOf "Xx"
|
||||
body <- many1 (oneOf "0123456789ABCDEFabcdef")
|
||||
|
@ -72,49 +61,23 @@ hexEntity = try $ do
|
|||
return $ chr $ read ('0':'x':body)
|
||||
|
||||
-- | Parse decimal entity.
|
||||
decimalEntity :: GenParser Char st Char
|
||||
decimalEntity = try $ do
|
||||
decimalCharacterReference :: GenParser Char st Char
|
||||
decimalCharacterReference = try $ do
|
||||
st <- string "&#"
|
||||
body <- many1 digit
|
||||
end <- char ';'
|
||||
return $ chr $ read body
|
||||
|
||||
-- | Escape one character as needed for XML.
|
||||
escapeCharForXML :: Char -> String
|
||||
escapeCharForXML x =
|
||||
case x of
|
||||
'&' -> "&"
|
||||
'<' -> "<"
|
||||
'>' -> ">"
|
||||
'"' -> """
|
||||
'\160' -> " "
|
||||
c -> [c]
|
||||
|
||||
-- | True if the character needs to be escaped.
|
||||
needsEscaping :: Char -> Bool
|
||||
needsEscaping c = c `elem` "&<>\"\160"
|
||||
|
||||
-- | Escape string as needed for XML. Entity references are not preserved.
|
||||
escapeStringForXML :: String -> String
|
||||
escapeStringForXML "" = ""
|
||||
escapeStringForXML str =
|
||||
case break needsEscaping str of
|
||||
(okay, "") -> okay
|
||||
(okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
|
||||
|
||||
-- | Convert entities in a string to characters.
|
||||
decodeEntities :: String -> String
|
||||
decodeEntities str =
|
||||
case parse (many (characterEntity <|> anyChar)) str str of
|
||||
decodeCharacterReferences :: String -> String
|
||||
decodeCharacterReferences str =
|
||||
case parse (many (characterReference <|> anyChar)) str str of
|
||||
Left err -> error $ "\nError: " ++ show err
|
||||
Right result -> result
|
||||
|
||||
entityTable :: Map.Map String Char
|
||||
entityTable = Map.fromList entityTableList
|
||||
|
||||
reverseEntityTable :: Map.Map Char String
|
||||
reverseEntityTable = Map.fromList $ map (\(a,b) -> (b,a)) entityTableList
|
||||
|
||||
entityTableList :: [(String, Char)]
|
||||
entityTableList = [
|
||||
(""", chr 34),
|
|
@ -34,10 +34,10 @@ data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show)
|
|||
|
||||
-- | Bibliographic information for the document: title (list of 'Inline'),
|
||||
-- authors (list of strings), date (string).
|
||||
data Meta = Meta [Inline] -- title
|
||||
[String] -- authors
|
||||
String -- date
|
||||
deriving (Eq, Show, Read)
|
||||
data Meta = Meta [Inline] -- title
|
||||
[String] -- authors
|
||||
String -- date
|
||||
deriving (Eq, Show, Read)
|
||||
|
||||
-- | Alignment of a table column.
|
||||
data Alignment = AlignLeft
|
||||
|
@ -65,12 +65,11 @@ data ListNumberDelim = DefaultDelim
|
|||
-- | Block element.
|
||||
data Block
|
||||
= Plain [Inline] -- ^ Plain text, not a paragraph
|
||||
| Null -- ^ Nothing
|
||||
| Para [Inline] -- ^ Paragraph
|
||||
| CodeBlock String -- ^ Code block (literal)
|
||||
| RawHtml String -- ^ Raw HTML block (literal)
|
||||
| BlockQuote [Block] -- ^ Block quote (list of blocks)
|
||||
| OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes,
|
||||
| OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes
|
||||
-- and a list of items, each a list of blocks)
|
||||
| BulletList [[Block]] -- ^ Bullet list (list of items, each
|
||||
-- a list of blocks)
|
||||
|
@ -84,6 +83,7 @@ data Block
|
|||
-- relative column widths, column headers
|
||||
-- (each a list of blocks), and rows
|
||||
-- (each a list of lists of blocks)
|
||||
| Null -- ^ Nothing
|
||||
deriving (Eq, Read, Show)
|
||||
|
||||
-- | Type of quotation marks to use in Quoted inline.
|
||||
|
@ -112,6 +112,5 @@ data Inline
|
|||
| Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target
|
||||
| Image [Inline] Target -- ^ Image: alt text (list of inlines), target
|
||||
-- and target
|
||||
| Note [Block] -- ^ Footnote or endnote - reference (string),
|
||||
-- text (list of blocks)
|
||||
| Note [Block] -- ^ Footnote or endnote
|
||||
deriving (Show, Eq, Read)
|
||||
|
|
|
@ -1,198 +0,0 @@
|
|||
{-
|
||||
Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.ParserCombinators
|
||||
Copyright : Copyright (C) 2006-7 John MacFarlane
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Parser combinators used in Pandoc readers.
|
||||
-}
|
||||
module Text.Pandoc.ParserCombinators (
|
||||
anyLine,
|
||||
many1Till,
|
||||
notFollowedBy',
|
||||
oneOfStrings,
|
||||
spaceChar,
|
||||
skipSpaces,
|
||||
blankline,
|
||||
blanklines,
|
||||
enclosed,
|
||||
stringAnyCase,
|
||||
parseFromString,
|
||||
lineClump,
|
||||
charsInBalanced,
|
||||
charsInBalanced',
|
||||
romanNumeral,
|
||||
withHorizDisplacement
|
||||
) where
|
||||
import Text.ParserCombinators.Parsec
|
||||
import Data.Char ( toUpper, toLower )
|
||||
|
||||
--- | Parse any line of text
|
||||
anyLine :: GenParser Char st [Char]
|
||||
anyLine = try (manyTill anyChar newline) <|> many1 anyChar
|
||||
-- second alternative is for a line ending with eof
|
||||
|
||||
-- | Parses a space or tab.
|
||||
spaceChar :: CharParser st Char
|
||||
spaceChar = oneOf " \t"
|
||||
|
||||
-- | Skips zero or more spaces or tabs.
|
||||
skipSpaces :: GenParser Char st ()
|
||||
skipSpaces = skipMany spaceChar
|
||||
|
||||
-- | Skips zero or more spaces or tabs, then reads a newline.
|
||||
blankline :: GenParser Char st Char
|
||||
blankline = try (do
|
||||
skipSpaces
|
||||
newline)
|
||||
|
||||
-- | Parses one or more blank lines and returns a string of newlines.
|
||||
blanklines :: GenParser Char st [Char]
|
||||
blanklines = try (many1 blankline)
|
||||
|
||||
-- | Parses material enclosed between start and end parsers.
|
||||
enclosed :: GenParser Char st t -- ^ start parser
|
||||
-> GenParser Char st end -- ^ end parser
|
||||
-> GenParser Char st a -- ^ content parser (to be used repeatedly)
|
||||
-> GenParser Char st [a]
|
||||
enclosed start end parser = try (do
|
||||
start
|
||||
notFollowedBy space
|
||||
result <- many1Till parser (try end)
|
||||
return result)
|
||||
|
||||
-- | Like @manyTill@, but reads at least one item.
|
||||
many1Till :: GenParser tok st a
|
||||
-> GenParser tok st end
|
||||
-> GenParser tok st [a]
|
||||
many1Till p end = try (do
|
||||
first <- p
|
||||
rest <- manyTill p end
|
||||
return (first:rest))
|
||||
|
||||
-- | A more general form of @notFollowedBy@. This one allows any
|
||||
-- type of parser to be specified, and succeeds only if that parser fails.
|
||||
-- It does not consume any input.
|
||||
notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
|
||||
notFollowedBy' parser = try (do { c <- try parser; unexpected (show c) }
|
||||
<|> return ())
|
||||
|
||||
-- | Parses one of a list of strings (tried in order).
|
||||
oneOfStrings :: [String] -> GenParser Char st String
|
||||
oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
|
||||
|
||||
-- | Parse string, case insensitive.
|
||||
stringAnyCase :: [Char] -> CharParser st String
|
||||
stringAnyCase [] = string ""
|
||||
stringAnyCase (x:xs) = try (do
|
||||
firstChar <- choice [ char (toUpper x), char (toLower x) ]
|
||||
rest <- stringAnyCase xs
|
||||
return (firstChar:rest))
|
||||
|
||||
-- | Parse contents of 'str' using 'parser' and return result.
|
||||
parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
|
||||
parseFromString parser str = try $ do
|
||||
oldInput <- getInput
|
||||
setInput str
|
||||
result <- parser
|
||||
setInput oldInput
|
||||
return result
|
||||
|
||||
-- | Parse raw line block up to and including blank lines.
|
||||
lineClump :: GenParser Char st String
|
||||
lineClump = do
|
||||
lines <- many1 (do{notFollowedBy blankline; anyLine})
|
||||
blanks <- blanklines <|> (do{eof; return "\n"})
|
||||
return ((unlines lines) ++ blanks)
|
||||
|
||||
-- | Parse a string of characters between an open character
|
||||
-- and a close character, including text between balanced
|
||||
-- pairs of open and close. For example,
|
||||
-- @charsInBalanced '(' ')'@ will parse "(hello (there))"
|
||||
-- and return "hello (there)". Stop if a blank line is
|
||||
-- encountered.
|
||||
charsInBalanced :: Char -> Char -> GenParser Char st String
|
||||
charsInBalanced open close = try $ do
|
||||
char open
|
||||
raw <- manyTill ( (do res <- charsInBalanced open close
|
||||
return $ [open] ++ res ++ [close])
|
||||
<|> (do notFollowedBy' (blankline >> blanklines)
|
||||
count 1 anyChar))
|
||||
(char close)
|
||||
return $ concat raw
|
||||
|
||||
-- | Like charsInBalanced, but allow blank lines in the content.
|
||||
charsInBalanced' :: Char -> Char -> GenParser Char st String
|
||||
charsInBalanced' open close = try $ do
|
||||
char open
|
||||
raw <- manyTill ( (do res <- charsInBalanced open close
|
||||
return $ [open] ++ res ++ [close])
|
||||
<|> count 1 anyChar)
|
||||
(char close)
|
||||
return $ concat raw
|
||||
|
||||
-- | Parses a roman numeral (uppercase or lowercase), returns number.
|
||||
romanNumeral :: Bool -> -- ^ Uppercase if true
|
||||
GenParser Char st Int
|
||||
romanNumeral upper = try $ do
|
||||
let char' c = char (if upper then toUpper c else c)
|
||||
let one = char' 'i'
|
||||
let five = char' 'v'
|
||||
let ten = char' 'x'
|
||||
let fifty = char' 'l'
|
||||
let hundred = char' 'c'
|
||||
let fivehundred = char' 'd'
|
||||
let thousand = char' 'm'
|
||||
thousands <- many thousand >>= (return . (1000 *) . length)
|
||||
ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
|
||||
fivehundreds <- many fivehundred >>= (return . (500 *) . length)
|
||||
fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
|
||||
hundreds <- many hundred >>= (return . (100 *) . length)
|
||||
nineties <- option 0 $ try $ ten >> hundred >> return 90
|
||||
fifties <- many fifty >>= (return . (50 *) . length)
|
||||
forties <- option 0 $ try $ ten >> fifty >> return 40
|
||||
tens <- many ten >>= (return . (10 *) . length)
|
||||
nines <- option 0 $ try $ one >> ten >> return 9
|
||||
fives <- many five >>= (return . (5*) . length)
|
||||
fours <- option 0 $ try $ one >> five >> return 4
|
||||
ones <- many one >>= (return . length)
|
||||
let total = thousands + ninehundreds + fivehundreds + fourhundreds +
|
||||
hundreds + nineties + fifties + forties + tens + nines +
|
||||
fives + fours + ones
|
||||
if total == 0
|
||||
then fail "not a roman numeral"
|
||||
else return total
|
||||
|
||||
-- | Applies a parser, returns tuple of its results and its horizontal
|
||||
-- displacement (the difference between the source column at the end
|
||||
-- and the source column at the beginning). Vertical displacement
|
||||
-- (source row) is ignored.
|
||||
withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
|
||||
-> GenParser Char st (a, Int) -- ^ (result, displacement)
|
||||
withHorizDisplacement parser = do
|
||||
pos1 <- getPosition
|
||||
result <- parser
|
||||
pos2 <- getPosition
|
||||
return (result, sourceColumn pos2 - sourceColumn pos1)
|
||||
|
|
@ -41,12 +41,12 @@ module Text.Pandoc.Readers.HTML (
|
|||
) where
|
||||
|
||||
import Text.ParserCombinators.Parsec
|
||||
import Text.Pandoc.ParserCombinators
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Entities ( characterEntity, decodeEntities )
|
||||
import Text.Pandoc.CharacterReferences ( characterReference,
|
||||
decodeCharacterReferences )
|
||||
import Data.Maybe ( fromMaybe )
|
||||
import Data.List ( intersect, takeWhile, dropWhile )
|
||||
import Data.List ( takeWhile, dropWhile )
|
||||
import Data.Char ( toUpper, toLower, isAlphaNum )
|
||||
|
||||
-- | Convert HTML-formatted string to 'Pandoc' document.
|
||||
|
@ -55,10 +55,6 @@ readHtml :: ParserState -- ^ Parser state
|
|||
-> Pandoc
|
||||
readHtml = readWith parseHtml
|
||||
|
||||
-- for testing
|
||||
testString :: String -> IO ()
|
||||
testString = testStringWith parseHtml
|
||||
|
||||
--
|
||||
-- Constants
|
||||
--
|
||||
|
@ -74,26 +70,18 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
|
|||
--
|
||||
|
||||
-- | Read blocks until end tag.
|
||||
blocksTilEnd tag = try (do
|
||||
blocks <- manyTill (do {b <- block; spaces; return b}) (htmlEndTag tag)
|
||||
return $ filter (/= Null) blocks)
|
||||
blocksTilEnd tag = do
|
||||
blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
|
||||
return $ filter (/= Null) blocks
|
||||
|
||||
-- | Read inlines until end tag.
|
||||
inlinesTilEnd tag = try (do
|
||||
inlines <- manyTill inline (htmlEndTag tag)
|
||||
return inlines)
|
||||
inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
|
||||
|
||||
-- | Parse blocks between open and close tag.
|
||||
blocksIn tag = try $ do
|
||||
htmlTag tag
|
||||
spaces
|
||||
blocksTilEnd tag
|
||||
blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
|
||||
|
||||
-- | Parse inlines between open and close tag.
|
||||
inlinesIn tag = try $ do
|
||||
htmlTag tag
|
||||
spaces
|
||||
inlinesTilEnd tag
|
||||
inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
|
||||
|
||||
-- | Extract type from a tag: e.g. @br@ from @\<br\>@
|
||||
extractTagType :: String -> String
|
||||
|
@ -103,19 +91,19 @@ extractTagType ('<':rest) =
|
|||
extractTagType _ = ""
|
||||
|
||||
-- | Parse any HTML tag (closing or opening) and return text of tag
|
||||
anyHtmlTag = try (do
|
||||
anyHtmlTag = try $ do
|
||||
char '<'
|
||||
spaces
|
||||
tag <- many1 alphaNum
|
||||
attribs <- htmlAttributes
|
||||
spaces
|
||||
ender <- option "" (string "/")
|
||||
let ender' = if (null ender) then "" else " /"
|
||||
let ender' = if null ender then "" else " /"
|
||||
spaces
|
||||
char '>'
|
||||
return ("<" ++ tag ++ attribs ++ ender' ++ ">"))
|
||||
return $ "<" ++ tag ++ attribs ++ ender' ++ ">"
|
||||
|
||||
anyHtmlEndTag = try (do
|
||||
anyHtmlEndTag = try $ do
|
||||
char '<'
|
||||
spaces
|
||||
char '/'
|
||||
|
@ -123,19 +111,19 @@ anyHtmlEndTag = try (do
|
|||
tagType <- many1 alphaNum
|
||||
spaces
|
||||
char '>'
|
||||
return ("</" ++ tagType ++ ">"))
|
||||
return $ "</" ++ tagType ++ ">"
|
||||
|
||||
htmlTag :: String -> GenParser Char st (String, [(String, String)])
|
||||
htmlTag tag = try (do
|
||||
htmlTag tag = try $ do
|
||||
char '<'
|
||||
spaces
|
||||
stringAnyCase tag
|
||||
attribs <- many htmlAttribute
|
||||
spaces
|
||||
option "" (string "/")
|
||||
optional (string "/")
|
||||
spaces
|
||||
char '>'
|
||||
return (tag, (map (\(name, content, raw) -> (name, content)) attribs)))
|
||||
return (tag, (map (\(name, content, raw) -> (name, content)) attribs))
|
||||
|
||||
-- parses a quoted html attribute value
|
||||
quoted quoteChar = do
|
||||
|
@ -145,20 +133,20 @@ quoted quoteChar = do
|
|||
|
||||
htmlAttributes = do
|
||||
attrList <- many htmlAttribute
|
||||
return (concatMap (\(name, content, raw) -> raw) attrList)
|
||||
return $ concatMap (\(name, content, raw) -> raw) attrList
|
||||
|
||||
htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute
|
||||
|
||||
-- minimized boolean attribute (no = and value)
|
||||
htmlMinimizedAttribute = try (do
|
||||
-- minimized boolean attribute
|
||||
htmlMinimizedAttribute = try $ do
|
||||
many1 space
|
||||
name <- many1 (choice [letter, oneOf ".-_:"])
|
||||
spaces
|
||||
notFollowedBy (char '=')
|
||||
let content = name
|
||||
return (name, content, (" " ++ name)))
|
||||
return (name, content, (" " ++ name))
|
||||
|
||||
htmlRegularAttribute = try (do
|
||||
htmlRegularAttribute = try $ do
|
||||
many1 space
|
||||
name <- many1 (choice [letter, oneOf ".-_:"])
|
||||
spaces
|
||||
|
@ -170,10 +158,10 @@ htmlRegularAttribute = try (do
|
|||
a <- many (alphaNum <|> (oneOf "-._:"))
|
||||
return (a,"")) ]
|
||||
return (name, content,
|
||||
(" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr)))
|
||||
(" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))
|
||||
|
||||
-- | Parse an end tag of type 'tag'
|
||||
htmlEndTag tag = try (do
|
||||
htmlEndTag tag = try $ do
|
||||
char '<'
|
||||
spaces
|
||||
char '/'
|
||||
|
@ -181,87 +169,83 @@ htmlEndTag tag = try (do
|
|||
stringAnyCase tag
|
||||
spaces
|
||||
char '>'
|
||||
return ("</" ++ tag ++ ">"))
|
||||
return $ "</" ++ tag ++ ">"
|
||||
|
||||
-- | Returns @True@ if the tag is an inline tag.
|
||||
isInline tag = (extractTagType tag) `elem` inlineHtmlTags
|
||||
|
||||
anyHtmlBlockTag = try (do
|
||||
tag <- choice [anyHtmlTag, anyHtmlEndTag]
|
||||
if isInline tag then fail "inline tag" else return tag)
|
||||
anyHtmlBlockTag = try $ do
|
||||
tag <- anyHtmlTag <|> anyHtmlEndTag
|
||||
if isInline tag then fail "inline tag" else return tag
|
||||
|
||||
anyHtmlInlineTag = try (do
|
||||
tag <- choice [ anyHtmlTag, anyHtmlEndTag ]
|
||||
if isInline tag then return tag else fail "not an inline tag")
|
||||
anyHtmlInlineTag = try $ do
|
||||
tag <- anyHtmlTag <|> anyHtmlEndTag
|
||||
if isInline tag then return tag else fail "not an inline tag"
|
||||
|
||||
-- | Parses material between script tags.
|
||||
-- Scripts must be treated differently, because they can contain '<>' etc.
|
||||
htmlScript = try (do
|
||||
htmlScript = try $ do
|
||||
open <- string "<script"
|
||||
rest <- manyTill anyChar (htmlEndTag "script")
|
||||
return (open ++ rest ++ "</script>"))
|
||||
return $ open ++ rest ++ "</script>"
|
||||
|
||||
htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ]
|
||||
|
||||
rawHtmlBlock = try (do
|
||||
notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"])
|
||||
rawHtmlBlock = try $ do
|
||||
notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
|
||||
body <- htmlBlockElement <|> anyHtmlBlockTag
|
||||
sp <- (many space)
|
||||
sp <- many space
|
||||
state <- getState
|
||||
if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null)
|
||||
if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null
|
||||
|
||||
-- | Parses an HTML comment.
|
||||
htmlComment = try (do
|
||||
htmlComment = try $ do
|
||||
string "<!--"
|
||||
comment <- manyTill anyChar (try (string "-->"))
|
||||
return ("<!--" ++ comment ++ "-->"))
|
||||
return $ "<!--" ++ comment ++ "-->"
|
||||
|
||||
--
|
||||
-- parsing documents
|
||||
--
|
||||
|
||||
xmlDec = try (do
|
||||
xmlDec = try $ do
|
||||
string "<?"
|
||||
rest <- manyTill anyChar (char '>')
|
||||
return ("<?" ++ rest ++ ">"))
|
||||
return $ "<?" ++ rest ++ ">"
|
||||
|
||||
definition = try (do
|
||||
definition = try $ do
|
||||
string "<!"
|
||||
rest <- manyTill anyChar (char '>')
|
||||
return ("<!" ++ rest ++ ">"))
|
||||
return $ "<!" ++ rest ++ ">"
|
||||
|
||||
nonTitleNonHead = try (do
|
||||
notFollowedBy' (htmlTag "title")
|
||||
notFollowedBy' (htmlTag "/head")
|
||||
result <- choice [do {rawHtmlBlock; return ' '}, anyChar]
|
||||
return result)
|
||||
nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >>
|
||||
((rawHtmlBlock >> return ' ') <|> anyChar)
|
||||
|
||||
parseTitle = try (do
|
||||
(tag, attribs) <- htmlTag "title"
|
||||
parseTitle = try $ do
|
||||
(tag, _) <- htmlTag "title"
|
||||
contents <- inlinesTilEnd tag
|
||||
spaces
|
||||
return contents)
|
||||
return contents
|
||||
|
||||
-- parse header and return meta-information (for now, just title)
|
||||
parseHead = try (do
|
||||
parseHead = try $ do
|
||||
htmlTag "head"
|
||||
spaces
|
||||
skipMany nonTitleNonHead
|
||||
contents <- option [] parseTitle
|
||||
skipMany nonTitleNonHead
|
||||
htmlTag "/head"
|
||||
return (contents, [], ""))
|
||||
return (contents, [], "")
|
||||
|
||||
skipHtmlTag tag = option ("",[]) (htmlTag tag)
|
||||
skipHtmlTag tag = optional (htmlTag tag)
|
||||
|
||||
-- h1 class="title" representation of title in body
|
||||
bodyTitle = try (do
|
||||
bodyTitle = try $ do
|
||||
(tag, attribs) <- htmlTag "h1"
|
||||
cl <- case (extractAttribute "class" attribs) of
|
||||
Just "title" -> do {return ""}
|
||||
Just "title" -> return ""
|
||||
otherwise -> fail "not title"
|
||||
inlinesTilEnd "h1"
|
||||
return "")
|
||||
|
||||
parseHtml = do
|
||||
sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
|
||||
|
@ -271,27 +255,30 @@ parseHtml = do
|
|||
spaces
|
||||
skipHtmlTag "body"
|
||||
spaces
|
||||
option "" bodyTitle -- skip title in body, because it's represented in meta
|
||||
optional bodyTitle -- skip title in body, because it's represented in meta
|
||||
blocks <- parseBlocks
|
||||
spaces
|
||||
option "" (htmlEndTag "body")
|
||||
optional (htmlEndTag "body")
|
||||
spaces
|
||||
option "" (htmlEndTag "html")
|
||||
optional (htmlEndTag "html")
|
||||
many anyChar -- ignore anything after </html>
|
||||
eof
|
||||
return (Pandoc (Meta title authors date) blocks)
|
||||
return $ Pandoc (Meta title authors date) blocks
|
||||
|
||||
--
|
||||
-- parsing blocks
|
||||
--
|
||||
|
||||
parseBlocks = do
|
||||
spaces
|
||||
result <- sepEndBy block spaces
|
||||
return $ filter (/= Null) result
|
||||
parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
|
||||
|
||||
block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain,
|
||||
rawHtmlBlock ] <?> "block"
|
||||
block = choice [ codeBlock
|
||||
, header
|
||||
, hrule
|
||||
, list
|
||||
, blockQuote
|
||||
, para
|
||||
, plain
|
||||
, rawHtmlBlock ] <?> "block"
|
||||
|
||||
--
|
||||
-- header blocks
|
||||
|
@ -299,53 +286,49 @@ block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain,
|
|||
|
||||
header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
|
||||
|
||||
headerLevel n = try (do
|
||||
headerLevel n = try $ do
|
||||
let level = "h" ++ show n
|
||||
(tag, attribs) <- htmlTag level
|
||||
contents <- inlinesTilEnd level
|
||||
return (Header n (normalizeSpaces contents)))
|
||||
return $ Header n (normalizeSpaces contents)
|
||||
|
||||
--
|
||||
-- hrule block
|
||||
--
|
||||
|
||||
hrule = try (do
|
||||
hrule = try $ do
|
||||
(tag, attribs) <- htmlTag "hr"
|
||||
state <- getState
|
||||
if (not (null attribs)) && (stateParseRaw state)
|
||||
then -- in this case we want to parse it as raw html
|
||||
unexpected "attributes in hr"
|
||||
else return HorizontalRule)
|
||||
if not (null attribs) && stateParseRaw state
|
||||
then unexpected "attributes in hr" -- parse as raw in this case
|
||||
else return HorizontalRule
|
||||
|
||||
--
|
||||
-- code blocks
|
||||
--
|
||||
|
||||
codeBlock = choice [ preCodeBlock, bareCodeBlock ] <?> "code block"
|
||||
codeBlock = preCodeBlock <|> bareCodeBlock <?> "code block"
|
||||
|
||||
preCodeBlock = try (do
|
||||
preCodeBlock = try $ do
|
||||
htmlTag "pre"
|
||||
spaces
|
||||
htmlTag "code"
|
||||
result <- manyTill anyChar (htmlEndTag "code")
|
||||
result <- bareCodeBlock
|
||||
spaces
|
||||
htmlEndTag "pre"
|
||||
return (CodeBlock (stripTrailingNewlines (decodeEntities result))))
|
||||
return result
|
||||
|
||||
bareCodeBlock = try (do
|
||||
bareCodeBlock = try $ do
|
||||
htmlTag "code"
|
||||
result <- manyTill anyChar (htmlEndTag "code")
|
||||
return (CodeBlock (stripTrailingNewlines (decodeEntities result))))
|
||||
return $ CodeBlock $ stripTrailingNewlines $
|
||||
decodeCharacterReferences result
|
||||
|
||||
--
|
||||
-- block quotes
|
||||
--
|
||||
|
||||
blockQuote = try (do
|
||||
tag <- htmlTag "blockquote"
|
||||
spaces
|
||||
blocks <- blocksTilEnd "blockquote"
|
||||
return (BlockQuote blocks))
|
||||
blockQuote = try $ htmlTag "blockquote" >> spaces >>
|
||||
blocksTilEnd "blockquote" >>= (return . BlockQuote)
|
||||
|
||||
--
|
||||
-- list blocks
|
||||
|
@ -354,119 +337,105 @@ blockQuote = try (do
|
|||
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
|
||||
|
||||
orderedList = try $ do
|
||||
(_, attribs) <- htmlTag "ol"
|
||||
(start, style) <- option (1, DefaultStyle) $
|
||||
do failIfStrict
|
||||
let sta = fromMaybe "1" $
|
||||
lookup "start" attribs
|
||||
let sty = fromMaybe (fromMaybe "" $
|
||||
lookup "style" attribs) $
|
||||
lookup "class" attribs
|
||||
let sty' = case sty of
|
||||
"lower-roman" -> LowerRoman
|
||||
"upper-roman" -> UpperRoman
|
||||
"lower-alpha" -> LowerAlpha
|
||||
"upper-alpha" -> UpperAlpha
|
||||
"decimal" -> Decimal
|
||||
_ -> DefaultStyle
|
||||
return (read sta, sty')
|
||||
spaces
|
||||
items <- sepEndBy1 (blocksIn "li") spaces
|
||||
htmlEndTag "ol"
|
||||
return (OrderedList (start, style, DefaultDelim) items)
|
||||
(_, attribs) <- htmlTag "ol"
|
||||
(start, style) <- option (1, DefaultStyle) $
|
||||
do failIfStrict
|
||||
let sta = fromMaybe "1" $
|
||||
lookup "start" attribs
|
||||
let sty = fromMaybe (fromMaybe "" $
|
||||
lookup "style" attribs) $
|
||||
lookup "class" attribs
|
||||
let sty' = case sty of
|
||||
"lower-roman" -> LowerRoman
|
||||
"upper-roman" -> UpperRoman
|
||||
"lower-alpha" -> LowerAlpha
|
||||
"upper-alpha" -> UpperAlpha
|
||||
"decimal" -> Decimal
|
||||
_ -> DefaultStyle
|
||||
return (read sta, sty')
|
||||
spaces
|
||||
items <- sepEndBy1 (blocksIn "li") spaces
|
||||
htmlEndTag "ol"
|
||||
return $ OrderedList (start, style, DefaultDelim) items
|
||||
|
||||
bulletList = try $ do
|
||||
htmlTag "ul"
|
||||
spaces
|
||||
items <- sepEndBy1 (blocksIn "li") spaces
|
||||
htmlEndTag "ul"
|
||||
return (BulletList items)
|
||||
htmlTag "ul"
|
||||
spaces
|
||||
items <- sepEndBy1 (blocksIn "li") spaces
|
||||
htmlEndTag "ul"
|
||||
return $ BulletList items
|
||||
|
||||
definitionList = try $ do
|
||||
failIfStrict -- def lists not part of standard markdown
|
||||
tag <- htmlTag "dl"
|
||||
spaces
|
||||
items <- sepEndBy1 definitionListItem spaces
|
||||
htmlEndTag "dl"
|
||||
return (DefinitionList items)
|
||||
failIfStrict -- def lists not part of standard markdown
|
||||
tag <- htmlTag "dl"
|
||||
spaces
|
||||
items <- sepEndBy1 definitionListItem spaces
|
||||
htmlEndTag "dl"
|
||||
return $ DefinitionList items
|
||||
|
||||
definitionListItem = try $ do
|
||||
terms <- sepEndBy1 (inlinesIn "dt") spaces
|
||||
defs <- sepEndBy1 (blocksIn "dd") spaces
|
||||
let term = joinWithSep [LineBreak] terms
|
||||
return (term, concat defs)
|
||||
terms <- sepEndBy1 (inlinesIn "dt") spaces
|
||||
defs <- sepEndBy1 (blocksIn "dd") spaces
|
||||
let term = joinWithSep [LineBreak] terms
|
||||
return (term, concat defs)
|
||||
|
||||
--
|
||||
-- paragraph block
|
||||
--
|
||||
|
||||
para = try (do
|
||||
tag <- htmlTag "p"
|
||||
result <- inlinesTilEnd "p"
|
||||
return (Para (normalizeSpaces result)))
|
||||
para = htmlTag "p" >> inlinesTilEnd "p" >>= return . Para . normalizeSpaces
|
||||
|
||||
--
|
||||
-- plain block
|
||||
--
|
||||
|
||||
plain = do
|
||||
result <- many1 inline
|
||||
return (Plain (normalizeSpaces result))
|
||||
plain = many1 inline >>= return . Plain . normalizeSpaces
|
||||
|
||||
--
|
||||
-- inline
|
||||
--
|
||||
|
||||
inline = choice [ text, special ] <?> "inline"
|
||||
inline = choice [ charRef
|
||||
, strong
|
||||
, emph
|
||||
, superscript
|
||||
, subscript
|
||||
, strikeout
|
||||
, spanStrikeout
|
||||
, code
|
||||
, str
|
||||
, linebreak
|
||||
, whitespace
|
||||
, link
|
||||
, image
|
||||
, rawHtmlInline ] <?> "inline"
|
||||
|
||||
text = choice [ entity, strong, emph, superscript, subscript,
|
||||
strikeout, spanStrikeout, code, str,
|
||||
linebreak, whitespace ] <?> "text"
|
||||
|
||||
special = choice [ link, image, rawHtmlInline ] <?>
|
||||
"link, inline html, or image"
|
||||
|
||||
entity = do
|
||||
ent <- characterEntity
|
||||
return $ Str [ent]
|
||||
|
||||
code = try (do
|
||||
code = try $ do
|
||||
htmlTag "code"
|
||||
result <- manyTill anyChar (htmlEndTag "code")
|
||||
-- remove internal line breaks, leading and trailing space,
|
||||
-- and decode entities
|
||||
let result' = decodeEntities $ removeLeadingTrailingSpace $
|
||||
joinWithSep " " $ lines result
|
||||
return (Code result'))
|
||||
-- and decode character references
|
||||
return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
|
||||
joinWithSep " " $ lines result
|
||||
|
||||
rawHtmlInline = do
|
||||
result <- choice [htmlScript, anyHtmlInlineTag]
|
||||
result <- htmlScript <|> anyHtmlInlineTag
|
||||
state <- getState
|
||||
if stateParseRaw state then return (HtmlInline result) else return (Str "")
|
||||
|
||||
betweenTags tag = try (do
|
||||
htmlTag tag
|
||||
result <- inlinesTilEnd tag
|
||||
return (normalizeSpaces result))
|
||||
betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=
|
||||
return . normalizeSpaces
|
||||
|
||||
emph = try (do
|
||||
result <- choice [betweenTags "em", betweenTags "it"]
|
||||
return (Emph result))
|
||||
emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph
|
||||
|
||||
superscript = try $ do
|
||||
failIfStrict -- strict markdown has no superscript, so treat as raw HTML
|
||||
result <- betweenTags "sup"
|
||||
return (Superscript result)
|
||||
strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
|
||||
|
||||
subscript = try $ do
|
||||
failIfStrict -- strict markdown has no subscript, so treat as raw HTML
|
||||
result <- betweenTags "sub"
|
||||
return (Subscript result)
|
||||
superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
|
||||
|
||||
strikeout = try $ do
|
||||
failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
|
||||
result <- choice [betweenTags "s", betweenTags "strike"]
|
||||
return (Strikeout result)
|
||||
subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
|
||||
|
||||
strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
|
||||
return . Strikeout
|
||||
|
||||
spanStrikeout = try $ do
|
||||
failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
|
||||
|
@ -474,25 +443,14 @@ spanStrikeout = try $ do
|
|||
result <- case (extractAttribute "class" attributes) of
|
||||
Just "strikeout" -> inlinesTilEnd "span"
|
||||
_ -> fail "not a strikeout"
|
||||
return (Strikeout result)
|
||||
return $ Strikeout result
|
||||
|
||||
strong = try (do
|
||||
result <- choice [betweenTags "b", betweenTags "strong"]
|
||||
return (Strong result))
|
||||
|
||||
whitespace = do
|
||||
many1 space
|
||||
return Space
|
||||
whitespace = many1 space >> return Space
|
||||
|
||||
-- hard line break
|
||||
linebreak = do
|
||||
htmlTag "br"
|
||||
option ' ' newline
|
||||
return LineBreak
|
||||
linebreak = htmlTag "br" >> optional newline >> return LineBreak
|
||||
|
||||
str = do
|
||||
result <- many1 (noneOf "<& \t\n")
|
||||
return (Str result)
|
||||
str = many1 (noneOf "<& \t\n") >>= return . Str
|
||||
|
||||
--
|
||||
-- links and images
|
||||
|
@ -501,27 +459,27 @@ str = do
|
|||
-- extract contents of attribute (attribute names are case-insensitive)
|
||||
extractAttribute name [] = Nothing
|
||||
extractAttribute name ((attrName, contents):rest) =
|
||||
let name' = map toLower name
|
||||
attrName' = map toLower attrName in
|
||||
if (attrName' == name')
|
||||
then Just (decodeEntities contents)
|
||||
else extractAttribute name rest
|
||||
let name' = map toLower name
|
||||
attrName' = map toLower attrName
|
||||
in if attrName' == name'
|
||||
then Just (decodeCharacterReferences contents)
|
||||
else extractAttribute name rest
|
||||
|
||||
link = try $ do
|
||||
(tag, attributes) <- htmlTag "a"
|
||||
url <- case (extractAttribute "href" attributes) of
|
||||
Just url -> do {return url}
|
||||
Just url -> return url
|
||||
Nothing -> fail "no href"
|
||||
let title = fromMaybe "" (extractAttribute "title" attributes)
|
||||
let title = fromMaybe "" $ extractAttribute "title" attributes
|
||||
label <- inlinesTilEnd "a"
|
||||
return $ Link (normalizeSpaces label) (url, title)
|
||||
|
||||
image = try $ do
|
||||
(tag, attributes) <- htmlTag "img"
|
||||
url <- case (extractAttribute "src" attributes) of
|
||||
Just url -> do {return url}
|
||||
Just url -> return url
|
||||
Nothing -> fail "no src"
|
||||
let title = fromMaybe "" (extractAttribute "title" attributes)
|
||||
let title = fromMaybe "" $ extractAttribute "title" attributes
|
||||
let alt = fromMaybe "" (extractAttribute "alt" attributes)
|
||||
return $ Image [Str alt] (url, title)
|
||||
|
||||
|
|
|
@ -34,7 +34,6 @@ module Text.Pandoc.Readers.LaTeX (
|
|||
) where
|
||||
|
||||
import Text.ParserCombinators.Parsec
|
||||
import Text.Pandoc.ParserCombinators
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Data.Maybe ( fromMaybe )
|
||||
|
@ -47,9 +46,6 @@ readLaTeX :: ParserState -- ^ Parser state, including options for parser
|
|||
-> Pandoc
|
||||
readLaTeX = readWith parseLaTeX
|
||||
|
||||
-- for testing
|
||||
testString = testStringWith parseLaTeX
|
||||
|
||||
-- characters with special meaning
|
||||
specialChars = "\\$%&^&_~#{}\n \t|<>'\"-"
|
||||
|
||||
|
@ -58,12 +54,12 @@ specialChars = "\\$%&^&_~#{}\n \t|<>'\"-"
|
|||
--
|
||||
|
||||
-- | Returns text between brackets and its matching pair.
|
||||
bracketedText openB closeB = try (do
|
||||
bracketedText openB closeB = do
|
||||
result <- charsInBalanced' openB closeB
|
||||
return ([openB] ++ result ++ [closeB]))
|
||||
return $ [openB] ++ result ++ [closeB]
|
||||
|
||||
-- | Returns an option or argument of a LaTeX command.
|
||||
optOrArg = choice [ (bracketedText '{' '}'), (bracketedText '[' ']') ]
|
||||
optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
|
||||
|
||||
-- | True if the string begins with '{'.
|
||||
isArg ('{':rest) = True
|
||||
|
@ -73,62 +69,55 @@ isArg other = False
|
|||
commandArgs = many optOrArg
|
||||
|
||||
-- | Parses LaTeX command, returns (name, star, list of options or arguments).
|
||||
command = try (do
|
||||
command = try $ do
|
||||
char '\\'
|
||||
name <- many1 alphaNum
|
||||
star <- option "" (string "*") -- some commands have starred versions
|
||||
args <- commandArgs
|
||||
return (name, star, args))
|
||||
return (name, star, args)
|
||||
|
||||
begin name = try (do
|
||||
string "\\begin{"
|
||||
string name
|
||||
char '}'
|
||||
option [] commandArgs
|
||||
begin name = try $ do
|
||||
string $ "\\begin{" ++ name ++ "}"
|
||||
optional commandArgs
|
||||
spaces
|
||||
return name)
|
||||
return name
|
||||
|
||||
end name = try (do
|
||||
string "\\end{"
|
||||
string name
|
||||
char '}'
|
||||
end name = try $ do
|
||||
string $ "\\end{" ++ name ++ "}"
|
||||
spaces
|
||||
return name)
|
||||
return name
|
||||
|
||||
-- | Returns a list of block elements containing the contents of an
|
||||
-- environment.
|
||||
environment name = try (do
|
||||
begin name
|
||||
spaces
|
||||
contents <- manyTill block (end name)
|
||||
return contents)
|
||||
environment name = try $ begin name >> spaces >> manyTill block (end name)
|
||||
|
||||
anyEnvironment = try (do
|
||||
anyEnvironment = try $ do
|
||||
string "\\begin{"
|
||||
name <- many alphaNum
|
||||
star <- option "" (string "*") -- some environments have starred variants
|
||||
char '}'
|
||||
option [] commandArgs
|
||||
optional commandArgs
|
||||
spaces
|
||||
contents <- manyTill block (end (name ++ star))
|
||||
return (BlockQuote contents))
|
||||
return $ BlockQuote contents
|
||||
|
||||
--
|
||||
-- parsing documents
|
||||
--
|
||||
|
||||
-- | Process LaTeX preamble, extracting metadata.
|
||||
processLaTeXPreamble = try (do
|
||||
manyTill (choice [bibliographic, comment, unknownCommand, nullBlock])
|
||||
(try (string "\\begin{document}"))
|
||||
spaces)
|
||||
processLaTeXPreamble = try $ manyTill
|
||||
(choice [bibliographic, comment, unknownCommand, nullBlock])
|
||||
(try (string "\\begin{document}")) >>
|
||||
spaces
|
||||
|
||||
-- | Parse LaTeX and return 'Pandoc'.
|
||||
parseLaTeX = do
|
||||
option () processLaTeXPreamble -- preamble might not be present (fragment)
|
||||
optional processLaTeXPreamble -- preamble might not be present (fragment)
|
||||
spaces
|
||||
blocks <- parseBlocks
|
||||
spaces
|
||||
option "" (try (string "\\end{document}")) -- might not be present (in fragment)
|
||||
optional $ try (string "\\end{document}") -- might not be present (fragment)
|
||||
spaces
|
||||
eof
|
||||
state <- getState
|
||||
|
@ -136,21 +125,27 @@ parseLaTeX = do
|
|||
let title' = stateTitle state
|
||||
let authors' = stateAuthors state
|
||||
let date' = stateDate state
|
||||
return (Pandoc (Meta title' authors' date') blocks')
|
||||
return $ Pandoc (Meta title' authors' date') blocks'
|
||||
|
||||
--
|
||||
-- parsing blocks
|
||||
--
|
||||
|
||||
parseBlocks = do
|
||||
spaces
|
||||
result <- many block
|
||||
return result
|
||||
parseBlocks = spaces >> many block
|
||||
|
||||
block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock,
|
||||
comment, bibliographic, para, specialEnvironment,
|
||||
itemBlock, unknownEnvironment, unknownCommand ] <?>
|
||||
"block"
|
||||
block = choice [ hrule
|
||||
, codeBlock
|
||||
, header
|
||||
, list
|
||||
, blockQuote
|
||||
, mathBlock
|
||||
, comment
|
||||
, bibliographic
|
||||
, para
|
||||
, specialEnvironment
|
||||
, itemBlock
|
||||
, unknownEnvironment
|
||||
, unknownCommand ] <?> "block"
|
||||
|
||||
--
|
||||
-- header blocks
|
||||
|
@ -158,24 +153,21 @@ block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock,
|
|||
|
||||
header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
|
||||
|
||||
headerLevel n = try (do
|
||||
headerLevel n = try $ do
|
||||
let subs = concat $ replicate (n - 1) "sub"
|
||||
string ("\\" ++ subs ++ "section")
|
||||
option ' ' (char '*')
|
||||
optional (char '*')
|
||||
char '{'
|
||||
title <- manyTill inline (char '}')
|
||||
spaces
|
||||
return (Header n (normalizeSpaces title)))
|
||||
return $ Header n (normalizeSpaces title)
|
||||
|
||||
--
|
||||
-- hrule block
|
||||
--
|
||||
|
||||
hrule = try (do
|
||||
oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
|
||||
"\\newpage" ]
|
||||
spaces
|
||||
return HorizontalRule)
|
||||
hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
|
||||
"\\newpage" ] >> spaces >> return HorizontalRule
|
||||
|
||||
--
|
||||
-- code blocks
|
||||
|
@ -183,37 +175,28 @@ hrule = try (do
|
|||
|
||||
codeBlock = codeBlock1 <|> codeBlock2
|
||||
|
||||
codeBlock1 = try (do
|
||||
codeBlock1 = try $ do
|
||||
string "\\begin{verbatim}" -- don't use begin function because it
|
||||
-- gobbles whitespace
|
||||
option "" blanklines -- we want to gobble blank lines, but not
|
||||
optional blanklines -- we want to gobble blank lines, but not
|
||||
-- leading space
|
||||
contents <- manyTill anyChar (try (string "\\end{verbatim}"))
|
||||
spaces
|
||||
return (CodeBlock (stripTrailingNewlines contents)))
|
||||
return $ CodeBlock (stripTrailingNewlines contents)
|
||||
|
||||
codeBlock2 = try (do
|
||||
string "\\begin{Verbatim}" -- used by fancyverb package
|
||||
codeBlock2 = try $ do
|
||||
string "\\begin{Verbatim}" -- used by fancyvrb package
|
||||
option "" blanklines
|
||||
contents <- manyTill anyChar (try (string "\\end{Verbatim}"))
|
||||
spaces
|
||||
return (CodeBlock (stripTrailingNewlines contents)))
|
||||
return $ CodeBlock (stripTrailingNewlines contents)
|
||||
|
||||
--
|
||||
-- block quotes
|
||||
--
|
||||
|
||||
blockQuote = choice [ blockQuote1, blockQuote2 ] <?> "blockquote"
|
||||
|
||||
blockQuote1 = try (do
|
||||
blocks <- environment "quote"
|
||||
spaces
|
||||
return (BlockQuote blocks))
|
||||
|
||||
blockQuote2 = try (do
|
||||
blocks <- environment "quotation"
|
||||
spaces
|
||||
return (BlockQuote blocks))
|
||||
blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
|
||||
return . BlockQuote
|
||||
|
||||
--
|
||||
-- math block
|
||||
|
@ -223,12 +206,12 @@ mathBlock = mathBlockWith (begin "equation") (end "equation") <|>
|
|||
mathBlockWith (begin "displaymath") (end "displaymath") <|>
|
||||
mathBlockWith (string "\\[") (string "\\]") <?> "math block"
|
||||
|
||||
mathBlockWith start end = try (do
|
||||
mathBlockWith start end = try $ do
|
||||
start
|
||||
spaces
|
||||
result <- manyTill anyChar end
|
||||
spaces
|
||||
return (BlockQuote [Para [TeX ("$" ++ result ++ "$")]]))
|
||||
return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]]
|
||||
|
||||
--
|
||||
-- list blocks
|
||||
|
@ -237,69 +220,66 @@ mathBlockWith start end = try (do
|
|||
list = bulletList <|> orderedList <|> definitionList <?> "list"
|
||||
|
||||
listItem = try $ do
|
||||
("item", _, args) <- command
|
||||
spaces
|
||||
state <- getState
|
||||
let oldParserContext = stateParserContext state
|
||||
updateState (\state -> state {stateParserContext = ListItemState})
|
||||
blocks <- many block
|
||||
updateState (\state -> state {stateParserContext = oldParserContext})
|
||||
opt <- case args of
|
||||
([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
|
||||
parseFromString (many inline) $ tail $ init x
|
||||
_ -> return []
|
||||
return (opt, blocks)
|
||||
("item", _, args) <- command
|
||||
spaces
|
||||
state <- getState
|
||||
let oldParserContext = stateParserContext state
|
||||
updateState (\state -> state {stateParserContext = ListItemState})
|
||||
blocks <- many block
|
||||
updateState (\state -> state {stateParserContext = oldParserContext})
|
||||
opt <- case args of
|
||||
([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
|
||||
parseFromString (many inline) $ tail $ init x
|
||||
_ -> return []
|
||||
return (opt, blocks)
|
||||
|
||||
orderedList = try $ do
|
||||
string "\\begin{enumerate}"
|
||||
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
|
||||
try $ do failIfStrict
|
||||
char '['
|
||||
res <- anyOrderedListMarker
|
||||
char ']'
|
||||
return res
|
||||
spaces
|
||||
option "" $ try $ do string "\\setlength{\\itemindent}"
|
||||
char '{'
|
||||
manyTill anyChar (char '}')
|
||||
spaces
|
||||
start <- option 1 $ try $ do failIfStrict
|
||||
string "\\setcounter{enum"
|
||||
many1 (char 'i')
|
||||
string "}{"
|
||||
num <- many1 digit
|
||||
char '}'
|
||||
spaces
|
||||
return $ (read num) + 1
|
||||
items <- many listItem
|
||||
end "enumerate"
|
||||
spaces
|
||||
return $ OrderedList (start, style, delim) $ map snd items
|
||||
string "\\begin{enumerate}"
|
||||
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
|
||||
try $ do failIfStrict
|
||||
char '['
|
||||
res <- anyOrderedListMarker
|
||||
char ']'
|
||||
return res
|
||||
spaces
|
||||
option "" $ try $ do string "\\setlength{\\itemindent}"
|
||||
char '{'
|
||||
manyTill anyChar (char '}')
|
||||
spaces
|
||||
start <- option 1 $ try $ do failIfStrict
|
||||
string "\\setcounter{enum"
|
||||
many1 (char 'i')
|
||||
string "}{"
|
||||
num <- many1 digit
|
||||
char '}'
|
||||
spaces
|
||||
return $ (read num) + 1
|
||||
items <- many listItem
|
||||
end "enumerate"
|
||||
spaces
|
||||
return $ OrderedList (start, style, delim) $ map snd items
|
||||
|
||||
bulletList = try $ do
|
||||
begin "itemize"
|
||||
spaces
|
||||
items <- many listItem
|
||||
end "itemize"
|
||||
spaces
|
||||
return (BulletList $ map snd items)
|
||||
begin "itemize"
|
||||
spaces
|
||||
items <- many listItem
|
||||
end "itemize"
|
||||
spaces
|
||||
return (BulletList $ map snd items)
|
||||
|
||||
definitionList = try $ do
|
||||
begin "description"
|
||||
spaces
|
||||
items <- many listItem
|
||||
end "description"
|
||||
spaces
|
||||
return (DefinitionList items)
|
||||
begin "description"
|
||||
spaces
|
||||
items <- many listItem
|
||||
end "description"
|
||||
spaces
|
||||
return (DefinitionList items)
|
||||
|
||||
--
|
||||
-- paragraph block
|
||||
--
|
||||
|
||||
para = try (do
|
||||
result <- many1 inline
|
||||
spaces
|
||||
return (Para (normalizeSpaces result)))
|
||||
para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces
|
||||
|
||||
--
|
||||
-- title authors date
|
||||
|
@ -307,33 +287,30 @@ para = try (do
|
|||
|
||||
bibliographic = choice [ maketitle, title, authors, date ]
|
||||
|
||||
maketitle = try (do
|
||||
string "\\maketitle"
|
||||
spaces
|
||||
return Null)
|
||||
maketitle = try (string "\\maketitle") >> spaces >> return Null
|
||||
|
||||
title = try (do
|
||||
title = try $ do
|
||||
string "\\title{"
|
||||
tit <- manyTill inline (char '}')
|
||||
spaces
|
||||
updateState (\state -> state { stateTitle = tit })
|
||||
return Null)
|
||||
return Null
|
||||
|
||||
authors = try (do
|
||||
authors = try $ do
|
||||
string "\\author{"
|
||||
authors <- manyTill anyChar (char '}')
|
||||
spaces
|
||||
let authors' = map removeLeadingTrailingSpace $ lines $
|
||||
substitute "\\\\" "\n" authors
|
||||
updateState (\state -> state { stateAuthors = authors' })
|
||||
return Null)
|
||||
return Null
|
||||
|
||||
date = try (do
|
||||
date = try $ do
|
||||
string "\\date{"
|
||||
date' <- manyTill anyChar (char '}')
|
||||
spaces
|
||||
updateState (\state -> state { stateDate = date' })
|
||||
return Null)
|
||||
return Null
|
||||
|
||||
--
|
||||
-- item block
|
||||
|
@ -341,14 +318,14 @@ date = try (do
|
|||
--
|
||||
|
||||
-- this forces items to be parsed in different blocks
|
||||
itemBlock = try (do
|
||||
itemBlock = try $ do
|
||||
("item", _, args) <- command
|
||||
state <- getState
|
||||
if (stateParserContext state == ListItemState)
|
||||
then fail "item should be handled by list block"
|
||||
else if null args
|
||||
then return Null
|
||||
else return (Plain [Str (stripFirstAndLast (head args))]))
|
||||
else return $ Plain [Str (stripFirstAndLast (head args))]
|
||||
|
||||
--
|
||||
-- raw LaTeX
|
||||
|
@ -362,77 +339,93 @@ specialEnvironment = do -- these are always parsed as raw
|
|||
-- | Parse any LaTeX environment and return a Para block containing
|
||||
-- the whole literal environment as raw TeX.
|
||||
rawLaTeXEnvironment :: GenParser Char st Block
|
||||
rawLaTeXEnvironment = try (do
|
||||
string "\\begin"
|
||||
char '{'
|
||||
rawLaTeXEnvironment = try $ do
|
||||
string "\\begin{"
|
||||
name <- many1 alphaNum
|
||||
star <- option "" (string "*") -- for starred variants
|
||||
let name' = name ++ star
|
||||
char '}'
|
||||
args <- option [] commandArgs
|
||||
let argStr = concat args
|
||||
contents <- manyTill (choice [(many1 (noneOf "\\")),
|
||||
contents <- manyTill (choice [ (many1 (noneOf "\\")),
|
||||
(do
|
||||
(Para [TeX str]) <- rawLaTeXEnvironment
|
||||
return str),
|
||||
string "\\" ])
|
||||
(end name')
|
||||
spaces
|
||||
return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++
|
||||
(concat contents) ++ "\\end{" ++ name' ++ "}")]))
|
||||
return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++
|
||||
concat contents ++ "\\end{" ++ name' ++ "}"]
|
||||
|
||||
unknownEnvironment = try (do
|
||||
unknownEnvironment = try $ do
|
||||
state <- getState
|
||||
result <- if stateParseRaw state -- check whether we should include raw TeX
|
||||
then rawLaTeXEnvironment -- if so, get whole raw environment
|
||||
else anyEnvironment -- otherwise just the contents
|
||||
return result)
|
||||
return result
|
||||
|
||||
unknownCommand = try (do
|
||||
notFollowedBy' $ choice $ map end
|
||||
["itemize", "enumerate", "description", "document"]
|
||||
unknownCommand = try $ do
|
||||
notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
|
||||
"document"]
|
||||
(name, star, args) <- command
|
||||
spaces
|
||||
let argStr = concat args
|
||||
state <- getState
|
||||
if (name == "item") && ((stateParserContext state) == ListItemState)
|
||||
if name == "item" && (stateParserContext state) == ListItemState
|
||||
then fail "should not be parsed as raw"
|
||||
else string ""
|
||||
if stateParseRaw state
|
||||
then return (Plain [TeX ("\\" ++ name ++ star ++ argStr)])
|
||||
else return (Plain [Str (joinWithSep " " args)]))
|
||||
then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)]
|
||||
else return $ Plain [Str (joinWithSep " " args)]
|
||||
|
||||
-- latex comment
|
||||
comment = try (do
|
||||
char '%'
|
||||
result <- manyTill anyChar newline
|
||||
spaces
|
||||
return Null)
|
||||
comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
|
||||
|
||||
--
|
||||
-- inline
|
||||
--
|
||||
|
||||
inline = choice [ strong, emph, strikeout, superscript, subscript,
|
||||
ref, lab, code, linebreak, spacer,
|
||||
math, ellipses, emDash, enDash, hyphen, quoted, apostrophe,
|
||||
accentedChar, specialChar, specialInline, escapedChar,
|
||||
unescapedChar, str, endline, whitespace ] <?> "inline"
|
||||
|
||||
specialInline = choice [ url, link, image, footnote, rawLaTeXInline ]
|
||||
<?> "link, raw TeX, note, or image"
|
||||
inline = choice [ strong
|
||||
, emph
|
||||
, strikeout
|
||||
, superscript
|
||||
, subscript
|
||||
, ref
|
||||
, lab
|
||||
, code
|
||||
, linebreak
|
||||
, spacer
|
||||
, math
|
||||
, ellipses
|
||||
, emDash
|
||||
, enDash
|
||||
, hyphen
|
||||
, quoted
|
||||
, apostrophe
|
||||
, accentedChar
|
||||
, specialChar
|
||||
, url
|
||||
, link
|
||||
, image
|
||||
, footnote
|
||||
, rawLaTeXInline
|
||||
, escapedChar
|
||||
, unescapedChar
|
||||
, str
|
||||
, endline
|
||||
, whitespace ] <?> "inline"
|
||||
|
||||
accentedChar = normalAccentedChar <|> specialAccentedChar
|
||||
|
||||
normalAccentedChar = try (do
|
||||
normalAccentedChar = try $ do
|
||||
char '\\'
|
||||
accent <- oneOf "'`^\"~"
|
||||
character <- choice [ between (char '{') (char '}') anyChar, anyChar ]
|
||||
character <- (try $ char '{' >> alphaNum >>~ char '}') <|> alphaNum
|
||||
let table = fromMaybe [] $ lookup character accentTable
|
||||
let result = case lookup accent table of
|
||||
Just num -> chr num
|
||||
Nothing -> '?'
|
||||
return (Str [result]))
|
||||
return $ Str [result]
|
||||
|
||||
-- an association list of letters and association list of accents
|
||||
-- and decimal character numbers.
|
||||
|
@ -451,245 +444,179 @@ accentTable =
|
|||
('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
|
||||
|
||||
specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
|
||||
oslash, pound, euro, copyright, sect ]
|
||||
oslash, pound, euro, copyright, sect ]
|
||||
|
||||
ccedil = try (do
|
||||
ccedil = try $ do
|
||||
char '\\'
|
||||
letter <- oneOfStrings ["cc", "cC"]
|
||||
let num = if letter == "cc" then 231 else 199
|
||||
return (Str [chr num]))
|
||||
return $ Str [chr num]
|
||||
|
||||
aring = try (do
|
||||
aring = try $ do
|
||||
char '\\'
|
||||
letter <- oneOfStrings ["aa", "AA"]
|
||||
let num = if letter == "aa" then 229 else 197
|
||||
return (Str [chr num]))
|
||||
return $ Str [chr num]
|
||||
|
||||
iuml = try (do
|
||||
string "\\\""
|
||||
oneOfStrings ["\\i", "{\\i}"]
|
||||
return (Str [chr 239]))
|
||||
iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
|
||||
return (Str [chr 239])
|
||||
|
||||
icirc = try (do
|
||||
string "\\^"
|
||||
oneOfStrings ["\\i", "{\\i}"]
|
||||
return (Str [chr 238]))
|
||||
icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >>
|
||||
return (Str [chr 238])
|
||||
|
||||
szlig = try (do
|
||||
string "\\ss"
|
||||
return (Str [chr 223]))
|
||||
szlig = try (string "\\ss") >> return (Str [chr 223])
|
||||
|
||||
oslash = try (do
|
||||
oslash = try $ do
|
||||
char '\\'
|
||||
letter <- choice [char 'o', char 'O']
|
||||
let num = if letter == 'o' then 248 else 216
|
||||
return (Str [chr num]))
|
||||
return $ Str [chr num]
|
||||
|
||||
aelig = try (do
|
||||
aelig = try $ do
|
||||
char '\\'
|
||||
letter <- oneOfStrings ["ae", "AE"]
|
||||
let num = if letter == "ae" then 230 else 198
|
||||
return (Str [chr num]))
|
||||
return $ Str [chr num]
|
||||
|
||||
pound = try (do
|
||||
string "\\pounds"
|
||||
return (Str [chr 163]))
|
||||
pound = try (string "\\pounds") >> return (Str [chr 163])
|
||||
|
||||
euro = try (do
|
||||
string "\\euro"
|
||||
return (Str [chr 8364]))
|
||||
euro = try (string "\\euro") >> return (Str [chr 8364])
|
||||
|
||||
copyright = try (do
|
||||
string "\\copyright"
|
||||
return (Str [chr 169]))
|
||||
copyright = try (string "\\copyright") >> return (Str [chr 169])
|
||||
|
||||
sect = try (do
|
||||
string "\\S"
|
||||
return (Str [chr 167]))
|
||||
sect = try (string "\\S") >> return (Str [chr 167])
|
||||
|
||||
escapedChar = do
|
||||
result <- escaped (oneOf " $%&_#{}\n")
|
||||
return (if result == Str "\n" then Str " " else result)
|
||||
return $ if result == Str "\n" then Str " " else result
|
||||
|
||||
unescapedChar = do -- ignore standalone, nonescaped special characters
|
||||
oneOf "$^&_#{}|<>"
|
||||
return (Str "")
|
||||
-- ignore standalone, nonescaped special characters
|
||||
unescapedChar = oneOf "$^&_#{}|<>" >> return (Str "")
|
||||
|
||||
specialChar = choice [ backslash, tilde, caret, bar, lt, gt ]
|
||||
|
||||
backslash = try (do
|
||||
string "\\textbackslash"
|
||||
return (Str "\\"))
|
||||
backslash = try (string "\\textbackslash") >> return (Str "\\")
|
||||
|
||||
tilde = try (do
|
||||
string "\\ensuremath{\\sim}"
|
||||
return (Str "~"))
|
||||
tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
|
||||
|
||||
caret = try (do
|
||||
string "\\^{}"
|
||||
return (Str "^"))
|
||||
caret = try (string "\\^{}") >> return (Str "^")
|
||||
|
||||
bar = try (do
|
||||
string "\\textbar"
|
||||
return (Str "\\"))
|
||||
bar = try (string "\\textbar") >> return (Str "\\")
|
||||
|
||||
lt = try (do
|
||||
string "\\textless"
|
||||
return (Str "<"))
|
||||
lt = try (string "\\textless") >> return (Str "<")
|
||||
|
||||
gt = try (do
|
||||
string "\\textgreater"
|
||||
return (Str ">"))
|
||||
gt = try (string "\\textgreater") >> return (Str ">")
|
||||
|
||||
code = try (do
|
||||
code = try $ do
|
||||
string "\\verb"
|
||||
marker <- anyChar
|
||||
result <- manyTill anyChar (char marker)
|
||||
let result' = removeLeadingTrailingSpace result
|
||||
return (Code result'))
|
||||
return $ Code $ removeLeadingTrailingSpace result
|
||||
|
||||
emph = try (do
|
||||
oneOfStrings [ "\\emph{", "\\textit{" ]
|
||||
result <- manyTill inline (char '}')
|
||||
return (Emph result))
|
||||
emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
|
||||
manyTill inline (char '}') >>= return . Emph
|
||||
|
||||
strikeout = try $ do
|
||||
string "\\sout{"
|
||||
result <- manyTill inline (char '}')
|
||||
return (Strikeout result)
|
||||
strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
|
||||
return . Strikeout
|
||||
|
||||
superscript = try $ do
|
||||
string "\\textsuperscript{"
|
||||
result <- manyTill inline (char '}')
|
||||
return (Superscript result)
|
||||
superscript = try $ string "\\textsuperscript{" >>
|
||||
manyTill inline (char '}') >>= return . Superscript
|
||||
|
||||
-- note: \textsubscript isn't a standard latex command, but we use
|
||||
-- a defined version in pandoc.
|
||||
subscript = try $ do
|
||||
string "\\textsubscript{"
|
||||
result <- manyTill inline (char '}')
|
||||
return (Subscript result)
|
||||
subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
|
||||
return . Subscript
|
||||
|
||||
apostrophe = do
|
||||
char '\''
|
||||
return Apostrophe
|
||||
apostrophe = char '\'' >> return Apostrophe
|
||||
|
||||
quoted = do
|
||||
doubleQuoted <|> singleQuoted
|
||||
quoted = doubleQuoted <|> singleQuoted
|
||||
|
||||
singleQuoted = try (do
|
||||
result <- enclosed singleQuoteStart singleQuoteEnd inline
|
||||
return $ Quoted SingleQuote $ normalizeSpaces result)
|
||||
singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
|
||||
return . Quoted SingleQuote . normalizeSpaces
|
||||
|
||||
doubleQuoted = try (do
|
||||
result <- enclosed doubleQuoteStart doubleQuoteEnd inline
|
||||
return $ Quoted DoubleQuote $ normalizeSpaces result)
|
||||
doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
|
||||
return . Quoted DoubleQuote . normalizeSpaces
|
||||
|
||||
singleQuoteStart = char '`'
|
||||
|
||||
singleQuoteEnd = char '\'' >> notFollowedBy alphaNum
|
||||
singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
|
||||
|
||||
doubleQuoteStart = string "``"
|
||||
|
||||
doubleQuoteEnd = string "''"
|
||||
|
||||
ellipses = try (do
|
||||
string "\\ldots"
|
||||
option "" (try (string "{}"))
|
||||
return Ellipses)
|
||||
ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >>
|
||||
return Ellipses
|
||||
|
||||
enDash = try (do
|
||||
string "--"
|
||||
notFollowedBy (char '-')
|
||||
return EnDash)
|
||||
enDash = try (string "--") >> return EnDash
|
||||
|
||||
emDash = try (do
|
||||
string "---"
|
||||
return EmDash)
|
||||
emDash = try (string "---") >> return EmDash
|
||||
|
||||
hyphen = do
|
||||
char '-'
|
||||
return (Str "-")
|
||||
hyphen = char '-' >> return (Str "-")
|
||||
|
||||
lab = try (do
|
||||
lab = try $ do
|
||||
string "\\label{"
|
||||
result <- manyTill anyChar (char '}')
|
||||
return (Str ("(" ++ result ++ ")")))
|
||||
return $ Str $ "(" ++ result ++ ")"
|
||||
|
||||
ref = try (do
|
||||
string "\\ref{"
|
||||
result <- manyTill anyChar (char '}')
|
||||
return (Str (result)))
|
||||
ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
|
||||
|
||||
strong = try (do
|
||||
string "\\textbf{"
|
||||
result <- manyTill inline (char '}')
|
||||
return (Strong result))
|
||||
strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
|
||||
return . Strong
|
||||
|
||||
whitespace = do
|
||||
many1 (oneOf "~ \t")
|
||||
return Space
|
||||
whitespace = many1 (oneOf "~ \t") >> return Space
|
||||
|
||||
-- hard line break
|
||||
linebreak = try (do
|
||||
string "\\\\"
|
||||
return LineBreak)
|
||||
linebreak = try (string "\\\\") >> return LineBreak
|
||||
|
||||
spacer = try $ do
|
||||
string "\\,"
|
||||
return (Str "")
|
||||
spacer = try (string "\\,") >> return (Str "")
|
||||
|
||||
str = do
|
||||
result <- many1 (noneOf specialChars)
|
||||
return (Str result)
|
||||
str = many1 (noneOf specialChars) >>= return . Str
|
||||
|
||||
-- endline internal to paragraph
|
||||
endline = try (do
|
||||
newline
|
||||
notFollowedBy blankline
|
||||
return Space)
|
||||
endline = try $ newline >> notFollowedBy blankline >> return Space
|
||||
|
||||
-- math
|
||||
math = math1 <|> math2 <?> "math"
|
||||
|
||||
math1 = try (do
|
||||
math1 = try $ do
|
||||
char '$'
|
||||
result <- many (noneOf "$")
|
||||
char '$'
|
||||
return (TeX ("$" ++ result ++ "$")))
|
||||
return $ TeX ("$" ++ result ++ "$")
|
||||
|
||||
math2 = try (do
|
||||
math2 = try $ do
|
||||
string "\\("
|
||||
result <- many (noneOf "$")
|
||||
string "\\)"
|
||||
return (TeX ("$" ++ result ++ "$")))
|
||||
return $ TeX ("$" ++ result ++ "$")
|
||||
|
||||
--
|
||||
-- links and images
|
||||
--
|
||||
|
||||
url = try (do
|
||||
url = try $ do
|
||||
string "\\url"
|
||||
url <- charsInBalanced '{' '}'
|
||||
return (Link [Code url] (url, "")))
|
||||
return $ Link [Code url] (url, "")
|
||||
|
||||
link = try (do
|
||||
link = try $ do
|
||||
string "\\href{"
|
||||
url <- manyTill anyChar (char '}')
|
||||
char '{'
|
||||
label <- manyTill inline (char '}')
|
||||
return (Link (normalizeSpaces label) (url, "")))
|
||||
return $ Link (normalizeSpaces label) (url, "")
|
||||
|
||||
image = try (do
|
||||
image = try $ do
|
||||
("includegraphics", _, args) <- command
|
||||
let args' = filter isArg args -- filter out options
|
||||
let src = if null args' then
|
||||
("", "")
|
||||
else
|
||||
(stripFirstAndLast (head args'), "")
|
||||
return (Image [Str "image"] src))
|
||||
return $ Image [Str "image"] src
|
||||
|
||||
footnote = try (do
|
||||
footnote = try $ do
|
||||
(name, _, (contents:[])) <- command
|
||||
if ((name == "footnote") || (name == "thanks"))
|
||||
then string ""
|
||||
|
@ -700,16 +627,15 @@ footnote = try (do
|
|||
setInput $ contents'
|
||||
blocks <- parseBlocks
|
||||
setInput rest
|
||||
return (Note blocks))
|
||||
return $ Note blocks
|
||||
|
||||
-- | Parse any LaTeX command and return it in a raw TeX inline element.
|
||||
rawLaTeXInline :: GenParser Char ParserState Inline
|
||||
rawLaTeXInline = try (do
|
||||
rawLaTeXInline = try $ do
|
||||
(name, star, args) <- command
|
||||
let argStr = concat args
|
||||
state <- getState
|
||||
if ((name == "begin") || (name == "end") || (name == "item"))
|
||||
then fail "not an inline command"
|
||||
else string ""
|
||||
return (TeX ("\\" ++ name ++ star ++ argStr)))
|
||||
return $ TeX ("\\" ++ name ++ star ++ concat args)
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -31,23 +31,14 @@ module Text.Pandoc.Readers.RST (
|
|||
readRST
|
||||
) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.ParserCombinators
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag )
|
||||
import Text.Regex ( matchRegex, mkRegex )
|
||||
import Text.ParserCombinators.Parsec
|
||||
import Data.Maybe ( fromMaybe )
|
||||
import Data.List ( findIndex, delete )
|
||||
import Data.Char ( toUpper )
|
||||
|
||||
-- | Parse reStructuredText string and return Pandoc document.
|
||||
readRST :: ParserState -> String -> Pandoc
|
||||
readRST state str = (readWith parseRST) state (str ++ "\n\n")
|
||||
|
||||
-- | Parse a string and print result (for testing).
|
||||
testString :: String -> IO ()
|
||||
testString = testStringWith parseRST
|
||||
|
||||
--
|
||||
-- Constants and data structure definitions
|
||||
---
|
||||
|
@ -62,15 +53,11 @@ specialChars = "\\`|*_<>$:[-"
|
|||
-- parsing documents
|
||||
--
|
||||
|
||||
isAnonKey (ref, src) = (ref == [Str "_"])
|
||||
isAnonKey (ref, src) = ref == [Str "_"]
|
||||
|
||||
isHeader1 :: Block -> Bool
|
||||
isHeader1 (Header 1 _) = True
|
||||
isHeader1 _ = False
|
||||
|
||||
isHeader2 :: Block -> Bool
|
||||
isHeader2 (Header 2 _) = True
|
||||
isHeader2 _ = False
|
||||
isHeader :: Int -> Block -> Bool
|
||||
isHeader n (Header x _) = x == n
|
||||
isHeader _ _ = False
|
||||
|
||||
-- | Promote all headers in a list of blocks. (Part of
|
||||
-- title transformation for RST.)
|
||||
|
@ -86,23 +73,23 @@ promoteHeaders num [] = []
|
|||
titleTransform :: [Block] -- ^ list of blocks
|
||||
-> ([Block], [Inline]) -- ^ modified list of blocks, title
|
||||
titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
|
||||
if (any isHeader1 rest) || (any isHeader2 rest)
|
||||
if (any (isHeader 1) rest) || (any (isHeader 2) rest)
|
||||
then ((Header 1 head1):(Header 2 head2):rest, [])
|
||||
else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
|
||||
titleTransform ((Header 1 head1):rest) = -- title, no subtitle
|
||||
if (any isHeader1 rest)
|
||||
if (any (isHeader 1) rest)
|
||||
then ((Header 1 head1):rest, [])
|
||||
else ((promoteHeaders 1 rest), head1)
|
||||
titleTransform blocks = (blocks, [])
|
||||
|
||||
parseRST = do
|
||||
-- first pass: get anonymous keys
|
||||
refs <- manyTill (referenceKey <|> (do l <- lineClump
|
||||
return (LineClump l))) eof
|
||||
-- first pass: get keys
|
||||
refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof
|
||||
let keys = map (\(KeyBlock label target) -> (label, target)) $
|
||||
filter isKeyBlock refs
|
||||
-- second pass, with keys stripped out
|
||||
let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
|
||||
setInput $ concat rawlines -- with keys stripped out
|
||||
setInput $ concat rawlines
|
||||
updateState (\state -> state { stateKeys = keys })
|
||||
blocks <- parseBlocks
|
||||
let blocks' = filter (/= Null) blocks
|
||||
|
@ -113,7 +100,7 @@ parseRST = do
|
|||
let authors = stateAuthors state
|
||||
let date = stateDate state
|
||||
let title' = if (null title) then (stateTitle state) else title
|
||||
return (Pandoc (Meta title' authors date) blocks'')
|
||||
return $ Pandoc (Meta title' authors date) blocks''
|
||||
|
||||
--
|
||||
-- parsing blocks
|
||||
|
@ -121,32 +108,39 @@ parseRST = do
|
|||
|
||||
parseBlocks = manyTill block eof
|
||||
|
||||
block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote,
|
||||
imageBlock, unknownDirective, header,
|
||||
hrule, list, fieldList, lineBlock, para, plain,
|
||||
nullBlock ] <?> "block"
|
||||
block = choice [ codeBlock
|
||||
, rawHtmlBlock
|
||||
, rawLaTeXBlock
|
||||
, blockQuote
|
||||
, imageBlock
|
||||
, unknownDirective
|
||||
, header
|
||||
, hrule
|
||||
, list
|
||||
, fieldList
|
||||
, lineBlock
|
||||
, para
|
||||
, plain
|
||||
, nullBlock ] <?> "block"
|
||||
|
||||
--
|
||||
-- field list
|
||||
--
|
||||
|
||||
fieldListItem = try (do
|
||||
fieldListItem = try $ do
|
||||
char ':'
|
||||
name <- many1 alphaNum
|
||||
string ": "
|
||||
skipSpaces
|
||||
first <- manyTill anyChar newline
|
||||
rest <- many (do
|
||||
notFollowedBy (char ':')
|
||||
notFollowedBy blankline
|
||||
skipSpaces
|
||||
manyTill anyChar newline )
|
||||
return (name, (joinWithSep " " (first:rest))))
|
||||
rest <- many (notFollowedBy ((char ':') <|> blankline) >>
|
||||
skipSpaces >> manyTill anyChar newline)
|
||||
return $ (name, (joinWithSep " " (first:rest)))
|
||||
|
||||
fieldList = try (do
|
||||
fieldList = try $ do
|
||||
items <- many1 fieldListItem
|
||||
blanklines
|
||||
let authors = case (lookup "Authors" items) of
|
||||
let authors = case lookup "Authors" items of
|
||||
Just auth -> [auth]
|
||||
Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
|
||||
let date = case (lookup "Date" items) of
|
||||
|
@ -162,82 +156,74 @@ fieldList = try (do
|
|||
updateState (\st -> st { stateAuthors = authors,
|
||||
stateDate = date,
|
||||
stateTitle = title })
|
||||
return (BlockQuote result))
|
||||
return $ BlockQuote result
|
||||
|
||||
--
|
||||
-- line block
|
||||
--
|
||||
|
||||
lineBlockLine = try (do
|
||||
lineBlockLine = try $ do
|
||||
string "| "
|
||||
white <- many (oneOf " \t")
|
||||
line <- manyTill inline newline
|
||||
let line' = (if null white then [] else [Str white]) ++ line ++ [LineBreak]
|
||||
return line')
|
||||
return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
|
||||
|
||||
lineBlock = try (do
|
||||
lineBlock = try $ do
|
||||
lines <- many1 lineBlockLine
|
||||
blanklines
|
||||
return $ Para (concat lines))
|
||||
return $ Para (concat lines)
|
||||
|
||||
--
|
||||
-- paragraph block
|
||||
--
|
||||
|
||||
para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph"
|
||||
para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
|
||||
|
||||
codeBlockStart = try (do
|
||||
string "::"
|
||||
blankline
|
||||
blankline)
|
||||
codeBlockStart = try $ string "::" >> blankline >> blankline
|
||||
|
||||
-- paragraph that ends in a :: starting a code block
|
||||
paraBeforeCodeBlock = try (do
|
||||
result <- many1 (do {notFollowedBy' codeBlockStart; inline})
|
||||
paraBeforeCodeBlock = try $ do
|
||||
result <- many1 (notFollowedBy' codeBlockStart >> inline)
|
||||
lookAhead (string "::")
|
||||
return (Para (if (last result == Space)
|
||||
then normalizeSpaces result
|
||||
else (normalizeSpaces result) ++ [Str ":"])))
|
||||
return $ Para $ if last result == Space
|
||||
then normalizeSpaces result
|
||||
else (normalizeSpaces result) ++ [Str ":"]
|
||||
|
||||
-- regular paragraph
|
||||
paraNormal = try (do
|
||||
paraNormal = try $ do
|
||||
result <- many1 inline
|
||||
newline
|
||||
blanklines
|
||||
let result' = normalizeSpaces result
|
||||
return (Para result'))
|
||||
return $ Para $ normalizeSpaces result
|
||||
|
||||
plain = do
|
||||
result <- many1 inline
|
||||
let result' = normalizeSpaces result
|
||||
return (Plain result')
|
||||
plain = many1 inline >>= return . Plain . normalizeSpaces
|
||||
|
||||
--
|
||||
-- image block
|
||||
--
|
||||
|
||||
imageBlock = try (do
|
||||
imageBlock = try $ do
|
||||
string ".. image:: "
|
||||
src <- manyTill anyChar newline
|
||||
return (Plain [Image [Str "image"] (src, "")]))
|
||||
return $ Plain [Image [Str "image"] (src, "")]
|
||||
|
||||
--
|
||||
-- header blocks
|
||||
--
|
||||
|
||||
header = choice [ doubleHeader, singleHeader ] <?> "header"
|
||||
header = doubleHeader <|> singleHeader <?> "header"
|
||||
|
||||
-- a header with lines on top and bottom
|
||||
doubleHeader = try (do
|
||||
doubleHeader = try $ do
|
||||
c <- oneOf underlineChars
|
||||
rest <- many (char c) -- the top line
|
||||
let lenTop = length (c:rest)
|
||||
skipSpaces
|
||||
newline
|
||||
txt <- many1 (do {notFollowedBy blankline; inline})
|
||||
pos <- getPosition
|
||||
txt <- many1 (notFollowedBy blankline >> inline)
|
||||
pos <- getPosition
|
||||
let len = (sourceColumn pos) - 1
|
||||
if (len > lenTop) then fail "title longer than border" else (do {return ()})
|
||||
if (len > lenTop) then fail "title longer than border" else return ()
|
||||
blankline -- spaces and newline
|
||||
count lenTop (char c) -- the bottom line
|
||||
blanklines
|
||||
|
@ -249,10 +235,10 @@ doubleHeader = try (do
|
|||
Just ind -> (headerTable, ind + 1)
|
||||
Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
|
||||
setState (state { stateHeaderTable = headerTable' })
|
||||
return (Header level (normalizeSpaces txt)))
|
||||
return $ Header level (normalizeSpaces txt)
|
||||
|
||||
-- a header with line on the bottom only
|
||||
singleHeader = try (do
|
||||
singleHeader = try $ do
|
||||
notFollowedBy' whitespace
|
||||
txt <- many1 (do {notFollowedBy blankline; inline})
|
||||
pos <- getPosition
|
||||
|
@ -268,19 +254,19 @@ singleHeader = try (do
|
|||
Just ind -> (headerTable, ind + 1)
|
||||
Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
|
||||
setState (state { stateHeaderTable = headerTable' })
|
||||
return (Header level (normalizeSpaces txt)))
|
||||
return $ Header level (normalizeSpaces txt)
|
||||
|
||||
--
|
||||
-- hrule block
|
||||
--
|
||||
|
||||
hruleWith chr = try (do
|
||||
hruleWith chr = try $ do
|
||||
count 4 (char chr)
|
||||
skipMany (char chr)
|
||||
skipSpaces
|
||||
newline
|
||||
blanklines
|
||||
return HorizontalRule)
|
||||
return HorizontalRule
|
||||
|
||||
hrule = choice (map hruleWith underlineChars) <?> "hrule"
|
||||
|
||||
|
@ -289,15 +275,16 @@ hrule = choice (map hruleWith underlineChars) <?> "hrule"
|
|||
--
|
||||
|
||||
-- read a line indented by a given string
|
||||
indentedLine indents = try (do
|
||||
indentedLine indents = try $ do
|
||||
string indents
|
||||
result <- manyTill anyChar newline
|
||||
return (result ++ "\n"))
|
||||
return $ result ++ "\n"
|
||||
|
||||
-- two or more indented lines, possibly separated by blank lines
|
||||
-- if variable = True, then any indent will work, but it must be consistent through the block
|
||||
-- if variable = False, indent should be one tab or equivalent in spaces
|
||||
indentedBlock variable = try (do
|
||||
-- two or more indented lines, possibly separated by blank lines.
|
||||
-- if variable = True, then any indent will work, but it must be
|
||||
-- consistent through the block.
|
||||
-- if variable = False, indent should be one tab or equivalent in spaces.
|
||||
indentedBlock variable = try $ do
|
||||
state <- getState
|
||||
let tabStop = stateTabStop state
|
||||
indents <- if variable
|
||||
|
@ -305,51 +292,47 @@ indentedBlock variable = try (do
|
|||
else oneOfStrings ["\t", (replicate tabStop ' ')]
|
||||
firstline <- manyTill anyChar newline
|
||||
rest <- many (choice [ indentedLine indents,
|
||||
try (do
|
||||
b <- blanklines
|
||||
l <- indentedLine indents
|
||||
return (b ++ l))])
|
||||
option "" blanklines
|
||||
return (firstline ++ "\n" ++ (concat rest)))
|
||||
try (do b <- blanklines
|
||||
l <- indentedLine indents
|
||||
return (b ++ l))])
|
||||
optional blanklines
|
||||
return $ firstline ++ "\n" ++ concat rest
|
||||
|
||||
codeBlock = try (do
|
||||
codeBlock = try $ do
|
||||
codeBlockStart
|
||||
result <- indentedBlock False
|
||||
-- the False means we want one tab stop indent on each line
|
||||
return (CodeBlock (stripTrailingNewlines result)))
|
||||
return $ CodeBlock $ stripTrailingNewlines result
|
||||
|
||||
--
|
||||
-- raw html
|
||||
--
|
||||
|
||||
rawHtmlBlock = try (do
|
||||
string ".. raw:: html"
|
||||
blanklines
|
||||
result <- indentedBlock True
|
||||
return (RawHtml result))
|
||||
rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
|
||||
indentedBlock True >>= return . RawHtml
|
||||
|
||||
--
|
||||
-- raw latex
|
||||
--
|
||||
|
||||
rawLaTeXBlock = try (do
|
||||
rawLaTeXBlock = try $ do
|
||||
string ".. raw:: latex"
|
||||
blanklines
|
||||
result <- indentedBlock True
|
||||
return (Para [(TeX result)]))
|
||||
return $ Para [(TeX result)]
|
||||
|
||||
--
|
||||
-- block quotes
|
||||
--
|
||||
|
||||
blockQuote = try (do
|
||||
blockQuote = try $ do
|
||||
raw <- indentedBlock True
|
||||
-- parse the extracted block, which may contain various block elements:
|
||||
rest <- getInput
|
||||
setInput $ raw ++ "\n\n"
|
||||
contents <- parseBlocks
|
||||
setInput rest
|
||||
return (BlockQuote contents))
|
||||
return $ BlockQuote contents
|
||||
|
||||
--
|
||||
-- list blocks
|
||||
|
@ -369,15 +352,14 @@ definitionListItem = try $ do
|
|||
|
||||
definitionList = try $ do
|
||||
items <- many1 definitionListItem
|
||||
return (DefinitionList items)
|
||||
return $ DefinitionList items
|
||||
|
||||
-- parses bullet list start and returns its length (inc. following whitespace)
|
||||
bulletListStart = try (do
|
||||
bulletListStart = try $ do
|
||||
notFollowedBy' hrule -- because hrules start out just like lists
|
||||
marker <- oneOf bulletListMarkers
|
||||
white <- many1 spaceChar
|
||||
let len = length (marker:white)
|
||||
return len)
|
||||
return $ length (marker:white)
|
||||
|
||||
-- parses ordered list start and returns its length (inc following whitespace)
|
||||
orderedListStart style delim = try $ do
|
||||
|
@ -386,11 +368,11 @@ orderedListStart style delim = try $ do
|
|||
return $ markerLen + length white
|
||||
|
||||
-- parse a line of a list item
|
||||
listLine markerLength = try (do
|
||||
listLine markerLength = try $ do
|
||||
notFollowedBy blankline
|
||||
indentWith markerLength
|
||||
line <- manyTill anyChar newline
|
||||
return (line ++ "\n"))
|
||||
return $ line ++ "\n"
|
||||
|
||||
-- indent by specified number of spaces (or equiv. tabs)
|
||||
indentWith num = do
|
||||
|
@ -399,7 +381,7 @@ indentWith num = do
|
|||
if (num < tabStop)
|
||||
then count num (char ' ')
|
||||
else choice [ try (count num (char ' ')),
|
||||
(try (do {char '\t'; count (num - tabStop) (char ' ')})) ]
|
||||
(try (char '\t' >> count (num - tabStop) (char ' '))) ]
|
||||
|
||||
-- parse raw text for one list item, excluding start marker and continuations
|
||||
rawListItem start = try $ do
|
||||
|
@ -411,19 +393,16 @@ rawListItem start = try $ do
|
|||
-- continuation of a list item - indented and separated by blankline or
|
||||
-- (in compact lists) endline.
|
||||
-- Note: nested lists are parsed as continuations.
|
||||
listContinuation markerLength = try (do
|
||||
listContinuation markerLength = try $ do
|
||||
blanks <- many1 blankline
|
||||
result <- many1 (listLine markerLength)
|
||||
return (blanks ++ (concat result)))
|
||||
return $ blanks ++ concat result
|
||||
|
||||
listItem start = try (do
|
||||
listItem start = try $ do
|
||||
(markerLength, first) <- rawListItem start
|
||||
rest <- many (listContinuation markerLength)
|
||||
blanks <- choice [ try (do
|
||||
b <- many blankline
|
||||
lookAhead start
|
||||
return b),
|
||||
many1 blankline ] -- whole list must end with blank
|
||||
blanks <- choice [ try (many blankline >>~ lookAhead start),
|
||||
many1 blankline ] -- whole list must end with blank.
|
||||
-- parsing with ListItemState forces markers at beginning of lines to
|
||||
-- count as list item markers, even if not separated by blank space.
|
||||
-- see definition of "endline"
|
||||
|
@ -436,52 +415,44 @@ listItem start = try (do
|
|||
parsed <- parseBlocks
|
||||
setInput remaining
|
||||
updateState (\st -> st {stateParserContext = oldContext})
|
||||
return parsed)
|
||||
return parsed
|
||||
|
||||
orderedList = try $ do
|
||||
(start, style, delim) <- lookAhead anyOrderedListMarker
|
||||
items <- many1 (listItem (orderedListStart style delim))
|
||||
let items' = compactify items
|
||||
return (OrderedList (start, style, delim) items')
|
||||
return $ OrderedList (start, style, delim) items'
|
||||
|
||||
bulletList = try (do
|
||||
bulletList = try $ do
|
||||
items <- many1 (listItem bulletListStart)
|
||||
let items' = compactify items
|
||||
return (BulletList items'))
|
||||
return $ BulletList items'
|
||||
|
||||
--
|
||||
-- unknown directive (e.g. comment)
|
||||
--
|
||||
|
||||
unknownDirective = try (do
|
||||
unknownDirective = try $ do
|
||||
string ".. "
|
||||
manyTill anyChar newline
|
||||
many (do
|
||||
string " "
|
||||
char ':'
|
||||
many1 (noneOf "\n:")
|
||||
char ':'
|
||||
many1 (noneOf "\n")
|
||||
newline)
|
||||
option "" blanklines
|
||||
return Null)
|
||||
many (string " :" >> many1 (noneOf "\n:") >> char ':' >>
|
||||
many1 (noneOf "\n") >> newline)
|
||||
optional blanklines
|
||||
return Null
|
||||
|
||||
--
|
||||
-- reference key
|
||||
--
|
||||
|
||||
referenceKey = do
|
||||
result <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
|
||||
option "" blanklines
|
||||
return result
|
||||
referenceKey =
|
||||
choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] >>~
|
||||
optional blanklines
|
||||
|
||||
targetURI = try $ do
|
||||
skipSpaces
|
||||
option ' ' newline
|
||||
contents <- many1 (try (do many spaceChar
|
||||
newline
|
||||
many1 spaceChar
|
||||
noneOf " \t\n") <|> noneOf "\n")
|
||||
optional newline
|
||||
contents <- many1 (try (many spaceChar >> newline >>
|
||||
many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
|
||||
blanklines
|
||||
return contents
|
||||
|
||||
|
@ -516,71 +487,73 @@ regularKey = try $ do
|
|||
-- inline
|
||||
--
|
||||
|
||||
inline = choice [ superscript, subscript,
|
||||
escapedChar, link, image, hyphens, strong, emph, code,
|
||||
str, tabchar, whitespace, endline, symbol ] <?> "inline"
|
||||
inline = choice [ superscript
|
||||
, subscript
|
||||
, escapedChar
|
||||
, link
|
||||
, image
|
||||
, hyphens
|
||||
, strong
|
||||
, emph
|
||||
, code
|
||||
, str
|
||||
, tabchar
|
||||
, whitespace
|
||||
, endline
|
||||
, symbol ] <?> "inline"
|
||||
|
||||
hyphens = try (do
|
||||
hyphens = try $ do
|
||||
result <- many1 (char '-')
|
||||
option Space endline
|
||||
-- don't want to treat endline after hyphen or dash as a space
|
||||
return (Str result))
|
||||
return $ Str result
|
||||
|
||||
escapedChar = escaped anyChar
|
||||
|
||||
symbol = do
|
||||
result <- oneOf specialChars
|
||||
return (Str [result])
|
||||
return $ Str [result]
|
||||
|
||||
-- parses inline code, between codeStart and codeEnd
|
||||
code = try (do
|
||||
code = try $ do
|
||||
string "``"
|
||||
result <- manyTill anyChar (try (string "``"))
|
||||
let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
|
||||
return (Code result'))
|
||||
return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
|
||||
|
||||
emph = do
|
||||
result <- enclosed (char '*') (char '*') inline
|
||||
return (Emph (normalizeSpaces result))
|
||||
emph = enclosed (char '*') (char '*') inline >>=
|
||||
return . Emph . normalizeSpaces
|
||||
|
||||
strong = do
|
||||
result <- enclosed (string "**") (string "**") inline
|
||||
return (Strong (normalizeSpaces result))
|
||||
strong = enclosed (string "**") (string "**") inline >>=
|
||||
return . Strong . normalizeSpaces
|
||||
|
||||
interpreted role = try $ do
|
||||
option "" (try $ string "\\ ")
|
||||
optional $ try $ string "\\ "
|
||||
result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
|
||||
nextChar <- lookAhead anyChar
|
||||
try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
|
||||
return [Str result]
|
||||
|
||||
superscript = interpreted "sup" >>= (return . Superscript)
|
||||
|
||||
subscript = interpreted "sub" >>= (return . Subscript)
|
||||
|
||||
whitespace = do
|
||||
many1 spaceChar <?> "whitespace"
|
||||
return Space
|
||||
whitespace = many1 spaceChar >> return Space <?> "whitespace"
|
||||
|
||||
tabchar = do
|
||||
tab
|
||||
return (Str "\t")
|
||||
tabchar = tab >> return (Str "\t")
|
||||
|
||||
str = do
|
||||
notFollowedBy' oneWordReference
|
||||
result <- many1 (noneOf (specialChars ++ "\t\n "))
|
||||
return (Str result)
|
||||
str = notFollowedBy' oneWordReference >>
|
||||
many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
|
||||
|
||||
-- an endline character that can be treated as a space, not a structural break
|
||||
endline = try (do
|
||||
endline = try $ do
|
||||
newline
|
||||
notFollowedBy blankline
|
||||
-- parse potential list-starts at beginning of line differently in a list:
|
||||
st <- getState
|
||||
if ((stateParserContext st) == ListItemState)
|
||||
then do notFollowedBy' anyOrderedListMarker
|
||||
notFollowedBy' bulletListStart
|
||||
else option () pzero
|
||||
return Space)
|
||||
then notFollowedBy' anyOrderedListMarker >> notFollowedBy' bulletListStart
|
||||
else return ()
|
||||
return Space
|
||||
|
||||
--
|
||||
-- links
|
||||
|
@ -628,10 +601,10 @@ referenceLink = try $ do
|
|||
uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
|
||||
"mailto:", "news:", "telnet:" ]
|
||||
|
||||
uri = try (do
|
||||
uri = try $ do
|
||||
scheme <- uriScheme
|
||||
identifier <- many1 (noneOf " \t\n")
|
||||
return (scheme ++ identifier))
|
||||
return $ scheme ++ identifier
|
||||
|
||||
autoURI = try $ do
|
||||
src <- uri
|
||||
|
@ -639,20 +612,20 @@ autoURI = try $ do
|
|||
|
||||
emailChar = alphaNum <|> oneOf "-+_."
|
||||
|
||||
emailAddress = try (do
|
||||
emailAddress = try $ do
|
||||
firstLetter <- alphaNum
|
||||
restAddr <- many emailChar
|
||||
let addr = firstLetter:restAddr
|
||||
char '@'
|
||||
dom <- domain
|
||||
return (addr ++ '@':dom))
|
||||
return $ addr ++ '@':dom
|
||||
|
||||
domainChar = alphaNum <|> char '-'
|
||||
|
||||
domain = try (do
|
||||
domain = try $ do
|
||||
first <- many1 domainChar
|
||||
dom <- many1 (try (do{ char '.'; many1 domainChar }))
|
||||
return (joinWithSep "." (first:dom)))
|
||||
return $ joinWithSep "." (first:dom)
|
||||
|
||||
autoEmail = try $ do
|
||||
src <- emailAddress
|
||||
|
@ -669,5 +642,5 @@ image = try $ do
|
|||
src <- case lookupKeySrc keyTable ref of
|
||||
Nothing -> fail "no corresponding key"
|
||||
Just target -> return target
|
||||
return (Image (normalizeSpaces ref) src)
|
||||
return $ Image (normalizeSpaces ref) src
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
|
||||
{- |
|
||||
Module : Text.Pandoc.Writers.ConTeXt
|
||||
Copyright : Copyright (C) 2006-7 John MacFarlane
|
||||
Copyright : Copyright (C) 2007 John MacFarlane
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||
|
@ -27,9 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
|
||||
Conversion of 'Pandoc' format into ConTeXt.
|
||||
-}
|
||||
module Text.Pandoc.Writers.ConTeXt (
|
||||
writeConTeXt
|
||||
) where
|
||||
module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Printf ( printf )
|
||||
|
@ -40,8 +38,7 @@ type WriterState = Int -- number of next URL reference
|
|||
|
||||
-- | Convert Pandoc to ConTeXt.
|
||||
writeConTeXt :: WriterOptions -> Pandoc -> String
|
||||
writeConTeXt options document =
|
||||
evalState (pandocToConTeXt options document) 1
|
||||
writeConTeXt options document = evalState (pandocToConTeXt options document) 1
|
||||
|
||||
pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String
|
||||
pandocToConTeXt options (Pandoc meta blocks) = do
|
||||
|
@ -111,8 +108,8 @@ stringToConTeXt = concatMap escapeCharForConTeXt
|
|||
-- | Convert Pandoc block element to ConTeXt.
|
||||
blockToConTeXt :: Block -> State WriterState String
|
||||
blockToConTeXt Null = return ""
|
||||
blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= (return . (++ "\n"))
|
||||
blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= (return . (++ "\n\n"))
|
||||
blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= return . (++ "\n")
|
||||
blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= return . (++ "\n\n")
|
||||
blockToConTeXt (BlockQuote lst) = do
|
||||
contents <- blockListToConTeXt lst
|
||||
return $ "\\startblockquote\n" ++ contents ++ "\\stopblockquote\n\n"
|
||||
|
@ -137,12 +134,12 @@ blockToConTeXt (OrderedList attribs lst) = case attribs of
|
|||
return $ "\\startitemize" ++ markerWidth' ++ "\n" ++ concat contents ++
|
||||
"\\stopitemize\n"
|
||||
blockToConTeXt (DefinitionList lst) =
|
||||
mapM defListItemToConTeXt lst >>= (return . (++ "\n") . concat)
|
||||
mapM defListItemToConTeXt lst >>= return . (++ "\n") . concat
|
||||
blockToConTeXt HorizontalRule = return "\\thinrule\n\n"
|
||||
blockToConTeXt (Header level lst) = do
|
||||
contents <- inlineListToConTeXt lst
|
||||
return $ if (level > 0) && (level <= 3)
|
||||
then "\\" ++ (concat (replicate (level - 1) "sub")) ++
|
||||
return $ if level > 0 && level <= 3
|
||||
then "\\" ++ concat (replicate (level - 1) "sub") ++
|
||||
"section{" ++ contents ++ "}\n\n"
|
||||
else contents ++ "\n\n"
|
||||
blockToConTeXt (Table caption aligns widths heads rows) = do
|
||||
|
@ -186,12 +183,12 @@ defListItemToConTeXt (term, def) = do
|
|||
|
||||
-- | Convert list of block elements to ConTeXt.
|
||||
blockListToConTeXt :: [Block] -> State WriterState String
|
||||
blockListToConTeXt lst = mapM blockToConTeXt lst >>= (return . concat)
|
||||
blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . concat
|
||||
|
||||
-- | Convert list of inline elements to ConTeXt.
|
||||
inlineListToConTeXt :: [Inline] -- ^ Inlines to convert
|
||||
-> State WriterState String
|
||||
inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= (return . concat)
|
||||
inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . concat
|
||||
|
||||
isQuoted :: Inline -> Bool
|
||||
isQuoted (Quoted _ _) = True
|
||||
|
|
|
@ -30,16 +30,35 @@ Conversion of 'Pandoc' documents to Docbook XML.
|
|||
module Text.Pandoc.Writers.Docbook ( writeDocbook) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Entities ( escapeStringForXML )
|
||||
import Data.Char ( toLower, ord )
|
||||
import Data.List ( isPrefixOf, partition, drop )
|
||||
import Data.List ( isPrefixOf, drop )
|
||||
import Text.PrettyPrint.HughesPJ hiding ( Str )
|
||||
|
||||
|
||||
--
|
||||
-- code to format XML
|
||||
--
|
||||
|
||||
-- | Escape one character as needed for XML.
|
||||
escapeCharForXML :: Char -> String
|
||||
escapeCharForXML x = case x of
|
||||
'&' -> "&"
|
||||
'<' -> "<"
|
||||
'>' -> ">"
|
||||
'"' -> """
|
||||
'\160' -> " "
|
||||
c -> [c]
|
||||
|
||||
-- | True if the character needs to be escaped.
|
||||
needsEscaping :: Char -> Bool
|
||||
needsEscaping c = c `elem` "&<>\"\160"
|
||||
|
||||
-- | Escape string as needed for XML. Entity references are not preserved.
|
||||
escapeStringForXML :: String -> String
|
||||
escapeStringForXML "" = ""
|
||||
escapeStringForXML str =
|
||||
case break needsEscaping str of
|
||||
(okay, "") -> okay
|
||||
(okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
|
||||
|
||||
-- | Return a text object with a string of formatted XML attributes.
|
||||
attributeList :: [(String, String)] -> Doc
|
||||
attributeList = text . concatMap
|
||||
|
@ -52,10 +71,10 @@ inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
|
|||
inTags isIndented tagType attribs contents =
|
||||
let openTag = char '<' <> text tagType <> attributeList attribs <>
|
||||
char '>'
|
||||
closeTag = text "</" <> text tagType <> char '>' in
|
||||
if isIndented
|
||||
then openTag $$ nest 2 contents $$ closeTag
|
||||
else openTag <> contents <> closeTag
|
||||
closeTag = text "</" <> text tagType <> char '>'
|
||||
in if isIndented
|
||||
then openTag $$ nest 2 contents $$ closeTag
|
||||
else openTag <> contents <> closeTag
|
||||
|
||||
-- | Return a self-closing tag of tagType with specified attributes
|
||||
selfClosingTag :: String -> [(String, String)] -> Doc
|
||||
|
@ -79,42 +98,42 @@ authorToDocbook :: [Char] -> Doc
|
|||
authorToDocbook name = inTagsIndented "author" $
|
||||
if ',' `elem` name
|
||||
then -- last name first
|
||||
let (lastname, rest) = break (==',') name
|
||||
firstname = removeLeadingSpace rest in
|
||||
inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
|
||||
inTagsSimple "surname" (text $ escapeStringForXML lastname)
|
||||
let (lastname, rest) = break (==',') name
|
||||
firstname = removeLeadingSpace rest in
|
||||
inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
|
||||
inTagsSimple "surname" (text $ escapeStringForXML lastname)
|
||||
else -- last name last
|
||||
let namewords = words name
|
||||
lengthname = length namewords
|
||||
(firstname, lastname) = case lengthname of
|
||||
0 -> ("","")
|
||||
1 -> ("", name)
|
||||
n -> (joinWithSep " " (take (n-1) namewords), last namewords) in
|
||||
inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
|
||||
inTagsSimple "surname" (text $ escapeStringForXML lastname)
|
||||
let namewords = words name
|
||||
lengthname = length namewords
|
||||
(firstname, lastname) = case lengthname of
|
||||
0 -> ("","")
|
||||
1 -> ("", name)
|
||||
n -> (joinWithSep " " (take (n-1) namewords), last namewords)
|
||||
in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
|
||||
inTagsSimple "surname" (text $ escapeStringForXML lastname)
|
||||
|
||||
-- | Convert Pandoc document to string in Docbook format.
|
||||
writeDocbook :: WriterOptions -> Pandoc -> String
|
||||
writeDocbook opts (Pandoc (Meta title authors date) blocks) =
|
||||
let head = if (writerStandalone opts)
|
||||
then text (writerHeader opts)
|
||||
else empty
|
||||
meta = if (writerStandalone opts)
|
||||
then inTagsIndented "articleinfo" $
|
||||
(inTagsSimple "title" (wrap opts title)) $$
|
||||
(vcat (map authorToDocbook authors)) $$
|
||||
(inTagsSimple "date" (text $ escapeStringForXML date))
|
||||
else empty
|
||||
let head = if writerStandalone opts
|
||||
then text (writerHeader opts)
|
||||
else empty
|
||||
meta = if writerStandalone opts
|
||||
then inTagsIndented "articleinfo" $
|
||||
(inTagsSimple "title" (wrap opts title)) $$
|
||||
(vcat (map authorToDocbook authors)) $$
|
||||
(inTagsSimple "date" (text $ escapeStringForXML date))
|
||||
else empty
|
||||
elements = hierarchicalize blocks
|
||||
before = writerIncludeBefore opts
|
||||
after = writerIncludeAfter opts
|
||||
body = (if null before then empty else text before) $$
|
||||
vcat (map (elementToDocbook opts) elements) $$
|
||||
(if null after then empty else text after)
|
||||
body' = if writerStandalone opts
|
||||
then inTagsIndented "article" (meta $$ body)
|
||||
else body in
|
||||
render $ head $$ body' $$ text ""
|
||||
before = writerIncludeBefore opts
|
||||
after = writerIncludeAfter opts
|
||||
body = (if null before then empty else text before) $$
|
||||
vcat (map (elementToDocbook opts) elements) $$
|
||||
(if null after then empty else text after)
|
||||
body' = if writerStandalone opts
|
||||
then inTagsIndented "article" (meta $$ body)
|
||||
else body
|
||||
in render $ head $$ body' $$ text ""
|
||||
|
||||
-- | Convert an Element to Docbook.
|
||||
elementToDocbook :: WriterOptions -> Element -> Doc
|
||||
|
@ -123,10 +142,10 @@ elementToDocbook opts (Sec title elements) =
|
|||
-- Docbook doesn't allow sections with no content, so insert some if needed
|
||||
let elements' = if null elements
|
||||
then [Blk (Para [])]
|
||||
else elements in
|
||||
inTagsIndented "section" $
|
||||
inTagsSimple "title" (wrap opts title) $$
|
||||
vcat (map (elementToDocbook opts) elements')
|
||||
else elements
|
||||
in inTagsIndented "section" $
|
||||
inTagsSimple "title" (wrap opts title) $$
|
||||
vcat (map (elementToDocbook opts) elements')
|
||||
|
||||
-- | Convert a list of Pandoc blocks to Docbook.
|
||||
blocksToDocbook :: WriterOptions -> [Block] -> Doc
|
||||
|
@ -145,30 +164,27 @@ deflistItemsToDocbook opts items =
|
|||
-- | Convert a term and a list of blocks into a Docbook varlistentry.
|
||||
deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc
|
||||
deflistItemToDocbook opts term def =
|
||||
let def' = map plainToPara def in
|
||||
inTagsIndented "varlistentry" $
|
||||
inTagsIndented "term" (inlinesToDocbook opts term) $$
|
||||
inTagsIndented "listitem" (blocksToDocbook opts def')
|
||||
let def' = map plainToPara def
|
||||
in inTagsIndented "varlistentry" $
|
||||
inTagsIndented "term" (inlinesToDocbook opts term) $$
|
||||
inTagsIndented "listitem" (blocksToDocbook opts def')
|
||||
|
||||
-- | Convert a list of lists of blocks to a list of Docbook list items.
|
||||
listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc
|
||||
listItemsToDocbook opts items =
|
||||
vcat $ map (listItemToDocbook opts) items
|
||||
listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items
|
||||
|
||||
-- | Convert a list of blocks into a Docbook list item.
|
||||
listItemToDocbook :: WriterOptions -> [Block] -> Doc
|
||||
listItemToDocbook opts item =
|
||||
let item' = map plainToPara item in
|
||||
inTagsIndented "listitem" (blocksToDocbook opts item')
|
||||
inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item
|
||||
|
||||
-- | Convert a Pandoc block element to Docbook.
|
||||
blockToDocbook :: WriterOptions -> Block -> Doc
|
||||
blockToDocbook opts Null = empty
|
||||
blockToDocbook opts (Plain lst) = wrap opts lst
|
||||
blockToDocbook opts (Para lst) =
|
||||
inTagsIndented "para" (wrap opts lst)
|
||||
blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst
|
||||
blockToDocbook opts (BlockQuote blocks) =
|
||||
inTagsIndented "blockquote" (blocksToDocbook opts blocks)
|
||||
inTagsIndented "blockquote" $ blocksToDocbook opts blocks
|
||||
blockToDocbook opts (CodeBlock str) =
|
||||
text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>"
|
||||
blockToDocbook opts (BulletList lst) =
|
||||
|
@ -198,16 +214,16 @@ blockToDocbook opts (Table caption aligns widths headers rows) =
|
|||
then empty
|
||||
else inTagsIndented "caption"
|
||||
(inlinesToDocbook opts caption)
|
||||
tableType = if isEmpty captionDoc then "informaltable" else "table" in
|
||||
inTagsIndented tableType $ captionDoc $$
|
||||
(colHeadsToDocbook opts alignStrings widths headers) $$
|
||||
(vcat $ map (tableRowToDocbook opts alignStrings) rows)
|
||||
tableType = if isEmpty captionDoc then "informaltable" else "table"
|
||||
in inTagsIndented tableType $ captionDoc $$
|
||||
(colHeadsToDocbook opts alignStrings widths headers) $$
|
||||
(vcat $ map (tableRowToDocbook opts alignStrings) rows)
|
||||
|
||||
colHeadsToDocbook opts alignStrings widths headers =
|
||||
let heads = zipWith3
|
||||
(\align width item -> tableItemToDocbook opts "th" align width item)
|
||||
alignStrings widths headers in
|
||||
inTagsIndented "tr" $ vcat heads
|
||||
let heads = zipWith3 (\align width item ->
|
||||
tableItemToDocbook opts "th" align width item)
|
||||
alignStrings widths headers
|
||||
in inTagsIndented "tr" $ vcat heads
|
||||
|
||||
alignmentToString alignment = case alignment of
|
||||
AlignLeft -> "left"
|
||||
|
@ -215,20 +231,16 @@ alignmentToString alignment = case alignment of
|
|||
AlignCenter -> "center"
|
||||
AlignDefault -> "left"
|
||||
|
||||
tableRowToDocbook opts aligns cols =
|
||||
inTagsIndented "tr" $ vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
|
||||
tableRowToDocbook opts aligns cols = inTagsIndented "tr" $
|
||||
vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
|
||||
|
||||
tableItemToDocbook opts tag align width item =
|
||||
let attrib = [("align", align)] ++
|
||||
if (width /= 0)
|
||||
then [("style", "{width: " ++
|
||||
show (truncate (100*width)) ++ "%;}")]
|
||||
else [] in
|
||||
inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
|
||||
|
||||
-- | Put string in CDATA section
|
||||
cdata :: String -> Doc
|
||||
cdata str = text $ "<![CDATA[" ++ str ++ "]]>"
|
||||
if width /= 0
|
||||
then [("style", "{width: " ++
|
||||
show (truncate (100*width)) ++ "%;}")]
|
||||
else []
|
||||
in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
|
||||
|
||||
-- | Take list of inline elements and return wrapped doc.
|
||||
wrap :: WriterOptions -> [Inline] -> Doc
|
||||
|
@ -236,25 +248,24 @@ wrap opts lst = fsep $ map (inlinesToDocbook opts) (splitBy Space lst)
|
|||
|
||||
-- | Convert a list of inline elements to Docbook.
|
||||
inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
|
||||
inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst)
|
||||
inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
|
||||
|
||||
-- | Convert an inline element to Docbook.
|
||||
inlineToDocbook :: WriterOptions -> Inline -> Doc
|
||||
inlineToDocbook opts (Str str) = text $ escapeStringForXML str
|
||||
inlineToDocbook opts (Emph lst) =
|
||||
inTagsSimple "emphasis" (inlinesToDocbook opts lst)
|
||||
inTagsSimple "emphasis" $ inlinesToDocbook opts lst
|
||||
inlineToDocbook opts (Strong lst) =
|
||||
inTags False "emphasis" [("role", "strong")]
|
||||
(inlinesToDocbook opts lst)
|
||||
inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst
|
||||
inlineToDocbook opts (Strikeout lst) =
|
||||
inTags False "emphasis" [("role", "strikethrough")]
|
||||
(inlinesToDocbook opts lst)
|
||||
inTags False "emphasis" [("role", "strikethrough")] $
|
||||
inlinesToDocbook opts lst
|
||||
inlineToDocbook opts (Superscript lst) =
|
||||
inTagsSimple "superscript" (inlinesToDocbook opts lst)
|
||||
inTagsSimple "superscript" $ inlinesToDocbook opts lst
|
||||
inlineToDocbook opts (Subscript lst) =
|
||||
inTagsSimple "subscript" (inlinesToDocbook opts lst)
|
||||
inTagsSimple "subscript" $ inlinesToDocbook opts lst
|
||||
inlineToDocbook opts (Quoted _ lst) =
|
||||
inTagsSimple "quote" (inlinesToDocbook opts lst)
|
||||
inTagsSimple "quote" $ inlinesToDocbook opts lst
|
||||
inlineToDocbook opts Apostrophe = char '\''
|
||||
inlineToDocbook opts Ellipses = text "…"
|
||||
inlineToDocbook opts EmDash = text "—"
|
||||
|
@ -263,26 +274,24 @@ inlineToDocbook opts (Code str) =
|
|||
inTagsSimple "literal" $ text (escapeStringForXML str)
|
||||
inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
|
||||
inlineToDocbook opts (HtmlInline str) = empty
|
||||
inlineToDocbook opts LineBreak =
|
||||
text $ "<literallayout></literallayout>"
|
||||
inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>"
|
||||
inlineToDocbook opts Space = char ' '
|
||||
inlineToDocbook opts (Link txt (src, tit)) =
|
||||
if isPrefixOf "mailto:" src
|
||||
then let src' = drop 7 src
|
||||
emailLink = inTagsSimple "email" $ text (escapeStringForXML $ src')
|
||||
in if txt == [Code src']
|
||||
then emailLink
|
||||
else inlinesToDocbook opts txt <+> char '(' <> emailLink <>
|
||||
char ')'
|
||||
else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
|
||||
then let src' = drop 7 src
|
||||
emailLink = inTagsSimple "email" $ text $
|
||||
escapeStringForXML $ src'
|
||||
in if txt == [Code src']
|
||||
then emailLink
|
||||
else inlinesToDocbook opts txt <+> char '(' <> emailLink <>
|
||||
char ')'
|
||||
else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
|
||||
inlineToDocbook opts (Image alt (src, tit)) =
|
||||
let titleDoc = if null tit
|
||||
then empty
|
||||
else inTagsIndented "objectinfo" $
|
||||
inTagsIndented "title"
|
||||
(text $ escapeStringForXML tit) in
|
||||
inTagsIndented "inlinemediaobject" $
|
||||
inTagsIndented "imageobject" $
|
||||
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
|
||||
inTagsIndented "title" (text $ escapeStringForXML tit)
|
||||
in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
|
||||
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
|
||||
inlineToDocbook opts (Note contents) =
|
||||
inTagsIndented "footnote" $ blocksToDocbook opts contents
|
||||
|
|
|
@ -27,15 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
|
||||
Conversion of 'Pandoc' documents to HTML.
|
||||
-}
|
||||
module Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString ) where
|
||||
module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.ASCIIMathML
|
||||
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Entities (decodeEntities)
|
||||
import Text.Regex ( mkRegex, matchRegex )
|
||||
import Numeric ( showHex )
|
||||
import Data.Char ( ord, toLower )
|
||||
import Data.List ( isPrefixOf, partition, intersperse )
|
||||
import Data.List ( isPrefixOf, intersperse )
|
||||
import qualified Data.Set as S
|
||||
import Control.Monad.State
|
||||
import Text.XHtml.Transitional
|
||||
|
@ -55,8 +55,8 @@ defaultWriterState = WriterState {stNotes= [], stIds = [],
|
|||
writeHtmlString :: WriterOptions -> Pandoc -> String
|
||||
writeHtmlString opts =
|
||||
if writerStandalone opts
|
||||
then renderHtml . (writeHtml opts)
|
||||
else renderHtmlFragment . (writeHtml opts)
|
||||
then renderHtml . writeHtml opts
|
||||
else renderHtmlFragment . writeHtml opts
|
||||
|
||||
-- | Convert Pandoc document to Html structure.
|
||||
writeHtml :: WriterOptions -> Pandoc -> Html
|
||||
|
@ -74,49 +74,51 @@ writeHtml opts (Pandoc (Meta tit authors date) blocks) =
|
|||
map (\a -> meta ! [name "author", content a]) authors) +++
|
||||
(if null date
|
||||
then noHtml
|
||||
else meta ! [name "date", content date])
|
||||
titleHeader = if (writerStandalone opts) && (not (null tit)) &&
|
||||
(not (writerS5 opts))
|
||||
else meta ! [name "date", content date])
|
||||
titleHeader = if writerStandalone opts && not (null tit) &&
|
||||
not (writerS5 opts)
|
||||
then h1 ! [theclass "title"] $ topTitle
|
||||
else noHtml
|
||||
headerBlocks = filter isHeaderBlock blocks
|
||||
ids = uniqueIdentifiers $ map (\(Header _ lst) -> lst) headerBlocks
|
||||
toc = if writerTableOfContents opts
|
||||
then tableOfContents opts headerBlocks ids
|
||||
else noHtml
|
||||
ids = uniqueIdentifiers $
|
||||
map (\(Header _ lst) -> lst) headerBlocks
|
||||
toc = if writerTableOfContents opts
|
||||
then tableOfContents opts headerBlocks ids
|
||||
else noHtml
|
||||
(blocks', newstate) =
|
||||
runState (blockListToHtml opts blocks)
|
||||
(defaultWriterState {stIds = ids})
|
||||
cssLines = stCSS newstate
|
||||
css = if S.null cssLines
|
||||
then noHtml
|
||||
else style ! [thetype "text/css"] $ primHtml $
|
||||
'\n':(unlines $ S.toList cssLines)
|
||||
math = if stMath newstate
|
||||
then case writerASCIIMathMLURL opts of
|
||||
Just path -> script ! [src path,
|
||||
thetype "text/javascript"] $ noHtml
|
||||
Nothing -> primHtml asciiMathMLScript
|
||||
else noHtml
|
||||
head = header $ metadata +++ math +++ css +++
|
||||
primHtml (writerHeader opts)
|
||||
notes = reverse (stNotes newstate)
|
||||
before = primHtml $ writerIncludeBefore opts
|
||||
after = primHtml $ writerIncludeAfter opts
|
||||
thebody = before +++ titleHeader +++ toc +++ blocks' +++
|
||||
footnoteSection opts notes +++ after
|
||||
runState (blockListToHtml opts blocks)
|
||||
(defaultWriterState {stIds = ids})
|
||||
cssLines = stCSS newstate
|
||||
css = if S.null cssLines
|
||||
then noHtml
|
||||
else style ! [thetype "text/css"] $ primHtml $
|
||||
'\n':(unlines $ S.toList cssLines)
|
||||
math = if stMath newstate
|
||||
then case writerASCIIMathMLURL opts of
|
||||
Just path -> script ! [src path,
|
||||
thetype "text/javascript"] $
|
||||
noHtml
|
||||
Nothing -> primHtml asciiMathMLScript
|
||||
else noHtml
|
||||
head = header $ metadata +++ math +++ css +++
|
||||
primHtml (writerHeader opts)
|
||||
notes = reverse (stNotes newstate)
|
||||
before = primHtml $ writerIncludeBefore opts
|
||||
after = primHtml $ writerIncludeAfter opts
|
||||
thebody = before +++ titleHeader +++ toc +++ blocks' +++
|
||||
footnoteSection opts notes +++ after
|
||||
in if writerStandalone opts
|
||||
then head +++ (body thebody)
|
||||
then head +++ body thebody
|
||||
else thebody
|
||||
|
||||
-- | Construct table of contents from list of header blocks and identifiers.
|
||||
-- Assumes there are as many identifiers as header blocks.
|
||||
tableOfContents :: WriterOptions -> [Block] -> [String] -> Html
|
||||
tableOfContents opts headers ids =
|
||||
let opts' = opts { writerIgnoreNotes = True }
|
||||
let opts' = opts { writerIgnoreNotes = True }
|
||||
contentsTree = hierarchicalize headers
|
||||
contents = evalState (mapM (elementToListItem opts') contentsTree)
|
||||
(defaultWriterState {stIds = ids})
|
||||
contents = evalState (mapM (elementToListItem opts') contentsTree)
|
||||
(defaultWriterState {stIds = ids})
|
||||
in thediv ! [identifier "toc"] $ unordList contents
|
||||
|
||||
-- | Converts an Element to a list item for a table of contents,
|
||||
|
@ -135,7 +137,8 @@ elementToListItem opts (Sec headerText subsecs) = do
|
|||
let subList = if null subHeads
|
||||
then noHtml
|
||||
else unordList subHeads
|
||||
return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++ subList
|
||||
return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++
|
||||
subList
|
||||
|
||||
-- | Convert list of Note blocks to a footnote <div>.
|
||||
-- Assumes notes are sorted.
|
||||
|
@ -143,62 +146,61 @@ footnoteSection :: WriterOptions -> [Html] -> Html
|
|||
footnoteSection opts notes =
|
||||
if null notes
|
||||
then noHtml
|
||||
else thediv ! [theclass "footnotes"] $
|
||||
hr +++ (olist << notes)
|
||||
else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes)
|
||||
|
||||
-- | Obfuscate a "mailto:" link using Javascript.
|
||||
obfuscateLink :: WriterOptions -> String -> String -> Html
|
||||
obfuscateLink opts text src =
|
||||
let emailRegex = mkRegex "^mailto:([^@]*)@(.*)$"
|
||||
src' = map toLower src in
|
||||
case (matchRegex emailRegex src') of
|
||||
(Just [name, domain]) ->
|
||||
let domain' = substitute "." " dot " domain
|
||||
at' = obfuscateChar '@'
|
||||
(linkText, altText) =
|
||||
if text == drop 7 src' -- autolink
|
||||
then ("'<code>'+e+'</code>'", name ++ " at " ++ domain')
|
||||
else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++
|
||||
domain' ++ ")") in
|
||||
if writerStrictMarkdown opts
|
||||
then -- need to use primHtml or &'s are escaped to & in URL
|
||||
primHtml $ "<a href=\"" ++ (obfuscateString src')
|
||||
++ "\">" ++ (obfuscateString text) ++ "</a>"
|
||||
else (script ! [thetype "text/javascript"] $
|
||||
primHtml ("\n<!--\nh='" ++
|
||||
obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
|
||||
obfuscateString name ++ "';e=n+a+h;\n" ++
|
||||
"document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
|
||||
linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
|
||||
noscript (primHtml $ obfuscateString altText)
|
||||
_ -> anchor ! [href src] $ primHtml text -- malformed email
|
||||
src' = map toLower src
|
||||
in case (matchRegex emailRegex src') of
|
||||
(Just [name, domain]) ->
|
||||
let domain' = substitute "." " dot " domain
|
||||
at' = obfuscateChar '@'
|
||||
(linkText, altText) =
|
||||
if text == drop 7 src' -- autolink
|
||||
then ("'<code>'+e+'</code>'", name ++ " at " ++ domain')
|
||||
else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++
|
||||
domain' ++ ")")
|
||||
in if writerStrictMarkdown opts
|
||||
then -- need to use primHtml or &'s are escaped to & in URL
|
||||
primHtml $ "<a href=\"" ++ (obfuscateString src')
|
||||
++ "\">" ++ (obfuscateString text) ++ "</a>"
|
||||
else (script ! [thetype "text/javascript"] $
|
||||
primHtml ("\n<!--\nh='" ++
|
||||
obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
|
||||
obfuscateString name ++ "';e=n+a+h;\n" ++
|
||||
"document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
|
||||
linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
|
||||
noscript (primHtml $ obfuscateString altText)
|
||||
_ -> anchor ! [href src] $ primHtml text -- malformed email
|
||||
|
||||
-- | Obfuscate character as entity.
|
||||
obfuscateChar :: Char -> String
|
||||
obfuscateChar char =
|
||||
let num = ord char in
|
||||
let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in
|
||||
"&#" ++ numstr ++ ";"
|
||||
let num = ord char
|
||||
numstr = if even num then show num else "x" ++ showHex num ""
|
||||
in "&#" ++ numstr ++ ";"
|
||||
|
||||
-- | Obfuscate string using entities.
|
||||
obfuscateString :: String -> String
|
||||
obfuscateString = (concatMap obfuscateChar) . decodeEntities
|
||||
obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
|
||||
|
||||
-- | True if character is a punctuation character (unicode).
|
||||
isPunctuation :: Char -> Bool
|
||||
isPunctuation c =
|
||||
let c' = ord c in
|
||||
if (c `elem` "!\"'()*,-./:;<>?[\\]`{|}~") || (c' >= 0x2000 && c' <= 0x206F) ||
|
||||
(c' >= 0xE000 && c' <= 0xE0FF)
|
||||
then True
|
||||
else False
|
||||
let c' = ord c
|
||||
in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
|
||||
c' >= 0xE000 && c' <= 0xE0FF
|
||||
then True
|
||||
else False
|
||||
|
||||
-- | Add CSS for document header.
|
||||
addToCSS :: String -> State WriterState ()
|
||||
addToCSS item = do
|
||||
st <- get
|
||||
let current = stCSS st
|
||||
put $ st {stCSS = (S.insert item current)}
|
||||
put $ st {stCSS = S.insert item current}
|
||||
|
||||
-- | Convert Pandoc inline list to plain text identifier.
|
||||
inlineListToIdentifier :: [Inline] -> String
|
||||
|
@ -206,27 +208,26 @@ inlineListToIdentifier [] = ""
|
|||
inlineListToIdentifier (x:xs) =
|
||||
xAsText ++ inlineListToIdentifier xs
|
||||
where xAsText = case x of
|
||||
Str s -> filter
|
||||
(\c -> (c == '-') || not (isPunctuation c)) $
|
||||
concat $ intersperse "-" $ words $ map toLower s
|
||||
Emph lst -> inlineListToIdentifier lst
|
||||
Strikeout lst -> inlineListToIdentifier lst
|
||||
Superscript lst -> inlineListToIdentifier lst
|
||||
Subscript lst -> inlineListToIdentifier lst
|
||||
Strong lst -> inlineListToIdentifier lst
|
||||
Quoted _ lst -> inlineListToIdentifier lst
|
||||
Code s -> s
|
||||
Space -> "-"
|
||||
EmDash -> "-"
|
||||
EnDash -> "-"
|
||||
Apostrophe -> ""
|
||||
Ellipses -> ""
|
||||
LineBreak -> "-"
|
||||
TeX _ -> ""
|
||||
HtmlInline _ -> ""
|
||||
Link lst _ -> inlineListToIdentifier lst
|
||||
Image lst _ -> inlineListToIdentifier lst
|
||||
Note _ -> ""
|
||||
Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
|
||||
concat $ intersperse "-" $ words $ map toLower s
|
||||
Emph lst -> inlineListToIdentifier lst
|
||||
Strikeout lst -> inlineListToIdentifier lst
|
||||
Superscript lst -> inlineListToIdentifier lst
|
||||
Subscript lst -> inlineListToIdentifier lst
|
||||
Strong lst -> inlineListToIdentifier lst
|
||||
Quoted _ lst -> inlineListToIdentifier lst
|
||||
Code s -> s
|
||||
Space -> "-"
|
||||
EmDash -> "-"
|
||||
EnDash -> "-"
|
||||
Apostrophe -> ""
|
||||
Ellipses -> ""
|
||||
LineBreak -> "-"
|
||||
TeX _ -> ""
|
||||
HtmlInline _ -> ""
|
||||
Link lst _ -> inlineListToIdentifier lst
|
||||
Image lst _ -> inlineListToIdentifier lst
|
||||
Note _ -> ""
|
||||
|
||||
-- | Return unique identifiers for list of inline lists.
|
||||
uniqueIdentifiers :: [[Inline]] -> [String]
|
||||
|
@ -236,102 +237,99 @@ uniqueIdentifiers ls =
|
|||
matches = length $ filter (== new) nonuniqueIds
|
||||
new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
|
||||
in (new:nonuniqueIds, new':uniqueIds)
|
||||
in reverse $ snd (foldl addIdentifier ([],[]) $ ls)
|
||||
in reverse $ snd $ foldl addIdentifier ([],[]) ls
|
||||
|
||||
-- | Convert Pandoc block element to HTML.
|
||||
blockToHtml :: WriterOptions -> Block -> State WriterState Html
|
||||
blockToHtml opts block =
|
||||
case block of
|
||||
(Null) -> return $ noHtml
|
||||
(Plain lst) -> inlineListToHtml opts lst
|
||||
(Para lst) -> inlineListToHtml opts lst >>= (return . paragraph)
|
||||
(RawHtml str) -> return $ primHtml str
|
||||
(HorizontalRule) -> return $ hr
|
||||
(CodeBlock str) -> return $ pre $ thecode << (str ++ "\n")
|
||||
blockToHtml opts Null = return $ noHtml
|
||||
blockToHtml opts (Plain lst) = inlineListToHtml opts lst
|
||||
blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph)
|
||||
blockToHtml opts (RawHtml str) = return $ primHtml str
|
||||
blockToHtml opts (HorizontalRule) = return $ hr
|
||||
blockToHtml opts (CodeBlock str) = return $ pre $ thecode << (str ++ "\n")
|
||||
-- the final \n for consistency with Markdown.pl
|
||||
(BlockQuote blocks) -> -- in S5, treat list in blockquote specially
|
||||
-- if default is incremental, make it nonincremental;
|
||||
-- otherwise incremental
|
||||
if writerS5 opts
|
||||
then let inc = not (writerIncremental opts) in
|
||||
case blocks of
|
||||
[BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
|
||||
(BulletList lst)
|
||||
[OrderedList attribs lst] ->
|
||||
blockToHtml (opts {writerIncremental = inc})
|
||||
(OrderedList attribs lst)
|
||||
otherwise -> blockListToHtml opts blocks >>=
|
||||
(return . blockquote)
|
||||
else blockListToHtml opts blocks >>= (return . blockquote)
|
||||
(Header level lst) -> do contents <- inlineListToHtml opts lst
|
||||
st <- get
|
||||
let ids = stIds st
|
||||
let (id, rest) = if null ids
|
||||
then ("", [])
|
||||
else (head ids, tail ids)
|
||||
put $ st {stIds = rest}
|
||||
let attribs = [identifier id]
|
||||
let headerHtml = case level of
|
||||
1 -> h1 contents ! attribs
|
||||
2 -> h2 contents ! attribs
|
||||
3 -> h3 contents ! attribs
|
||||
4 -> h4 contents ! attribs
|
||||
5 -> h5 contents ! attribs
|
||||
6 -> h6 contents ! attribs
|
||||
_ -> paragraph contents ! attribs
|
||||
let headerHtml' = if writerTableOfContents opts
|
||||
then anchor ! [href ("#TOC-" ++ id)] $
|
||||
headerHtml
|
||||
else headerHtml
|
||||
return headerHtml'
|
||||
(BulletList lst) -> do contents <- mapM (blockListToHtml opts) lst
|
||||
let attribs = if writerIncremental opts
|
||||
then [theclass "incremental"]
|
||||
else []
|
||||
return $ unordList ! attribs $ contents
|
||||
(OrderedList (startnum, numstyle, _) lst) -> do
|
||||
contents <- mapM (blockListToHtml opts) lst
|
||||
let numstyle' = camelCaseToHyphenated $ show numstyle
|
||||
let attribs = (if writerIncremental opts
|
||||
then [theclass "incremental"]
|
||||
else []) ++
|
||||
(if startnum /= 1
|
||||
then [start startnum]
|
||||
else []) ++
|
||||
(if numstyle /= DefaultStyle
|
||||
then [theclass numstyle']
|
||||
else [])
|
||||
if numstyle /= DefaultStyle
|
||||
then addToCSS $ "ol." ++ numstyle' ++
|
||||
" { list-style-type: " ++
|
||||
numstyle' ++ "; }"
|
||||
else return ()
|
||||
return $ ordList ! attribs $ contents
|
||||
(DefinitionList lst) -> do contents <- mapM (\(term, def) ->
|
||||
do term' <- inlineListToHtml opts term
|
||||
def' <- blockListToHtml opts def
|
||||
return $ (term', def'))
|
||||
lst
|
||||
let attribs = if writerIncremental opts
|
||||
then [theclass "incremental"]
|
||||
else []
|
||||
return $ defList ! attribs $ contents
|
||||
(Table capt aligns widths headers rows) ->
|
||||
do let alignStrings = map alignmentToString aligns
|
||||
captionDoc <- if null capt
|
||||
then return noHtml
|
||||
else inlineListToHtml opts capt >>=
|
||||
(return . caption)
|
||||
colHeads <- colHeadsToHtml opts alignStrings
|
||||
widths headers
|
||||
rows' <- mapM (tableRowToHtml opts alignStrings) rows
|
||||
return $ table $ captionDoc +++ colHeads +++ rows'
|
||||
blockToHtml opts (BlockQuote blocks) =
|
||||
-- in S5, treat list in blockquote specially
|
||||
-- if default is incremental, make it nonincremental;
|
||||
-- otherwise incremental
|
||||
if writerS5 opts
|
||||
then let inc = not (writerIncremental opts) in
|
||||
case blocks of
|
||||
[BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
|
||||
(BulletList lst)
|
||||
[OrderedList attribs lst] ->
|
||||
blockToHtml (opts {writerIncremental = inc})
|
||||
(OrderedList attribs lst)
|
||||
otherwise -> blockListToHtml opts blocks >>=
|
||||
(return . blockquote)
|
||||
else blockListToHtml opts blocks >>= (return . blockquote)
|
||||
blockToHtml opts (Header level lst) = do
|
||||
contents <- inlineListToHtml opts lst
|
||||
st <- get
|
||||
let ids = stIds st
|
||||
let (id, rest) = if null ids
|
||||
then ("", [])
|
||||
else (head ids, tail ids)
|
||||
put $ st {stIds = rest}
|
||||
let attribs = [identifier id]
|
||||
let headerHtml = case level of
|
||||
1 -> h1 contents ! attribs
|
||||
2 -> h2 contents ! attribs
|
||||
3 -> h3 contents ! attribs
|
||||
4 -> h4 contents ! attribs
|
||||
5 -> h5 contents ! attribs
|
||||
6 -> h6 contents ! attribs
|
||||
_ -> paragraph contents ! attribs
|
||||
return $ if writerTableOfContents opts
|
||||
then anchor ! [href ("#TOC-" ++ id)] $ headerHtml
|
||||
else headerHtml
|
||||
blockToHtml opts (BulletList lst) = do
|
||||
contents <- mapM (blockListToHtml opts) lst
|
||||
let attribs = if writerIncremental opts
|
||||
then [theclass "incremental"]
|
||||
else []
|
||||
return $ unordList ! attribs $ contents
|
||||
blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
|
||||
contents <- mapM (blockListToHtml opts) lst
|
||||
let numstyle' = camelCaseToHyphenated $ show numstyle
|
||||
let attribs = (if writerIncremental opts
|
||||
then [theclass "incremental"]
|
||||
else []) ++
|
||||
(if startnum /= 1
|
||||
then [start startnum]
|
||||
else []) ++
|
||||
(if numstyle /= DefaultStyle
|
||||
then [theclass numstyle']
|
||||
else [])
|
||||
if numstyle /= DefaultStyle
|
||||
then addToCSS $ "ol." ++ numstyle' ++
|
||||
" { list-style-type: " ++
|
||||
numstyle' ++ "; }"
|
||||
else return ()
|
||||
return $ ordList ! attribs $ contents
|
||||
blockToHtml opts (DefinitionList lst) = do
|
||||
contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term
|
||||
def' <- blockListToHtml opts def
|
||||
return $ (term', def')) lst
|
||||
let attribs = if writerIncremental opts
|
||||
then [theclass "incremental"]
|
||||
else []
|
||||
return $ defList ! attribs $ contents
|
||||
blockToHtml opts (Table capt aligns widths headers rows) = do
|
||||
let alignStrings = map alignmentToString aligns
|
||||
captionDoc <- if null capt
|
||||
then return noHtml
|
||||
else inlineListToHtml opts capt >>= return . caption
|
||||
colHeads <- colHeadsToHtml opts alignStrings
|
||||
widths headers
|
||||
rows' <- mapM (tableRowToHtml opts alignStrings) rows
|
||||
return $ table $ captionDoc +++ colHeads +++ rows'
|
||||
|
||||
colHeadsToHtml opts alignStrings widths headers =
|
||||
do heads <- sequence $ zipWith3
|
||||
(\align width item -> tableItemToHtml opts th align width item)
|
||||
alignStrings widths headers
|
||||
return $ tr $ toHtmlFromList heads
|
||||
colHeadsToHtml opts alignStrings widths headers = do
|
||||
heads <- sequence $ zipWith3
|
||||
(\align width item -> tableItemToHtml opts th align width item)
|
||||
alignStrings widths headers
|
||||
return $ tr $ toHtmlFromList heads
|
||||
|
||||
alignmentToString alignment = case alignment of
|
||||
AlignLeft -> "left"
|
||||
|
@ -339,24 +337,27 @@ alignmentToString alignment = case alignment of
|
|||
AlignCenter -> "center"
|
||||
AlignDefault -> "left"
|
||||
|
||||
tableRowToHtml opts aligns cols =
|
||||
do contents <- sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols
|
||||
return $ tr $ toHtmlFromList contents
|
||||
tableRowToHtml opts aligns cols =
|
||||
(sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols) >>=
|
||||
return . tr . toHtmlFromList
|
||||
|
||||
tableItemToHtml opts tag align' width item =
|
||||
do contents <- blockListToHtml opts item
|
||||
let attrib = [align align'] ++
|
||||
if (width /= 0)
|
||||
then [thestyle ("{width: " ++ show (truncate (100*width)) ++ "%;}")]
|
||||
else []
|
||||
return $ tag ! attrib $ contents
|
||||
tableItemToHtml opts tag align' width item = do
|
||||
contents <- blockListToHtml opts item
|
||||
let attrib = [align align'] ++
|
||||
if width /= 0
|
||||
then [thestyle ("{width: " ++ show (truncate (100*width)) ++
|
||||
"%;}")]
|
||||
else []
|
||||
return $ tag ! attrib $ contents
|
||||
|
||||
blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
|
||||
blockListToHtml opts lst = mapM (blockToHtml opts) lst >>= (return . toHtmlFromList)
|
||||
blockListToHtml opts lst =
|
||||
mapM (blockToHtml opts) lst >>= return . toHtmlFromList
|
||||
|
||||
-- | Convert list of Pandoc inline elements to HTML.
|
||||
inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
|
||||
inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= (return . toHtmlFromList)
|
||||
inlineListToHtml opts lst =
|
||||
mapM (inlineToHtml opts) lst >>= return . toHtmlFromList
|
||||
|
||||
-- | Convert Pandoc inline element to HTML.
|
||||
inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
|
||||
|
@ -369,52 +370,58 @@ inlineToHtml opts inline =
|
|||
(EnDash) -> return $ primHtmlChar "ndash"
|
||||
(Ellipses) -> return $ primHtmlChar "hellip"
|
||||
(Apostrophe) -> return $ primHtmlChar "rsquo"
|
||||
(Emph lst) -> inlineListToHtml opts lst >>= (return . emphasize)
|
||||
(Strong lst) -> inlineListToHtml opts lst >>= (return . strong)
|
||||
(Emph lst) -> inlineListToHtml opts lst >>= return . emphasize
|
||||
(Strong lst) -> inlineListToHtml opts lst >>= return . strong
|
||||
(Code str) -> return $ thecode << str
|
||||
(Strikeout lst) -> addToCSS ".strikeout { text-decoration: line-through; }" >>
|
||||
(Strikeout lst) -> addToCSS
|
||||
".strikeout { text-decoration: line-through; }" >>
|
||||
inlineListToHtml opts lst >>=
|
||||
(return . (thespan ! [theclass "strikeout"]))
|
||||
(Superscript lst) -> inlineListToHtml opts lst >>= (return . sup)
|
||||
(Subscript lst) -> inlineListToHtml opts lst >>= (return . sub)
|
||||
return . (thespan ! [theclass "strikeout"])
|
||||
(Superscript lst) -> inlineListToHtml opts lst >>= return . sup
|
||||
(Subscript lst) -> inlineListToHtml opts lst >>= return . sub
|
||||
(Quoted quoteType lst) ->
|
||||
let (leftQuote, rightQuote) = case quoteType of
|
||||
SingleQuote -> (primHtmlChar "lsquo",
|
||||
primHtmlChar "rsquo")
|
||||
DoubleQuote -> (primHtmlChar "ldquo",
|
||||
primHtmlChar "rdquo") in
|
||||
do contents <- inlineListToHtml opts lst
|
||||
return $ leftQuote +++ contents +++ rightQuote
|
||||
(TeX str) -> do if writerUseASCIIMathML opts
|
||||
then modify (\st -> st {stMath = True})
|
||||
else return ()
|
||||
return $ stringToHtml str
|
||||
primHtmlChar "rdquo")
|
||||
in do contents <- inlineListToHtml opts lst
|
||||
return $ leftQuote +++ contents +++ rightQuote
|
||||
(TeX str) -> (if writerUseASCIIMathML opts
|
||||
then modify (\st -> st {stMath = True})
|
||||
else return ()) >> return (stringToHtml str)
|
||||
(HtmlInline str) -> return $ primHtml str
|
||||
(Link [Code str] (src,tit)) | "mailto:" `isPrefixOf` src ->
|
||||
do return $ obfuscateLink opts str src
|
||||
(Link txt (src,tit)) | "mailto:" `isPrefixOf` src ->
|
||||
do linkText <- inlineListToHtml opts txt
|
||||
return $ obfuscateLink opts (show linkText) src
|
||||
(Link txt (src,tit)) ->
|
||||
do linkText <- inlineListToHtml opts txt
|
||||
return $ anchor ! ([href src] ++
|
||||
if null tit then [] else [title tit]) $ linkText
|
||||
(Image txt (source,tit)) ->
|
||||
do alternate <- inlineListToHtml opts txt
|
||||
let alternate' = renderHtmlFragment alternate
|
||||
let attributes = [src source, title tit] ++
|
||||
if null txt then [] else [alt alternate']
|
||||
return $ image ! attributes
|
||||
-- note: null title included, as in Markdown.pl
|
||||
(Note contents) -> do st <- get
|
||||
let notes = stNotes st
|
||||
let number = (length notes) + 1
|
||||
let ref = show number
|
||||
htmlContents <- blockListToNote opts ref contents
|
||||
put $ st {stNotes = (htmlContents:notes)} -- push contents onto front of notes
|
||||
return $ anchor ! [href ("#fn" ++ ref),
|
||||
theclass "footnoteRef",
|
||||
identifier ("fnref" ++ ref)] << sup << ref
|
||||
return $ obfuscateLink opts str src
|
||||
(Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> do
|
||||
linkText <- inlineListToHtml opts txt
|
||||
return $ obfuscateLink opts (show linkText) src
|
||||
(Link txt (src,tit)) -> do
|
||||
linkText <- inlineListToHtml opts txt
|
||||
return $ anchor ! ([href src] ++
|
||||
if null tit then [] else [title tit]) $
|
||||
linkText
|
||||
(Image txt (source,tit)) -> do
|
||||
alternate <- inlineListToHtml opts txt
|
||||
let alternate' = renderHtmlFragment alternate
|
||||
let attributes = [src source, title tit] ++
|
||||
if null txt
|
||||
then []
|
||||
else [alt alternate']
|
||||
return $ image ! attributes
|
||||
-- note: null title included, as in Markdown.pl
|
||||
(Note contents) -> do
|
||||
st <- get
|
||||
let notes = stNotes st
|
||||
let number = (length notes) + 1
|
||||
let ref = show number
|
||||
htmlContents <- blockListToNote opts ref contents
|
||||
-- push contents onto front of notes
|
||||
put $ st {stNotes = (htmlContents:notes)}
|
||||
return $ anchor ! [href ("#fn" ++ ref),
|
||||
theclass "footnoteRef",
|
||||
identifier ("fnref" ++ ref)] <<
|
||||
sup << ref
|
||||
|
||||
blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
|
||||
blockListToNote opts ref blocks =
|
||||
|
@ -434,6 +441,6 @@ blockListToNote opts ref blocks =
|
|||
[Plain (lst ++ backlink)]
|
||||
_ -> otherBlocks ++ [lastBlock,
|
||||
Plain backlink]
|
||||
in do contents <- blockListToHtml opts blocks'
|
||||
return $ li ! [identifier ("fn" ++ ref)] $ contents
|
||||
in do contents <- blockListToHtml opts blocks'
|
||||
return $ li ! [identifier ("fn" ++ ref)] $ contents
|
||||
|
||||
|
|
|
@ -27,16 +27,14 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
|
||||
Conversion of 'Pandoc' format into LaTeX.
|
||||
-}
|
||||
module Text.Pandoc.Writers.LaTeX (
|
||||
writeLaTeX
|
||||
) where
|
||||
module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Printf ( printf )
|
||||
import Data.List ( (\\), isInfixOf )
|
||||
import Data.Char ( toLower )
|
||||
import qualified Data.Set as S
|
||||
import Control.Monad.State
|
||||
import Data.Char ( toLower )
|
||||
|
||||
data WriterState =
|
||||
WriterState { stIncludes :: S.Set String -- strings to include in header
|
||||
|
@ -77,16 +75,16 @@ latexHeader :: WriterOptions -- ^ Options, including LaTeX header
|
|||
-> Meta -- ^ Meta with bibliographic information
|
||||
-> State WriterState String
|
||||
latexHeader options (Meta title authors date) = do
|
||||
titletext <- if null title
|
||||
then return ""
|
||||
else do title' <- inlineListToLaTeX title
|
||||
return $ "\\title{" ++ title' ++ "}\n"
|
||||
extras <- get >>= (return . unlines . S.toList. stIncludes)
|
||||
titletext <- if null title
|
||||
then return ""
|
||||
else do title' <- inlineListToLaTeX title
|
||||
return $ "\\title{" ++ title' ++ "}\n"
|
||||
extras <- get >>= (return . unlines . S.toList. stIncludes)
|
||||
let verbatim = if "\\usepackage{fancyvrb}" `isInfixOf` extras
|
||||
then "\\VerbatimFootnotes % allows verbatim text in footnotes\n"
|
||||
else ""
|
||||
let authorstext = "\\author{" ++ (joinWithSep "\\\\"
|
||||
(map stringToLaTeX authors)) ++ "}\n"
|
||||
let authorstext = "\\author{" ++
|
||||
joinWithSep "\\\\" (map stringToLaTeX authors) ++ "}\n"
|
||||
let datetext = if date == ""
|
||||
then ""
|
||||
else "\\date{" ++ stringToLaTeX date ++ "}\n"
|
||||
|
@ -124,8 +122,8 @@ deVerb (other:rest) = other:(deVerb rest)
|
|||
blockToLaTeX :: Block -- ^ Block to convert
|
||||
-> State WriterState String
|
||||
blockToLaTeX Null = return ""
|
||||
blockToLaTeX (Plain lst) = (inlineListToLaTeX lst) >>= (return . (++ "\n"))
|
||||
blockToLaTeX (Para lst) = (inlineListToLaTeX lst) >>= (return . (++ "\n\n"))
|
||||
blockToLaTeX (Plain lst) = inlineListToLaTeX lst >>= return . (++ "\n")
|
||||
blockToLaTeX (Para lst) = inlineListToLaTeX lst >>= return . (++ "\n\n")
|
||||
blockToLaTeX (BlockQuote lst) = do
|
||||
contents <- blockListToLaTeX lst
|
||||
return $ "\\begin{quote}\n" ++ contents ++ "\\end{quote}\n"
|
||||
|
@ -184,22 +182,22 @@ blockToLaTeX (Table caption aligns widths heads rows) = do
|
|||
colWidths aligns
|
||||
let tableBody = "\\begin{tabular}{" ++ colDescriptors ++ "}\n" ++
|
||||
headers ++ "\\hline\n" ++ concat rows' ++ "\\end{tabular}\n"
|
||||
let centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n"
|
||||
let centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n"
|
||||
addToHeader "\\usepackage{array}\n\
|
||||
\% This is needed because raggedright in table elements redefines \\\\:\n\
|
||||
\\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\
|
||||
\\\let\\PBS=\\PreserveBackslash"
|
||||
\% This is needed because raggedright in table elements redefines \\\\:\n\
|
||||
\\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\
|
||||
\\\let\\PBS=\\PreserveBackslash"
|
||||
return $ if null captionText
|
||||
then centered tableBody ++ "\n"
|
||||
else "\\begin{table}[h]\n" ++ centered tableBody ++ "\\caption{" ++
|
||||
captionText ++ "}\n" ++ "\\end{table}\n\n"
|
||||
else "\\begin{table}[h]\n" ++ centered tableBody ++
|
||||
"\\caption{" ++ captionText ++ "}\n" ++ "\\end{table}\n\n"
|
||||
|
||||
blockListToLaTeX lst = mapM blockToLaTeX lst >>= (return . concat)
|
||||
blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . concat
|
||||
|
||||
tableRowToLaTeX cols =
|
||||
mapM blockListToLaTeX cols >>= (return . (++ "\\\\\n") . (joinWithSep " & "))
|
||||
mapM blockListToLaTeX cols >>= return . (++ "\\\\\n") . (joinWithSep " & ")
|
||||
|
||||
listItemToLaTeX lst = blockListToLaTeX lst >>= (return . ("\\item "++))
|
||||
listItemToLaTeX lst = blockListToLaTeX lst >>= return . ("\\item "++)
|
||||
|
||||
defListItemToLaTeX (term, def) = do
|
||||
term' <- inlineListToLaTeX $ deVerb term
|
||||
|
@ -209,8 +207,7 @@ defListItemToLaTeX (term, def) = do
|
|||
-- | Convert list of inline elements to LaTeX.
|
||||
inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
|
||||
-> State WriterState String
|
||||
inlineListToLaTeX lst =
|
||||
mapM inlineToLaTeX lst >>= (return . concat)
|
||||
inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . concat
|
||||
|
||||
isQuoted :: Inline -> Bool
|
||||
isQuoted (Quoted _ _) = True
|
||||
|
|
|
@ -28,14 +28,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
Conversion of 'Pandoc' documents to groff man page format.
|
||||
|
||||
-}
|
||||
module Text.Pandoc.Writers.Man (
|
||||
writeMan
|
||||
) where
|
||||
module Text.Pandoc.Writers.Man ( writeMan) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Printf ( printf )
|
||||
import Data.Char ( toUpper )
|
||||
import Data.List ( group, isPrefixOf, drop, find, nub, intersperse )
|
||||
import Data.List ( isPrefixOf, drop, nub, intersperse )
|
||||
import Text.PrettyPrint.HughesPJ hiding ( Str )
|
||||
import Control.Monad.State
|
||||
|
||||
|
@ -45,16 +42,15 @@ type WriterState = (Notes, Preprocessors)
|
|||
|
||||
-- | Convert Pandoc to Man.
|
||||
writeMan :: WriterOptions -> Pandoc -> String
|
||||
writeMan opts document =
|
||||
render $ evalState (pandocToMan opts document) ([],[])
|
||||
writeMan opts document = render $ evalState (pandocToMan opts document) ([],[])
|
||||
|
||||
-- | Return groff man representation of document.
|
||||
pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc
|
||||
pandocToMan opts (Pandoc meta blocks) = do
|
||||
let before = writerIncludeBefore opts
|
||||
let after = writerIncludeAfter opts
|
||||
before' = if null before then empty else text before
|
||||
after' = if null after then empty else text after
|
||||
let before' = if null before then empty else text before
|
||||
let after' = if null after then empty else text after
|
||||
(head, foot) <- metaToMan opts meta
|
||||
body <- blockListToMan opts blocks
|
||||
(notes, preprocessors) <- get
|
||||
|
@ -84,8 +80,8 @@ metaToMan options (Meta title authors date) = do
|
|||
1 -> text ".SH AUTHOR" $$ (text $ joinWithSep ", " authors)
|
||||
2 -> text ".SH AUTHORS" $$ (text $ joinWithSep ", " authors)
|
||||
return $ if writerStandalone options
|
||||
then (head, foot)
|
||||
else (empty, empty)
|
||||
then (head, foot)
|
||||
else (empty, empty)
|
||||
|
||||
-- | Return man representation of notes.
|
||||
notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc
|
||||
|
@ -93,7 +89,7 @@ notesToMan opts notes =
|
|||
if null notes
|
||||
then return empty
|
||||
else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
|
||||
(return . (text ".SH NOTES" $$) . vcat)
|
||||
return . (text ".SH NOTES" $$) . vcat
|
||||
|
||||
-- | Return man representation of a note.
|
||||
noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc
|
||||
|
@ -110,8 +106,7 @@ wrappedMan opts sect = do
|
|||
|
||||
-- | Association list of characters to escape.
|
||||
manEscapes :: [(Char, String)]
|
||||
manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++
|
||||
backslashEscapes "\".@\\"
|
||||
manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "\".@\\"
|
||||
|
||||
-- | Escape special characters for Man.
|
||||
escapeString :: String -> String
|
||||
|
@ -140,8 +135,7 @@ blockToMan opts (Header level inlines) = do
|
|||
return $ text heading <> contents
|
||||
blockToMan opts (CodeBlock str) = return $
|
||||
text ".PP" $$ text "\\f[CR]" $$
|
||||
text ((unlines . map (" " ++) . lines) (escapeCode str)) <>
|
||||
text "\\f[]"
|
||||
text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]"
|
||||
blockToMan opts (BlockQuote blocks) = do
|
||||
contents <- blockListToMan opts blocks
|
||||
return $ text ".RS" $$ contents $$ text ".RE"
|
||||
|
|
|
@ -29,9 +29,7 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text.
|
|||
|
||||
Markdown: <http://daringfireball.net/projects/markdown/>
|
||||
-}
|
||||
module Text.Pandoc.Writers.Markdown (
|
||||
writeMarkdown
|
||||
) where
|
||||
module Text.Pandoc.Writers.Markdown ( writeMarkdown) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Blocks
|
||||
|
@ -53,10 +51,10 @@ pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc
|
|||
pandocToMarkdown opts (Pandoc meta blocks) = do
|
||||
let before = writerIncludeBefore opts
|
||||
let after = writerIncludeAfter opts
|
||||
before' = if null before then empty else text before
|
||||
after' = if null after then empty else text after
|
||||
let before' = if null before then empty else text before
|
||||
let after' = if null after then empty else text after
|
||||
metaBlock <- metaToMarkdown opts meta
|
||||
let head = if (writerStandalone opts)
|
||||
let head = if writerStandalone opts
|
||||
then metaBlock $+$ text (writerHeader opts)
|
||||
else empty
|
||||
let headerBlocks = filter isHeaderBlock blocks
|
||||
|
@ -73,8 +71,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
|
|||
|
||||
-- | Return markdown representation of reference key table.
|
||||
keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc
|
||||
keyTableToMarkdown opts refs =
|
||||
mapM (keyToMarkdown opts) refs >>= (return . vcat)
|
||||
keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
|
||||
|
||||
-- | Return markdown representation of a reference key.
|
||||
keyToMarkdown :: WriterOptions
|
||||
|
@ -90,7 +87,7 @@ keyToMarkdown opts (label, (src, tit)) = do
|
|||
notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc
|
||||
notesToMarkdown opts notes =
|
||||
mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=
|
||||
(return . vcat)
|
||||
return . vcat
|
||||
|
||||
-- | Return markdown representation of a note.
|
||||
noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc
|
||||
|
@ -143,8 +140,7 @@ tableOfContents opts headers =
|
|||
-- | Converts an Element to a list item for a table of contents,
|
||||
elementToListItem :: Element -> [Block]
|
||||
elementToListItem (Blk _) = []
|
||||
elementToListItem (Sec headerText subsecs) =
|
||||
[Plain headerText] ++
|
||||
elementToListItem (Sec headerText subsecs) = [Plain headerText] ++
|
||||
if null subsecs
|
||||
then []
|
||||
else [BulletList $ map elementToListItem subsecs]
|
||||
|
@ -184,9 +180,8 @@ blockToMarkdown opts (Table caption aligns widths headers rows) = do
|
|||
let makeRow = hsepBlocks . (zipWith alignHeader aligns) .
|
||||
(zipWith docToBlock widthsInChars)
|
||||
let head = makeRow headers'
|
||||
rows' <- mapM (\row -> do
|
||||
cols <- mapM (blockListToMarkdown opts) row
|
||||
return $ makeRow cols) rows
|
||||
rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row
|
||||
return $ makeRow cols) rows
|
||||
let tableWidth = sum widthsInChars
|
||||
let maxRowHeight = maximum $ map heightOfBlock (head:rows')
|
||||
let isMultilineTable = maxRowHeight > 1
|
||||
|
@ -208,8 +203,7 @@ blockToMarkdown opts (OrderedList attribs items) = do
|
|||
let markers = orderedListMarkers attribs
|
||||
let markers' = map (\m -> if length m < 3
|
||||
then m ++ replicate (3 - length m) ' '
|
||||
else m)
|
||||
markers
|
||||
else m) markers
|
||||
contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
|
||||
zip markers' items
|
||||
return $ (vcat contents) <> text "\n"
|
||||
|
@ -241,8 +235,8 @@ definitionListItemToMarkdown opts (label, items) = do
|
|||
let tabStop = writerTabStop opts
|
||||
let leader = char ':'
|
||||
contents <- mapM (\item -> blockToMarkdown opts item >>=
|
||||
(\txt -> return (leader $$ nest tabStop txt)))
|
||||
items >>= (return . vcat)
|
||||
(\txt -> return (leader $$ nest tabStop txt)))
|
||||
items >>= return . vcat
|
||||
return $ labelText $+$ contents
|
||||
|
||||
-- | Convert list of Pandoc block elements to markdown.
|
||||
|
@ -250,29 +244,30 @@ blockListToMarkdown :: WriterOptions -- ^ Options
|
|||
-> [Block] -- ^ List of block elements
|
||||
-> State WriterState Doc
|
||||
blockListToMarkdown opts blocks =
|
||||
mapM (blockToMarkdown opts) blocks >>= (return . vcat)
|
||||
mapM (blockToMarkdown opts) blocks >>= return . vcat
|
||||
|
||||
-- | Get reference for target; if none exists, create unique one and return.
|
||||
-- Prefer label if possible; otherwise, generate a unique key.
|
||||
getReference :: [Inline] -> Target -> State WriterState [Inline]
|
||||
getReference label (src, tit) = do
|
||||
(_,refs) <- get
|
||||
case find ((== (src, tit)) . snd) refs of
|
||||
Just (ref, _) -> return ref
|
||||
Nothing -> do
|
||||
let label' = case find ((== label) . fst) refs of
|
||||
Just _ -> -- label is used; generate numerical label
|
||||
case find (\n -> not (any (== [Str (show n)])
|
||||
(map fst refs))) [1..10000] of
|
||||
Just x -> [Str (show x)]
|
||||
Nothing -> error "no unique label"
|
||||
Nothing -> label
|
||||
modify (\(notes, refs) -> (notes, (label', (src,tit)):refs))
|
||||
return label'
|
||||
(_,refs) <- get
|
||||
case find ((== (src, tit)) . snd) refs of
|
||||
Just (ref, _) -> return ref
|
||||
Nothing -> do
|
||||
let label' = case find ((== label) . fst) refs of
|
||||
Just _ -> -- label is used; generate numerical label
|
||||
case find (\n -> not (any (== [Str (show n)])
|
||||
(map fst refs))) [1..10000] of
|
||||
Just x -> [Str (show x)]
|
||||
Nothing -> error "no unique label"
|
||||
Nothing -> label
|
||||
modify (\(notes, refs) -> (notes, (label', (src,tit)):refs))
|
||||
return label'
|
||||
|
||||
-- | Convert list of Pandoc inline elements to markdown.
|
||||
inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
|
||||
inlineListToMarkdown opts lst = mapM (inlineToMarkdown opts) lst >>= (return . hcat)
|
||||
inlineListToMarkdown opts lst =
|
||||
mapM (inlineToMarkdown opts) lst >>= return . hcat
|
||||
|
||||
-- | Convert Pandoc inline element to markdown.
|
||||
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
|
||||
|
@ -327,13 +322,13 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
|
|||
return $ if useAuto
|
||||
then char '<' <> text srcSuffix <> char '>'
|
||||
else if useRefLinks
|
||||
then let first = char '[' <> linktext <> char ']'
|
||||
second = if txt == ref
|
||||
then text "[]"
|
||||
else char '[' <> reftext <> char ']'
|
||||
in first <> second
|
||||
else char '[' <> linktext <> char ']' <>
|
||||
char '(' <> text src <> linktitle <> char ')'
|
||||
then let first = char '[' <> linktext <> char ']'
|
||||
second = if txt == ref
|
||||
then text "[]"
|
||||
else char '[' <> reftext <> char ']'
|
||||
in first <> second
|
||||
else char '[' <> linktext <> char ']' <>
|
||||
char '(' <> text src <> linktitle <> char ')'
|
||||
inlineToMarkdown opts (Image alternate (source, tit)) = do
|
||||
let txt = if (null alternate) || (alternate == [Str ""]) ||
|
||||
(alternate == [Str source]) -- to prevent autolinks
|
||||
|
|
|
@ -29,13 +29,11 @@ Conversion of 'Pandoc' documents to reStructuredText.
|
|||
|
||||
reStructuredText: <http://docutils.sourceforge.net/rst.html>
|
||||
-}
|
||||
module Text.Pandoc.Writers.RST (
|
||||
writeRST
|
||||
) where
|
||||
module Text.Pandoc.Writers.RST ( writeRST) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Blocks
|
||||
import Data.List ( group, isPrefixOf, drop, find, intersperse )
|
||||
import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse )
|
||||
import Text.PrettyPrint.HughesPJ hiding ( Str )
|
||||
import Control.Monad.State
|
||||
|
||||
|
@ -70,8 +68,7 @@ pandocToRST opts (Pandoc meta blocks) = do
|
|||
|
||||
-- | Return RST representation of reference key table.
|
||||
keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
|
||||
keyTableToRST opts refs =
|
||||
mapM (keyToRST opts) refs >>= (return . vcat)
|
||||
keyTableToRST opts refs = mapM (keyToRST opts) refs >>= return . vcat
|
||||
|
||||
-- | Return RST representation of a reference key.
|
||||
keyToRST :: WriterOptions
|
||||
|
@ -85,7 +82,7 @@ keyToRST opts (label, (src, tit)) = do
|
|||
notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc
|
||||
notesToRST opts notes =
|
||||
mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>=
|
||||
(return . vcat)
|
||||
return . vcat
|
||||
|
||||
-- | Return RST representation of a note.
|
||||
noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc
|
||||
|
@ -96,8 +93,7 @@ noteToRST opts num note = do
|
|||
|
||||
-- | Return RST representation of picture reference table.
|
||||
pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
|
||||
pictTableToRST opts refs =
|
||||
mapM (pictToRST opts) refs >>= (return . vcat)
|
||||
pictTableToRST opts refs = mapM (pictToRST opts) refs >>= return . vcat
|
||||
|
||||
-- | Return RST representation of a picture substitution reference.
|
||||
pictToRST :: WriterOptions
|
||||
|
@ -112,7 +108,7 @@ pictToRST opts (label, (src, _)) = do
|
|||
wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc
|
||||
wrappedRST opts inlines =
|
||||
mapM (wrappedRSTSection opts) (splitBy LineBreak inlines) >>=
|
||||
(return . vcat)
|
||||
return . vcat
|
||||
|
||||
wrappedRSTSection :: WriterOptions -> [Inline] -> State WriterState Doc
|
||||
wrappedRSTSection opts sect = do
|
||||
|
@ -160,21 +156,19 @@ blockToRST :: WriterOptions -- ^ Options
|
|||
blockToRST opts Null = return empty
|
||||
blockToRST opts (Plain inlines) = wrappedRST opts inlines
|
||||
blockToRST opts (Para [TeX str]) =
|
||||
let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in
|
||||
return $ hang (text "\n.. raw:: latex\n") 3
|
||||
(vcat $ map text (lines str'))
|
||||
let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
|
||||
return $ hang (text "\n.. raw:: latex\n") 3 $ vcat $ map text (lines str')
|
||||
blockToRST opts (Para inlines) = do
|
||||
contents <- wrappedRST opts inlines
|
||||
return $ contents <> text "\n"
|
||||
blockToRST opts (RawHtml str) =
|
||||
let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in
|
||||
return $ hang (text "\n.. raw:: html\n") 3
|
||||
(vcat $ map text (lines str'))
|
||||
let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
|
||||
return $ hang (text "\n.. raw:: html\n") 3 $ vcat $ map text (lines str')
|
||||
blockToRST opts HorizontalRule = return $ text "--------------\n"
|
||||
blockToRST opts (Header level inlines) = do
|
||||
contents <- inlineListToRST opts inlines
|
||||
let headerLength = length $ render contents
|
||||
let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1)
|
||||
let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
|
||||
let border = text $ replicate headerLength headerChar
|
||||
return $ contents $+$ border <> text "\n"
|
||||
blockToRST opts (CodeBlock str) = return $ (text "::\n") $+$
|
||||
|
@ -200,11 +194,10 @@ blockToRST opts (Table caption aligns widths headers rows) = do
|
|||
beg = TextBlock 2 height (replicate height "| ")
|
||||
end = TextBlock 2 height (replicate height " |")
|
||||
middle = hcatBlocks $ intersperse sep blocks
|
||||
let makeRow = hpipeBlocks . (zipWith docToBlock widthsInChars)
|
||||
let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars
|
||||
let head = makeRow headers'
|
||||
rows' <- mapM (\row -> do
|
||||
cols <- mapM (blockListToRST opts) row
|
||||
return $ makeRow cols) rows
|
||||
rows' <- mapM (\row -> do cols <- mapM (blockListToRST opts) row
|
||||
return $ makeRow cols) rows
|
||||
let tableWidth = sum widthsInChars
|
||||
let maxRowHeight = maximum $ map heightOfBlock (head:rows')
|
||||
let border ch = char '+' <> char ch <>
|
||||
|
@ -225,8 +218,7 @@ blockToRST opts (OrderedList (start, style, delim) items) = do
|
|||
(start, style, delim)
|
||||
let maxMarkerLength = maximum $ map length markers
|
||||
let markers' = map (\m -> let s = maxMarkerLength - length m
|
||||
in m ++ replicate s ' ')
|
||||
markers
|
||||
in m ++ replicate s ' ') markers
|
||||
contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $
|
||||
zip markers' items
|
||||
-- ensure that sublists have preceding blank line
|
||||
|
@ -262,11 +254,11 @@ blockListToRST :: WriterOptions -- ^ Options
|
|||
-> [Block] -- ^ List of block elements
|
||||
-> State WriterState Doc
|
||||
blockListToRST opts blocks =
|
||||
mapM (blockToRST opts) blocks >>= (return . vcat)
|
||||
mapM (blockToRST opts) blocks >>= return . vcat
|
||||
|
||||
-- | Convert list of Pandoc inline elements to RST.
|
||||
inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc
|
||||
inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= (return . hcat)
|
||||
inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= return . hcat
|
||||
|
||||
-- | Convert Pandoc inline element to RST.
|
||||
inlineToRST :: WriterOptions -> Inline -> State WriterState Doc
|
||||
|
@ -319,8 +311,8 @@ inlineToRST opts (Link txt (src, tit)) = do
|
|||
inlineToRST opts (Image alternate (source, tit)) = do
|
||||
(notes, refs, pics) <- get
|
||||
let labelsUsed = map fst pics
|
||||
let txt = if (null alternate) || (alternate == [Str ""]) ||
|
||||
(alternate `elem` labelsUsed)
|
||||
let txt = if null alternate || alternate == [Str ""] ||
|
||||
alternate `elem` labelsUsed
|
||||
then [Str $ "image" ++ show (length refs)]
|
||||
else alternate
|
||||
let pics' = if (txt, (source, tit)) `elem` pics
|
||||
|
|
|
@ -27,12 +27,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
|
||||
Conversion of 'Pandoc' documents to RTF (rich text format).
|
||||
-}
|
||||
module Text.Pandoc.Writers.RTF ( writeRTF) where
|
||||
module Text.Pandoc.Writers.RTF ( writeRTF ) where
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Regex ( matchRegexAll, mkRegex )
|
||||
import Data.List ( isSuffixOf )
|
||||
import Data.Char ( ord, chr )
|
||||
import Data.Char ( ord )
|
||||
|
||||
-- | Convert Pandoc to a string in rich text format.
|
||||
writeRTF :: WriterOptions -> Pandoc -> String
|
||||
|
@ -44,22 +44,22 @@ writeRTF options (Pandoc meta blocks) =
|
|||
then tableOfContents $ filter isHeaderBlock blocks
|
||||
else ""
|
||||
foot = if writerStandalone options then "\n}\n" else ""
|
||||
body = (writerIncludeBefore options) ++
|
||||
body = writerIncludeBefore options ++
|
||||
concatMap (blockToRTF 0 AlignDefault) blocks ++
|
||||
(writerIncludeAfter options) in
|
||||
head ++ toc ++ body ++ foot
|
||||
writerIncludeAfter options
|
||||
in head ++ toc ++ body ++ foot
|
||||
|
||||
-- | Construct table of contents from list of header blocks.
|
||||
tableOfContents :: [Block] -> String
|
||||
tableOfContents headers =
|
||||
let contentsTree = hierarchicalize headers
|
||||
in concatMap (blockToRTF 0 AlignDefault) $ [Header 1 [Str "Contents"],
|
||||
BulletList (map elementToListItem contentsTree)]
|
||||
in concatMap (blockToRTF 0 AlignDefault) $
|
||||
[Header 1 [Str "Contents"],
|
||||
BulletList (map elementToListItem contentsTree)]
|
||||
|
||||
elementToListItem :: Element -> [Block]
|
||||
elementToListItem (Blk _) = []
|
||||
elementToListItem (Sec sectext subsecs) =
|
||||
[Plain sectext] ++
|
||||
elementToListItem (Sec sectext subsecs) = [Plain sectext] ++
|
||||
if null subsecs
|
||||
then []
|
||||
else [BulletList (map elementToListItem subsecs)]
|
||||
|
@ -67,10 +67,10 @@ elementToListItem (Sec sectext subsecs) =
|
|||
-- | Convert unicode characters (> 127) into rich text format representation.
|
||||
handleUnicode :: String -> String
|
||||
handleUnicode [] = []
|
||||
handleUnicode (c:cs) = if (ord c) > 127
|
||||
then '\\':'u':(show (ord c)) ++ "?" ++
|
||||
(handleUnicode cs)
|
||||
else c:(handleUnicode cs)
|
||||
handleUnicode (c:cs) =
|
||||
if ord c > 127
|
||||
then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs
|
||||
else c:(handleUnicode cs)
|
||||
|
||||
-- | Escape special characters.
|
||||
escapeSpecial :: String -> String
|
||||
|
@ -127,7 +127,7 @@ listIncrement = 360
|
|||
|
||||
-- | Returns appropriate bullet list marker for indent level.
|
||||
bulletMarker :: Int -> String
|
||||
bulletMarker indent = case (indent `mod` 720) of
|
||||
bulletMarker indent = case indent `mod` 720 of
|
||||
0 -> "\\bullet "
|
||||
otherwise -> "\\endash "
|
||||
|
||||
|
@ -135,7 +135,7 @@ bulletMarker indent = case (indent `mod` 720) of
|
|||
orderedMarkers :: Int -> ListAttributes -> [String]
|
||||
orderedMarkers indent (start, style, delim) =
|
||||
if style == DefaultStyle && delim == DefaultDelim
|
||||
then case (indent `mod` 720) of
|
||||
then case indent `mod` 720 of
|
||||
0 -> orderedListMarkers (start, Decimal, Period)
|
||||
otherwise -> orderedListMarkers (start, LowerAlpha, Period)
|
||||
else orderedListMarkers (start, style, delim)
|
||||
|
@ -145,21 +145,21 @@ rtfHeader :: String -- ^ header text
|
|||
-> Meta -- ^ bibliographic information
|
||||
-> String
|
||||
rtfHeader headerText (Meta title authors date) =
|
||||
let titletext = if null title
|
||||
let titletext = if null title
|
||||
then ""
|
||||
else rtfPar 0 0 AlignCenter $
|
||||
"\\b \\fs36 " ++ inlineListToRTF title
|
||||
authorstext = if null authors
|
||||
then ""
|
||||
else rtfPar 0 0 AlignCenter ("\\b \\fs36 " ++
|
||||
inlineListToRTF title)
|
||||
authorstext = if null authors
|
||||
then ""
|
||||
else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\"
|
||||
(map stringToRTF authors)))
|
||||
datetext = if date == ""
|
||||
then ""
|
||||
else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in
|
||||
let spacer = if null (titletext ++ authorstext ++ datetext)
|
||||
else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\" $
|
||||
map stringToRTF authors))
|
||||
datetext = if date == ""
|
||||
then ""
|
||||
else rtfPar 0 0 AlignDefault "" in
|
||||
headerText ++ titletext ++ authorstext ++ datetext ++ spacer
|
||||
else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in
|
||||
let spacer = if null (titletext ++ authorstext ++ datetext)
|
||||
then ""
|
||||
else rtfPar 0 0 AlignDefault "" in
|
||||
headerText ++ titletext ++ authorstext ++ datetext ++ spacer
|
||||
|
||||
-- | Convert Pandoc block element to RTF.
|
||||
blockToRTF :: Int -- ^ indent level
|
||||
|
@ -168,31 +168,27 @@ blockToRTF :: Int -- ^ indent level
|
|||
-> String
|
||||
blockToRTF _ _ Null = ""
|
||||
blockToRTF indent alignment (Plain lst) =
|
||||
rtfCompact indent 0 alignment (inlineListToRTF lst)
|
||||
rtfCompact indent 0 alignment $ inlineListToRTF lst
|
||||
blockToRTF indent alignment (Para lst) =
|
||||
rtfPar indent 0 alignment (inlineListToRTF lst)
|
||||
rtfPar indent 0 alignment $ inlineListToRTF lst
|
||||
blockToRTF indent alignment (BlockQuote lst) =
|
||||
concatMap (blockToRTF (indent + indentIncrement) alignment) lst
|
||||
blockToRTF indent _ (CodeBlock str) =
|
||||
rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
|
||||
blockToRTF _ _ (RawHtml str) = ""
|
||||
blockToRTF indent alignment (BulletList lst) =
|
||||
spaceAtEnd $
|
||||
blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
|
||||
concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst
|
||||
blockToRTF indent alignment (OrderedList attribs lst) =
|
||||
spaceAtEnd $ concat $
|
||||
blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
|
||||
zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst
|
||||
blockToRTF indent alignment (DefinitionList lst) =
|
||||
spaceAtEnd $
|
||||
blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $
|
||||
concatMap (definitionListItemToRTF alignment indent) lst
|
||||
blockToRTF indent _ HorizontalRule =
|
||||
rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash"
|
||||
blockToRTF indent alignment (Header level lst) =
|
||||
rtfPar indent 0 alignment ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++
|
||||
(inlineListToRTF lst))
|
||||
blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $
|
||||
"\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst
|
||||
blockToRTF indent alignment (Table caption aligns sizes headers rows) =
|
||||
(tableRowToRTF True indent aligns sizes headers) ++ (concatMap
|
||||
(tableRowToRTF False indent aligns sizes) rows) ++
|
||||
tableRowToRTF True indent aligns sizes headers ++
|
||||
concatMap (tableRowToRTF False indent aligns sizes) rows ++
|
||||
rtfPar indent 0 alignment (inlineListToRTF caption)
|
||||
|
||||
tableRowToRTF :: Bool -> Int -> [Alignment] -> [Float] -> [[Block]] -> String
|
||||
|
@ -201,8 +197,10 @@ tableRowToRTF header indent aligns sizes cols =
|
|||
totalTwips = 6 * 1440 -- 6 inches
|
||||
rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips))
|
||||
0 sizes
|
||||
cellDefs = map (\edge -> (if header then "\\clbrdrb\\brdrs"
|
||||
else "") ++ "\\cellx" ++ show edge) rightEdges
|
||||
cellDefs = map (\edge -> (if header
|
||||
then "\\clbrdrb\\brdrs"
|
||||
else "") ++ "\\cellx" ++ show edge)
|
||||
rightEdges
|
||||
start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++
|
||||
"\\trkeep\\intbl\n{\n"
|
||||
end = "}\n\\intbl\\row}\n"
|
||||
|
@ -234,11 +232,12 @@ listItemToRTF alignment indent marker list =
|
|||
let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list in
|
||||
-- insert the list marker into the (processed) first block
|
||||
let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of
|
||||
Just (before, matched, after, _) -> before ++ "\\fi" ++
|
||||
show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++
|
||||
show listIncrement ++ "\\tab" ++ after
|
||||
Just (before, matched, after, _) ->
|
||||
before ++ "\\fi" ++ show (0 - listIncrement) ++
|
||||
" " ++ marker ++ "\\tx" ++
|
||||
show listIncrement ++ "\\tab" ++ after
|
||||
Nothing -> first in
|
||||
modFirst ++ (concat rest)
|
||||
modFirst ++ concat rest
|
||||
|
||||
-- | Convert definition list item (label, list of blocks) to RTF.
|
||||
definitionListItemToRTF :: Alignment -- ^ alignment
|
||||
|
@ -285,4 +284,3 @@ inlineToRTF (Image alternate (source, tit)) =
|
|||
inlineToRTF (Note contents) =
|
||||
"{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
|
||||
(concatMap (blockToRTF 0 AlignDefault) contents) ++ "}"
|
||||
|
||||
|
|
|
@ -1,4 +1,32 @@
|
|||
-- | Default headers for Pandoc writers.
|
||||
{-
|
||||
Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.Writers.DefaultHeaders
|
||||
Copyright : Copyright (C) 2006-7 John MacFarlane
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Default headers for Pandoc writers.
|
||||
-}
|
||||
module Text.Pandoc.Writers.DefaultHeaders (
|
||||
defaultLaTeXHeader,
|
||||
defaultConTeXtHeader,
|
||||
|
|
|
@ -1,5 +1,33 @@
|
|||
-- | Definitions for creation of S5 powerpoint-like HTML.
|
||||
-- (See <http://meyerweb.com/eric/tools/s5/>.)
|
||||
{-
|
||||
Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.Writers.S5
|
||||
Copyright : Copyright (C) 2006-7 John MacFarlane
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Definitions for creation of S5 powerpoint-like HTML.
|
||||
(See <http://meyerweb.com/eric/tools/s5/>.)
|
||||
-}
|
||||
module Text.Pandoc.Writers.S5 (
|
||||
-- * Strings
|
||||
s5Meta,
|
||||
|
@ -60,13 +88,13 @@ layoutDiv :: [Inline] -- ^ Title of document (for header or footer)
|
|||
-> [Block] -- ^ List of block elements returned
|
||||
layoutDiv title date = [(RawHtml "<div class=\"layout\">\n<div id=\"controls\"></div>\n<div id=\"currentSlide\"></div>\n<div id=\"header\"></div>\n<div id=\"footer\">\n"), (Header 1 [Str date]), (Header 2 title), (RawHtml "</div>\n</div>\n")]
|
||||
|
||||
presentationStart = (RawHtml "<div class=\"presentation\">\n\n")
|
||||
presentationStart = RawHtml "<div class=\"presentation\">\n\n"
|
||||
|
||||
presentationEnd = (RawHtml "</div>\n")
|
||||
presentationEnd = RawHtml "</div>\n"
|
||||
|
||||
slideStart = (RawHtml "<div class=\"slide\">\n")
|
||||
slideStart = RawHtml "<div class=\"slide\">\n"
|
||||
|
||||
slideEnd = (RawHtml "</div>\n")
|
||||
slideEnd = RawHtml "</div>\n"
|
||||
|
||||
-- | Returns 'True' if block is a Header 1.
|
||||
isH1 :: Block -> Bool
|
||||
|
@ -84,15 +112,22 @@ insertSlides beginning blocks =
|
|||
beforeHead ++ [slideEnd]
|
||||
else
|
||||
if beginning then
|
||||
beforeHead ++ slideStart:(head rest):(insertSlides False (tail rest))
|
||||
beforeHead ++
|
||||
slideStart:(head rest):(insertSlides False (tail rest))
|
||||
else
|
||||
beforeHead ++ slideEnd:slideStart:(head rest):(insertSlides False (tail rest))
|
||||
beforeHead ++
|
||||
slideEnd:slideStart:(head rest):(insertSlides False (tail rest))
|
||||
|
||||
-- | Insert blocks into 'Pandoc' for slide structure.
|
||||
insertS5Structure :: Pandoc -> Pandoc
|
||||
insertS5Structure (Pandoc meta []) = Pandoc meta []
|
||||
insertS5Structure (Pandoc (Meta title authors date) blocks) =
|
||||
let slides = insertSlides True blocks
|
||||
firstSlide = if (not (null title)) then [slideStart, (Header 1 title), (Header 3 [Str (joinWithSep ", " authors)]), (Header 4 [Str date]), slideEnd] else [] in
|
||||
let newBlocks = (layoutDiv title date) ++ presentationStart:firstSlide ++ slides ++ [presentationEnd] in
|
||||
Pandoc (Meta title authors date) newBlocks
|
||||
let slides = insertSlides True blocks
|
||||
firstSlide = if not (null title)
|
||||
then [slideStart, (Header 1 title),
|
||||
(Header 3 [Str (joinWithSep ", " authors)]),
|
||||
(Header 4 [Str date]), slideEnd]
|
||||
else []
|
||||
newBlocks = (layoutDiv title date) ++ presentationStart:firstSlide ++
|
||||
slides ++ [presentationEnd]
|
||||
in Pandoc (Meta title authors date) newBlocks
|
||||
|
|
Loading…
Add table
Reference in a new issue