000b89c718
+ Removed joinWithSep definition from Text.Pandoc.Shared. + Replaced joinWithSep with intercalate + Depend on base >= 3, since in base < 3 intercalate is not included. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1428 788f1e2b-df1e-0410-8736-df70ead52e1b
919 lines
35 KiB
Haskell
919 lines
35 KiB
Haskell
{-# LANGUAGE CPP #-}
|
|
{-
|
|
Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-}
|
|
|
|
{- |
|
|
Module : Text.Pandoc.Shared
|
|
Copyright : Copyright (C) 2006-8 John MacFarlane
|
|
License : GNU GPL, version 2 or above
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
Stability : alpha
|
|
Portability : portable
|
|
|
|
Utility functions and definitions used by the various Pandoc modules.
|
|
-}
|
|
module Text.Pandoc.Shared (
|
|
-- * List processing
|
|
splitBy,
|
|
splitByIndices,
|
|
substitute,
|
|
-- * Text processing
|
|
backslashEscapes,
|
|
escapeStringUsing,
|
|
stripTrailingNewlines,
|
|
removeLeadingTrailingSpace,
|
|
removeLeadingSpace,
|
|
removeTrailingSpace,
|
|
stripFirstAndLast,
|
|
camelCaseToHyphenated,
|
|
toRomanNumeral,
|
|
wrapped,
|
|
wrapIfNeeded,
|
|
wrappedTeX,
|
|
wrapTeXIfNeeded,
|
|
BlockWrapper (..),
|
|
wrappedBlocksToDoc,
|
|
-- * Parsing
|
|
(>>~),
|
|
anyLine,
|
|
many1Till,
|
|
notFollowedBy',
|
|
oneOfStrings,
|
|
spaceChar,
|
|
skipSpaces,
|
|
blankline,
|
|
blanklines,
|
|
enclosed,
|
|
stringAnyCase,
|
|
parseFromString,
|
|
lineClump,
|
|
charsInBalanced,
|
|
charsInBalanced',
|
|
romanNumeral,
|
|
emailAddress,
|
|
uri,
|
|
withHorizDisplacement,
|
|
nullBlock,
|
|
failIfStrict,
|
|
escaped,
|
|
anyOrderedListMarker,
|
|
orderedListMarker,
|
|
charRef,
|
|
readWith,
|
|
testStringWith,
|
|
ParserState (..),
|
|
defaultParserState,
|
|
HeaderType (..),
|
|
ParserContext (..),
|
|
QuoteContext (..),
|
|
NoteTable,
|
|
KeyTable,
|
|
lookupKeySrc,
|
|
refsMatch,
|
|
-- * Native format prettyprinting
|
|
prettyPandoc,
|
|
-- * Pandoc block and inline list processing
|
|
orderedListMarkers,
|
|
normalizeSpaces,
|
|
compactify,
|
|
Element (..),
|
|
hierarchicalize,
|
|
isHeaderBlock,
|
|
-- * Writer options
|
|
HTMLMathMethod (..),
|
|
WriterOptions (..),
|
|
defaultWriterOptions,
|
|
-- * File handling
|
|
inDirectory
|
|
) where
|
|
|
|
import Text.Pandoc.Definition
|
|
import Text.ParserCombinators.Parsec
|
|
import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text )
|
|
import qualified Text.PrettyPrint.HughesPJ as PP
|
|
import Text.Pandoc.CharacterReferences ( characterReference )
|
|
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
|
|
import Data.List ( find, isPrefixOf, intercalate )
|
|
import Control.Monad ( join )
|
|
import Network.URI ( parseURI, URI (..), isAllowedInURI )
|
|
import System.Directory
|
|
import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
|
|
import System.IO.UTF8
|
|
|
|
--
|
|
-- List processing
|
|
--
|
|
|
|
-- | Split list by groups of one or more sep.
|
|
splitBy :: (Eq a) => a -> [a] -> [[a]]
|
|
splitBy _ [] = []
|
|
splitBy sep lst =
|
|
let (first, rest) = break (== sep) lst
|
|
rest' = dropWhile (== sep) rest
|
|
in first:(splitBy sep rest')
|
|
|
|
-- | Split list into chunks divided at specified indices.
|
|
splitByIndices :: [Int] -> [a] -> [[a]]
|
|
splitByIndices [] lst = [lst]
|
|
splitByIndices (x:xs) lst =
|
|
let (first, rest) = splitAt x lst in
|
|
first:(splitByIndices (map (\y -> y - x) xs) rest)
|
|
|
|
-- | Replace each occurrence of one sublist in a list with another.
|
|
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
|
|
substitute _ _ [] = []
|
|
substitute [] _ lst = lst
|
|
substitute target replacement lst =
|
|
if target `isPrefixOf` lst
|
|
then replacement ++ (substitute target replacement $ drop (length target) lst)
|
|
else (head lst):(substitute target replacement $ tail lst)
|
|
|
|
--
|
|
-- Text processing
|
|
--
|
|
|
|
-- | Returns an association list of backslash escapes for the
|
|
-- designated characters.
|
|
backslashEscapes :: [Char] -- ^ list of special characters to escape
|
|
-> [(Char, String)]
|
|
backslashEscapes = map (\ch -> (ch, ['\\',ch]))
|
|
|
|
-- | Escape a string of characters, using an association list of
|
|
-- characters and strings.
|
|
escapeStringUsing :: [(Char, String)] -> String -> String
|
|
escapeStringUsing _ [] = ""
|
|
escapeStringUsing escapeTable (x:xs) =
|
|
case (lookup x escapeTable) of
|
|
Just str -> str ++ rest
|
|
Nothing -> x:rest
|
|
where rest = escapeStringUsing escapeTable xs
|
|
|
|
-- | Strip trailing newlines from string.
|
|
stripTrailingNewlines :: String -> String
|
|
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
|
|
|
|
-- | Remove leading and trailing space (including newlines) from string.
|
|
removeLeadingTrailingSpace :: String -> String
|
|
removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
|
|
|
|
-- | Remove leading space (including newlines) from string.
|
|
removeLeadingSpace :: String -> String
|
|
removeLeadingSpace = dropWhile (`elem` " \n\t")
|
|
|
|
-- | Remove trailing space (including newlines) from string.
|
|
removeTrailingSpace :: String -> String
|
|
removeTrailingSpace = reverse . removeLeadingSpace . reverse
|
|
|
|
-- | Strip leading and trailing characters from string
|
|
stripFirstAndLast :: String -> String
|
|
stripFirstAndLast str =
|
|
drop 1 $ take ((length str) - 1) str
|
|
|
|
-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
|
|
camelCaseToHyphenated :: String -> String
|
|
camelCaseToHyphenated [] = ""
|
|
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
|
|
a:'-':(toLower b):(camelCaseToHyphenated rest)
|
|
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
|
|
|
|
-- | Convert number < 4000 to uppercase roman numeral.
|
|
toRomanNumeral :: Int -> String
|
|
toRomanNumeral x =
|
|
if x >= 4000 || x < 0
|
|
then "?"
|
|
else case x of
|
|
_ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
|
|
_ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
|
|
_ | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
|
|
_ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
|
|
_ | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
|
|
_ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
|
|
_ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
|
|
_ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
|
|
_ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
|
|
_ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5)
|
|
_ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
|
|
_ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4)
|
|
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
|
|
_ -> ""
|
|
|
|
-- | Wrap inlines to line length.
|
|
wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
|
|
wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>=
|
|
return . fsep
|
|
|
|
-- | Wrap inlines if the text wrap option is selected.
|
|
wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) ->
|
|
[Inline] -> m Doc
|
|
wrapIfNeeded opts = if writerWrapText opts
|
|
then wrapped
|
|
else ($)
|
|
|
|
-- auxiliary function for wrappedTeX
|
|
isNote :: Inline -> Bool
|
|
isNote (Note _) = True
|
|
isNote _ = False
|
|
|
|
-- | Wrap inlines to line length, treating footnotes in a way that
|
|
-- makes sense in LaTeX and ConTeXt.
|
|
wrappedTeX :: Monad m
|
|
=> Bool
|
|
-> ([Inline] -> m Doc)
|
|
-> [Inline]
|
|
-> m Doc
|
|
wrappedTeX includePercent listWriter sect = do
|
|
let (firstpart, rest) = break isNote sect
|
|
firstpartWrapped <- wrapped listWriter firstpart
|
|
if null rest
|
|
then return firstpartWrapped
|
|
else do let (note:rest') = rest
|
|
restWrapped <- if null rest'
|
|
then return empty
|
|
else wrappedTeX includePercent listWriter rest'
|
|
noteText <- listWriter [note]
|
|
return $ firstpartWrapped <>
|
|
(if includePercent then PP.char '%' else empty) $$
|
|
noteText $$ restWrapped
|
|
|
|
-- | Wrap inlines if the text wrap option is selected, specialized
|
|
-- for LaTeX and ConTeXt.
|
|
wrapTeXIfNeeded :: Monad m
|
|
=> WriterOptions
|
|
-> Bool
|
|
-> ([Inline] -> m Doc)
|
|
-> [Inline]
|
|
-> m Doc
|
|
wrapTeXIfNeeded opts includePercent = if writerWrapText opts
|
|
then wrappedTeX includePercent
|
|
else ($)
|
|
|
|
-- | Indicates whether block should be surrounded by blank lines (@Pad@) or not (@Reg@).
|
|
data BlockWrapper = Pad Doc | Reg Doc
|
|
|
|
-- | Converts a list of wrapped blocks to a Doc, with appropriate spaces around blocks.
|
|
wrappedBlocksToDoc :: [BlockWrapper] -> Doc
|
|
wrappedBlocksToDoc = foldr addBlock empty
|
|
where addBlock (Pad d) accum | isEmpty accum = d
|
|
addBlock (Pad d) accum = d $$ text "" $$ accum
|
|
addBlock (Reg d) accum = d $$ accum
|
|
|
|
--
|
|
-- Parsing
|
|
--
|
|
|
|
-- | Like >>, but returns the operation on the left.
|
|
-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
|
|
(>>~) :: (Monad m) => m a -> m b -> m a
|
|
a >>~ b = a >>= \x -> b >> return x
|
|
|
|
-- | Parse any line of text
|
|
anyLine :: GenParser Char st [Char]
|
|
anyLine = manyTill anyChar newline
|
|
|
|
-- | Like @manyTill@, but reads at least one item.
|
|
many1Till :: GenParser tok st a
|
|
-> GenParser tok st end
|
|
-> GenParser tok st [a]
|
|
many1Till p end = do
|
|
first <- p
|
|
rest <- manyTill p end
|
|
return (first:rest)
|
|
|
|
-- | A more general form of @notFollowedBy@. This one allows any
|
|
-- type of parser to be specified, and succeeds only if that parser fails.
|
|
-- It does not consume any input.
|
|
notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
|
|
notFollowedBy' p = try $ join $ do a <- try p
|
|
return (unexpected (show a))
|
|
<|>
|
|
return (return ())
|
|
-- (This version due to Andrew Pimlott on the Haskell mailing list.)
|
|
|
|
-- | Parses one of a list of strings (tried in order).
|
|
oneOfStrings :: [String] -> GenParser Char st String
|
|
oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
|
|
|
|
-- | Parses a space or tab.
|
|
spaceChar :: CharParser st Char
|
|
spaceChar = char ' ' <|> char '\t'
|
|
|
|
-- | Skips zero or more spaces or tabs.
|
|
skipSpaces :: GenParser Char st ()
|
|
skipSpaces = skipMany spaceChar
|
|
|
|
-- | Skips zero or more spaces or tabs, then reads a newline.
|
|
blankline :: GenParser Char st Char
|
|
blankline = try $ skipSpaces >> newline
|
|
|
|
-- | Parses one or more blank lines and returns a string of newlines.
|
|
blanklines :: GenParser Char st [Char]
|
|
blanklines = many1 blankline
|
|
|
|
-- | Parses material enclosed between start and end parsers.
|
|
enclosed :: GenParser Char st t -- ^ start parser
|
|
-> GenParser Char st end -- ^ end parser
|
|
-> GenParser Char st a -- ^ content parser (to be used repeatedly)
|
|
-> GenParser Char st [a]
|
|
enclosed start end parser = try $
|
|
start >> notFollowedBy space >> many1Till parser end
|
|
|
|
-- | Parse string, case insensitive.
|
|
stringAnyCase :: [Char] -> CharParser st String
|
|
stringAnyCase [] = string ""
|
|
stringAnyCase (x:xs) = do
|
|
firstChar <- char (toUpper x) <|> char (toLower x)
|
|
rest <- stringAnyCase xs
|
|
return (firstChar:rest)
|
|
|
|
-- | Parse contents of 'str' using 'parser' and return result.
|
|
parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
|
|
parseFromString parser str = do
|
|
oldPos <- getPosition
|
|
oldInput <- getInput
|
|
setInput str
|
|
result <- parser
|
|
setInput oldInput
|
|
setPosition oldPos
|
|
return result
|
|
|
|
-- | Parse raw line block up to and including blank lines.
|
|
lineClump :: GenParser Char st String
|
|
lineClump = blanklines
|
|
<|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
|
|
|
|
-- | Parse a string of characters between an open character
|
|
-- and a close character, including text between balanced
|
|
-- pairs of open and close, which must be different. For example,
|
|
-- @charsInBalanced '(' ')'@ will parse "(hello (there))"
|
|
-- and return "hello (there)". Stop if a blank line is
|
|
-- encountered.
|
|
charsInBalanced :: Char -> Char -> GenParser Char st String
|
|
charsInBalanced open close = try $ do
|
|
char open
|
|
raw <- many $ (many1 (noneOf [open, close, '\n']))
|
|
<|> (do res <- charsInBalanced open close
|
|
return $ [open] ++ res ++ [close])
|
|
<|> try (string "\n" >>~ notFollowedBy' blanklines)
|
|
char close
|
|
return $ concat raw
|
|
|
|
-- | Like @charsInBalanced@, but allow blank lines in the content.
|
|
charsInBalanced' :: Char -> Char -> GenParser Char st String
|
|
charsInBalanced' open close = try $ do
|
|
char open
|
|
raw <- many $ (many1 (noneOf [open, close]))
|
|
<|> (do res <- charsInBalanced' open close
|
|
return $ [open] ++ res ++ [close])
|
|
char close
|
|
return $ concat raw
|
|
|
|
-- Auxiliary functions for romanNumeral:
|
|
|
|
lowercaseRomanDigits :: [Char]
|
|
lowercaseRomanDigits = ['i','v','x','l','c','d','m']
|
|
|
|
uppercaseRomanDigits :: [Char]
|
|
uppercaseRomanDigits = map toUpper lowercaseRomanDigits
|
|
|
|
-- | Parses a roman numeral (uppercase or lowercase), returns number.
|
|
romanNumeral :: Bool -- ^ Uppercase if true
|
|
-> GenParser Char st Int
|
|
romanNumeral upperCase = do
|
|
let romanDigits = if upperCase
|
|
then uppercaseRomanDigits
|
|
else lowercaseRomanDigits
|
|
lookAhead $ oneOf romanDigits
|
|
let [one, five, ten, fifty, hundred, fivehundred, thousand] =
|
|
map char romanDigits
|
|
thousands <- many thousand >>= (return . (1000 *) . length)
|
|
ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
|
|
fivehundreds <- many fivehundred >>= (return . (500 *) . length)
|
|
fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
|
|
hundreds <- many hundred >>= (return . (100 *) . length)
|
|
nineties <- option 0 $ try $ ten >> hundred >> return 90
|
|
fifties <- many fifty >>= (return . (50 *) . length)
|
|
forties <- option 0 $ try $ ten >> fifty >> return 40
|
|
tens <- many ten >>= (return . (10 *) . length)
|
|
nines <- option 0 $ try $ one >> ten >> return 9
|
|
fives <- many five >>= (return . (5 *) . length)
|
|
fours <- option 0 $ try $ one >> five >> return 4
|
|
ones <- many one >>= (return . length)
|
|
let total = thousands + ninehundreds + fivehundreds + fourhundreds +
|
|
hundreds + nineties + fifties + forties + tens + nines +
|
|
fives + fours + ones
|
|
if total == 0
|
|
then fail "not a roman numeral"
|
|
else return total
|
|
|
|
-- Parsers for email addresses and URIs
|
|
|
|
emailChar :: GenParser Char st Char
|
|
emailChar = alphaNum <|> oneOf "-+_."
|
|
|
|
domainChar :: GenParser Char st Char
|
|
domainChar = alphaNum <|> char '-'
|
|
|
|
domain :: GenParser Char st [Char]
|
|
domain = do
|
|
first <- many1 domainChar
|
|
dom <- many1 $ try (char '.' >> many1 domainChar )
|
|
return $ intercalate "." (first:dom)
|
|
|
|
-- | Parses an email address; returns string.
|
|
emailAddress :: GenParser Char st [Char]
|
|
emailAddress = try $ do
|
|
firstLetter <- alphaNum
|
|
restAddr <- many emailChar
|
|
let addr = firstLetter:restAddr
|
|
char '@'
|
|
dom <- domain
|
|
return $ addr ++ '@':dom
|
|
|
|
-- | Parses a URI.
|
|
uri :: GenParser Char st String
|
|
uri = try $ do
|
|
str <- many1 $ satisfy isAllowedInURI
|
|
case parseURI str of
|
|
Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:",
|
|
"file:", "mailto:",
|
|
"news:", "telnet:" ]
|
|
then return $ show uri'
|
|
else fail "not a URI"
|
|
Nothing -> fail "not a URI"
|
|
|
|
-- | Applies a parser, returns tuple of its results and its horizontal
|
|
-- displacement (the difference between the source column at the end
|
|
-- and the source column at the beginning). Vertical displacement
|
|
-- (source row) is ignored.
|
|
withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
|
|
-> GenParser Char st (a, Int) -- ^ (result, displacement)
|
|
withHorizDisplacement parser = do
|
|
pos1 <- getPosition
|
|
result <- parser
|
|
pos2 <- getPosition
|
|
return (result, sourceColumn pos2 - sourceColumn pos1)
|
|
|
|
-- | Parses a character and returns 'Null' (so that the parser can move on
|
|
-- if it gets stuck).
|
|
nullBlock :: GenParser Char st Block
|
|
nullBlock = anyChar >> return Null
|
|
|
|
-- | Fail if reader is in strict markdown syntax mode.
|
|
failIfStrict :: GenParser Char ParserState ()
|
|
failIfStrict = do
|
|
state <- getState
|
|
if stateStrict state then fail "strict mode" else return ()
|
|
|
|
-- | Parses backslash, then applies character parser.
|
|
escaped :: GenParser Char st Char -- ^ Parser for character to escape
|
|
-> GenParser Char st Inline
|
|
escaped parser = try $ do
|
|
char '\\'
|
|
result <- parser
|
|
return (Str [result])
|
|
|
|
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
|
|
upperRoman :: GenParser Char st (ListNumberStyle, Int)
|
|
upperRoman = do
|
|
num <- romanNumeral True
|
|
return (UpperRoman, num)
|
|
|
|
-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
|
|
lowerRoman :: GenParser Char st (ListNumberStyle, Int)
|
|
lowerRoman = do
|
|
num <- romanNumeral False
|
|
return (LowerRoman, num)
|
|
|
|
-- | Parses a decimal numeral and returns (Decimal, number).
|
|
decimal :: GenParser Char st (ListNumberStyle, Int)
|
|
decimal = do
|
|
num <- many1 digit
|
|
return (Decimal, read num)
|
|
|
|
-- | Parses a '#' returns (DefaultStyle, 1).
|
|
defaultNum :: GenParser Char st (ListNumberStyle, Int)
|
|
defaultNum = do
|
|
char '#'
|
|
return (DefaultStyle, 1)
|
|
|
|
-- | Parses a lowercase letter and returns (LowerAlpha, number).
|
|
lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
|
|
lowerAlpha = do
|
|
ch <- oneOf ['a'..'z']
|
|
return (LowerAlpha, ord ch - ord 'a' + 1)
|
|
|
|
-- | Parses an uppercase letter and returns (UpperAlpha, number).
|
|
upperAlpha :: GenParser Char st (ListNumberStyle, Int)
|
|
upperAlpha = do
|
|
ch <- oneOf ['A'..'Z']
|
|
return (UpperAlpha, ord ch - ord 'A' + 1)
|
|
|
|
-- | Parses a roman numeral i or I
|
|
romanOne :: GenParser Char st (ListNumberStyle, Int)
|
|
romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
|
|
(char 'I' >> return (UpperRoman, 1))
|
|
|
|
-- | Parses an ordered list marker and returns list attributes.
|
|
anyOrderedListMarker :: GenParser Char st ListAttributes
|
|
anyOrderedListMarker = choice $
|
|
[delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
|
|
numParser <- [decimal, defaultNum, romanOne,
|
|
lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
|
|
|
|
-- | Parses a list number (num) followed by a period, returns list attributes.
|
|
inPeriod :: GenParser Char st (ListNumberStyle, Int)
|
|
-> GenParser Char st ListAttributes
|
|
inPeriod num = try $ do
|
|
(style, start) <- num
|
|
char '.'
|
|
let delim = if style == DefaultStyle
|
|
then DefaultDelim
|
|
else Period
|
|
return (start, style, delim)
|
|
|
|
-- | Parses a list number (num) followed by a paren, returns list attributes.
|
|
inOneParen :: GenParser Char st (ListNumberStyle, Int)
|
|
-> GenParser Char st ListAttributes
|
|
inOneParen num = try $ do
|
|
(style, start) <- num
|
|
char ')'
|
|
return (start, style, OneParen)
|
|
|
|
-- | Parses a list number (num) enclosed in parens, returns list attributes.
|
|
inTwoParens :: GenParser Char st (ListNumberStyle, Int)
|
|
-> GenParser Char st ListAttributes
|
|
inTwoParens num = try $ do
|
|
char '('
|
|
(style, start) <- num
|
|
char ')'
|
|
return (start, style, TwoParens)
|
|
|
|
-- | Parses an ordered list marker with a given style and delimiter,
|
|
-- returns number.
|
|
orderedListMarker :: ListNumberStyle
|
|
-> ListNumberDelim
|
|
-> GenParser Char st Int
|
|
orderedListMarker style delim = do
|
|
let num = case style of
|
|
DefaultStyle -> decimal <|> defaultNum
|
|
Decimal -> decimal
|
|
UpperRoman -> upperRoman
|
|
LowerRoman -> lowerRoman
|
|
UpperAlpha -> upperAlpha
|
|
LowerAlpha -> lowerAlpha
|
|
let context = case delim of
|
|
DefaultDelim -> inPeriod
|
|
Period -> inPeriod
|
|
OneParen -> inOneParen
|
|
TwoParens -> inTwoParens
|
|
(start, _, _) <- context num
|
|
return start
|
|
|
|
-- | Parses a character reference and returns a Str element.
|
|
charRef :: GenParser Char st Inline
|
|
charRef = do
|
|
c <- characterReference
|
|
return $ Str [c]
|
|
|
|
-- | Parse a string with a given parser and state.
|
|
readWith :: GenParser Char ParserState a -- ^ parser
|
|
-> ParserState -- ^ initial state
|
|
-> String -- ^ input string
|
|
-> a
|
|
readWith parser state input =
|
|
case runParser parser state "source" input of
|
|
Left err -> error $ "\nError:\n" ++ show err
|
|
Right result -> result
|
|
|
|
-- | Parse a string with @parser@ (for testing).
|
|
testStringWith :: (Show a) => GenParser Char ParserState a
|
|
-> String
|
|
-> IO ()
|
|
testStringWith parser str = putStrLn $ show $
|
|
readWith parser defaultParserState str
|
|
|
|
-- | Parsing options.
|
|
data ParserState = ParserState
|
|
{ stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
|
|
stateParserContext :: ParserContext, -- ^ Inside list?
|
|
stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
|
|
stateSanitizeHTML :: Bool, -- ^ Sanitize HTML?
|
|
stateKeys :: KeyTable, -- ^ List of reference keys
|
|
#ifdef _CITEPROC
|
|
stateCitations :: [String], -- ^ List of available citations
|
|
#endif
|
|
stateNotes :: NoteTable, -- ^ List of notes
|
|
stateTabStop :: Int, -- ^ Tab stop
|
|
stateStandalone :: Bool, -- ^ Parse bibliographic info?
|
|
stateTitle :: [Inline], -- ^ Title of document
|
|
stateAuthors :: [String], -- ^ Authors of document
|
|
stateDate :: String, -- ^ Date of document
|
|
stateStrict :: Bool, -- ^ Use strict markdown syntax?
|
|
stateSmart :: Bool, -- ^ Use smart typography?
|
|
stateColumns :: Int, -- ^ Number of columns in terminal
|
|
stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used
|
|
}
|
|
deriving Show
|
|
|
|
defaultParserState :: ParserState
|
|
defaultParserState =
|
|
ParserState { stateParseRaw = False,
|
|
stateParserContext = NullState,
|
|
stateQuoteContext = NoQuote,
|
|
stateSanitizeHTML = False,
|
|
stateKeys = [],
|
|
#ifdef _CITEPROC
|
|
stateCitations = [],
|
|
#endif
|
|
stateNotes = [],
|
|
stateTabStop = 4,
|
|
stateStandalone = False,
|
|
stateTitle = [],
|
|
stateAuthors = [],
|
|
stateDate = [],
|
|
stateStrict = False,
|
|
stateSmart = False,
|
|
stateColumns = 80,
|
|
stateHeaderTable = [] }
|
|
|
|
data HeaderType
|
|
= SingleHeader Char -- ^ Single line of characters underneath
|
|
| DoubleHeader Char -- ^ Lines of characters above and below
|
|
deriving (Eq, Show)
|
|
|
|
data ParserContext
|
|
= ListItemState -- ^ Used when running parser on list item contents
|
|
| NullState -- ^ Default state
|
|
deriving (Eq, Show)
|
|
|
|
data QuoteContext
|
|
= InSingleQuote -- ^ Used when parsing inside single quotes
|
|
| InDoubleQuote -- ^ Used when parsing inside double quotes
|
|
| NoQuote -- ^ Used when not parsing inside quotes
|
|
deriving (Eq, Show)
|
|
|
|
type NoteTable = [(String, [Block])]
|
|
|
|
type KeyTable = [([Inline], Target)]
|
|
|
|
-- | Look up key in key table and return target object.
|
|
lookupKeySrc :: KeyTable -- ^ Key table
|
|
-> [Inline] -- ^ Key
|
|
-> Maybe Target
|
|
lookupKeySrc table key = case find (refsMatch key . fst) table of
|
|
Nothing -> Nothing
|
|
Just (_, src) -> Just src
|
|
|
|
-- | Returns @True@ if keys match (case insensitive).
|
|
refsMatch :: [Inline] -> [Inline] -> Bool
|
|
refsMatch ((Str x):restx) ((Str y):resty) =
|
|
((map toLower x) == (map toLower y)) && refsMatch restx resty
|
|
refsMatch ((Emph x):restx) ((Emph y):resty) =
|
|
refsMatch x y && refsMatch restx resty
|
|
refsMatch ((Strong x):restx) ((Strong y):resty) =
|
|
refsMatch x y && refsMatch restx resty
|
|
refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
|
|
refsMatch x y && refsMatch restx resty
|
|
refsMatch ((Superscript x):restx) ((Superscript y):resty) =
|
|
refsMatch x y && refsMatch restx resty
|
|
refsMatch ((Subscript x):restx) ((Subscript y):resty) =
|
|
refsMatch x y && refsMatch restx resty
|
|
refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) =
|
|
refsMatch x y && refsMatch restx resty
|
|
refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
|
|
t == u && refsMatch x y && refsMatch restx resty
|
|
refsMatch ((Code x):restx) ((Code y):resty) =
|
|
((map toLower x) == (map toLower y)) && refsMatch restx resty
|
|
refsMatch ((Math t x):restx) ((Math u y):resty) =
|
|
((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty
|
|
refsMatch ((TeX x):restx) ((TeX y):resty) =
|
|
((map toLower x) == (map toLower y)) && refsMatch restx resty
|
|
refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
|
|
((map toLower x) == (map toLower y)) && refsMatch restx resty
|
|
refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
|
|
refsMatch [] x = null x
|
|
refsMatch x [] = null x
|
|
|
|
--
|
|
-- Native format prettyprinting
|
|
--
|
|
|
|
-- | Indent string as a block.
|
|
indentBy :: Int -- ^ Number of spaces to indent the block
|
|
-> Int -- ^ Number of spaces (rel to block) to indent first line
|
|
-> String -- ^ Contents of block to indent
|
|
-> String
|
|
indentBy _ _ [] = ""
|
|
indentBy num first str =
|
|
let (firstLine:restLines) = lines str
|
|
firstLineIndent = num + first
|
|
in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
|
|
(intercalate "\n" $ map ((replicate num ' ') ++ ) restLines)
|
|
|
|
-- | Prettyprint list of Pandoc blocks elements.
|
|
prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
|
|
-> [Block] -- ^ List of blocks
|
|
-> String
|
|
prettyBlockList indent [] = indentBy indent 0 "[]"
|
|
prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
|
|
(intercalate "\n, " (map prettyBlock blocks)) ++ " ]"
|
|
|
|
-- | Prettyprint Pandoc block element.
|
|
prettyBlock :: Block -> String
|
|
prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
|
|
(prettyBlockList 2 blocks)
|
|
prettyBlock (OrderedList attribs blockLists) =
|
|
"OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
|
|
(intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks)
|
|
blockLists)) ++ " ]"
|
|
prettyBlock (BulletList blockLists) = "BulletList\n" ++
|
|
indentBy 2 0 ("[ " ++ (intercalate ", "
|
|
(map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
|
|
prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++
|
|
indentBy 2 0 ("[" ++ (intercalate ",\n"
|
|
(map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++
|
|
indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]"
|
|
prettyBlock (Table caption aligns widths header rows) =
|
|
"Table " ++ show caption ++ " " ++ show aligns ++ " " ++
|
|
show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
|
|
(intercalate ",\n" (map prettyRow rows)) ++ " ]"
|
|
where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", "
|
|
(map (\blocks -> prettyBlockList 2 blocks)
|
|
cols))) ++ " ]"
|
|
prettyBlock block = show block
|
|
|
|
-- | Prettyprint Pandoc document.
|
|
prettyPandoc :: Pandoc -> String
|
|
prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
|
|
")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
|
|
|
|
--
|
|
-- Pandoc block and inline list processing
|
|
--
|
|
|
|
-- | Generate infinite lazy list of markers for an ordered list,
|
|
-- depending on list attributes.
|
|
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
|
|
orderedListMarkers (start, numstyle, numdelim) =
|
|
let singleton c = [c]
|
|
nums = case numstyle of
|
|
DefaultStyle -> map show [start..]
|
|
Decimal -> map show [start..]
|
|
UpperAlpha -> drop (start - 1) $ cycle $
|
|
map singleton ['A'..'Z']
|
|
LowerAlpha -> drop (start - 1) $ cycle $
|
|
map singleton ['a'..'z']
|
|
UpperRoman -> map toRomanNumeral [start..]
|
|
LowerRoman -> map (map toLower . toRomanNumeral) [start..]
|
|
inDelim str = case numdelim of
|
|
DefaultDelim -> str ++ "."
|
|
Period -> str ++ "."
|
|
OneParen -> str ++ ")"
|
|
TwoParens -> "(" ++ str ++ ")"
|
|
in map inDelim nums
|
|
|
|
-- | Normalize a list of inline elements: remove leading and trailing
|
|
-- @Space@ elements, collapse double @Space@s into singles, and
|
|
-- remove empty Str elements.
|
|
normalizeSpaces :: [Inline] -> [Inline]
|
|
normalizeSpaces [] = []
|
|
normalizeSpaces list =
|
|
let removeDoubles [] = []
|
|
removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
|
|
removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest)
|
|
removeDoubles ((Str ""):rest) = removeDoubles rest
|
|
removeDoubles (x:rest) = x:(removeDoubles rest)
|
|
removeLeading (Space:xs) = removeLeading xs
|
|
removeLeading x = x
|
|
removeTrailing [] = []
|
|
removeTrailing lst = if (last lst == Space)
|
|
then init lst
|
|
else lst
|
|
in removeLeading $ removeTrailing $ removeDoubles list
|
|
|
|
-- | Change final list item from @Para@ to @Plain@ if the list should
|
|
-- be compact.
|
|
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
|
|
-> [[Block]]
|
|
compactify [] = []
|
|
compactify items =
|
|
let final = last items
|
|
others = init items
|
|
in case final of
|
|
[Para a] -> if any containsPara others
|
|
then items
|
|
else others ++ [[Plain a]]
|
|
_ -> items
|
|
|
|
containsPara :: [Block] -> Bool
|
|
containsPara [] = False
|
|
containsPara ((Para _):_) = True
|
|
containsPara ((BulletList items):rest) = any containsPara items ||
|
|
containsPara rest
|
|
containsPara ((OrderedList _ items):rest) = any containsPara items ||
|
|
containsPara rest
|
|
containsPara ((DefinitionList items):rest) = any containsPara (map snd items) ||
|
|
containsPara rest
|
|
containsPara (_:rest) = containsPara rest
|
|
|
|
-- | Data structure for defining hierarchical Pandoc documents
|
|
data Element = Blk Block
|
|
| Sec [Inline] [Element] deriving (Eq, Read, Show)
|
|
|
|
-- | Returns @True@ on Header block with at least the specified level
|
|
headerAtLeast :: Int -> Block -> Bool
|
|
headerAtLeast level (Header x _) = x <= level
|
|
headerAtLeast _ _ = False
|
|
|
|
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
|
|
hierarchicalize :: [Block] -> [Element]
|
|
hierarchicalize [] = []
|
|
hierarchicalize (block:rest) =
|
|
case block of
|
|
(Header level title) ->
|
|
let (thisSection, rest') = break (headerAtLeast level) rest
|
|
in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
|
|
x -> (Blk x):(hierarchicalize rest)
|
|
|
|
-- | True if block is a Header block.
|
|
isHeaderBlock :: Block -> Bool
|
|
isHeaderBlock (Header _ _) = True
|
|
isHeaderBlock _ = False
|
|
|
|
--
|
|
-- Writer options
|
|
--
|
|
|
|
data HTMLMathMethod = PlainMath
|
|
| LaTeXMathML (Maybe String) -- url of LaTeXMathML.js
|
|
| GladTeX
|
|
| MimeTeX String -- url of mimetex.cgi
|
|
deriving (Show, Read, Eq)
|
|
|
|
-- | Options for writers
|
|
data WriterOptions = WriterOptions
|
|
{ writerStandalone :: Bool -- ^ Include header and footer
|
|
, writerHeader :: String -- ^ Header for the document
|
|
, writerTitlePrefix :: String -- ^ Prefix for HTML titles
|
|
, writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
|
|
, writerTableOfContents :: Bool -- ^ Include table of contents
|
|
, writerS5 :: Bool -- ^ We're writing S5
|
|
, writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
|
|
, writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
|
|
, writerIncremental :: Bool -- ^ Incremental S5 lists
|
|
, writerNumberSections :: Bool -- ^ Number sections in LaTeX
|
|
, writerIncludeBefore :: String -- ^ String to include before the body
|
|
, writerIncludeAfter :: String -- ^ String to include after the body
|
|
, writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
|
|
, writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
|
|
, writerWrapText :: Bool -- ^ Wrap text to line length
|
|
} deriving Show
|
|
|
|
-- | Default writer options.
|
|
defaultWriterOptions :: WriterOptions
|
|
defaultWriterOptions =
|
|
WriterOptions { writerStandalone = False
|
|
, writerHeader = ""
|
|
, writerTitlePrefix = ""
|
|
, writerTabStop = 4
|
|
, writerTableOfContents = False
|
|
, writerS5 = False
|
|
, writerHTMLMathMethod = PlainMath
|
|
, writerIgnoreNotes = False
|
|
, writerIncremental = False
|
|
, writerNumberSections = False
|
|
, writerIncludeBefore = ""
|
|
, writerIncludeAfter = ""
|
|
, writerStrictMarkdown = False
|
|
, writerReferenceLinks = False
|
|
, writerWrapText = True
|
|
}
|
|
|
|
--
|
|
-- File handling
|
|
--
|
|
|
|
-- | Perform an IO action in a directory, returning to starting directory.
|
|
inDirectory :: FilePath -> IO a -> IO a
|
|
inDirectory path action = do
|
|
oldDir <- getCurrentDirectory
|
|
setCurrentDirectory path
|
|
result <- action
|
|
setCurrentDirectory oldDir
|
|
return result
|