2009-12-31 16:48:14 +00:00
|
|
|
{-# LANGUAGE DeriveDataTypeable #-}
|
2007-11-03 23:27:58 +00:00
|
|
|
{-
|
2010-03-23 13:31:09 -07:00
|
|
|
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.Shared
|
2010-03-23 13:31:09 -07:00
|
|
|
Copyright : Copyright (C) 2006-2010 John MacFarlane
|
2007-11-03 23:27:58 +00:00
|
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
|
|
Stability : alpha
|
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
Utility functions and definitions used by the various Pandoc modules.
|
|
|
|
-}
|
2010-03-23 15:05:33 -07:00
|
|
|
module Text.Pandoc.Shared (
|
2007-11-03 23:27:58 +00:00
|
|
|
-- * List processing
|
|
|
|
splitBy,
|
|
|
|
splitByIndices,
|
|
|
|
substitute,
|
|
|
|
-- * Text processing
|
|
|
|
backslashEscapes,
|
|
|
|
escapeStringUsing,
|
|
|
|
stripTrailingNewlines,
|
|
|
|
removeLeadingTrailingSpace,
|
|
|
|
removeLeadingSpace,
|
|
|
|
removeTrailingSpace,
|
|
|
|
stripFirstAndLast,
|
|
|
|
camelCaseToHyphenated,
|
|
|
|
toRomanNumeral,
|
2010-03-23 15:05:33 -07:00
|
|
|
escapeURI,
|
2010-03-23 15:34:53 -07:00
|
|
|
unescapeURI,
|
2007-11-03 23:27:58 +00:00
|
|
|
wrapped,
|
|
|
|
wrapIfNeeded,
|
2007-11-15 03:09:31 +00:00
|
|
|
wrappedTeX,
|
|
|
|
wrapTeXIfNeeded,
|
2008-02-24 05:48:31 +00:00
|
|
|
BlockWrapper (..),
|
|
|
|
wrappedBlocksToDoc,
|
2009-01-31 17:13:41 +00:00
|
|
|
tabFilter,
|
2008-12-17 15:34:25 +00:00
|
|
|
-- * Prettyprinting
|
|
|
|
hang',
|
2007-11-03 23:27:58 +00:00
|
|
|
-- * Pandoc block and inline list processing
|
|
|
|
orderedListMarkers,
|
|
|
|
normalizeSpaces,
|
|
|
|
compactify,
|
|
|
|
Element (..),
|
|
|
|
hierarchicalize,
|
2010-03-16 06:45:52 +00:00
|
|
|
uniqueIdent,
|
2007-11-03 23:27:58 +00:00
|
|
|
isHeaderBlock,
|
|
|
|
-- * Writer options
|
2007-12-01 03:11:44 +00:00
|
|
|
HTMLMathMethod (..),
|
2009-01-24 19:58:48 +00:00
|
|
|
ObfuscationMethod (..),
|
2007-11-03 23:27:58 +00:00
|
|
|
WriterOptions (..),
|
2008-07-31 23:16:02 +00:00
|
|
|
defaultWriterOptions,
|
|
|
|
-- * File handling
|
2009-12-31 01:11:23 +00:00
|
|
|
inDirectory,
|
|
|
|
readDataFile
|
2007-11-03 23:27:58 +00:00
|
|
|
) where
|
|
|
|
|
|
|
|
import Text.Pandoc.Definition
|
2010-07-04 13:43:45 -07:00
|
|
|
import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
|
2008-12-17 15:34:25 +00:00
|
|
|
import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest )
|
2007-11-15 03:09:31 +00:00
|
|
|
import qualified Text.PrettyPrint.HughesPJ as PP
|
2010-07-04 13:43:45 -07:00
|
|
|
import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
|
2010-03-28 22:29:31 -07:00
|
|
|
isLetter, isDigit )
|
2008-09-08 06:36:28 +00:00
|
|
|
import Data.List ( find, isPrefixOf, intercalate )
|
2010-07-04 13:43:45 -07:00
|
|
|
import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )
|
2010-03-23 15:34:53 -07:00
|
|
|
import Codec.Binary.UTF8.String ( encodeString, decodeString )
|
2008-07-31 23:16:02 +00:00
|
|
|
import System.Directory
|
2009-12-31 16:48:36 +00:00
|
|
|
import System.FilePath ( (</>) )
|
2010-05-07 11:28:38 -07:00
|
|
|
import Data.Generics (Typeable, Data)
|
2009-04-25 00:29:58 +00:00
|
|
|
import qualified Control.Monad.State as S
|
2009-12-31 01:11:23 +00:00
|
|
|
import Paths_pandoc (getDataFileName)
|
2010-03-22 19:29:37 -07:00
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- List processing
|
|
|
|
--
|
|
|
|
|
|
|
|
-- | Split list by groups of one or more sep.
|
|
|
|
splitBy :: (Eq a) => a -> [a] -> [[a]]
|
|
|
|
splitBy _ [] = []
|
|
|
|
splitBy sep lst =
|
|
|
|
let (first, rest) = break (== sep) lst
|
|
|
|
rest' = dropWhile (== sep) rest
|
|
|
|
in first:(splitBy sep rest')
|
|
|
|
|
|
|
|
-- | Split list into chunks divided at specified indices.
|
|
|
|
splitByIndices :: [Int] -> [a] -> [[a]]
|
|
|
|
splitByIndices [] lst = [lst]
|
|
|
|
splitByIndices (x:xs) lst =
|
|
|
|
let (first, rest) = splitAt x lst in
|
|
|
|
first:(splitByIndices (map (\y -> y - x) xs) rest)
|
|
|
|
|
|
|
|
-- | Replace each occurrence of one sublist in a list with another.
|
|
|
|
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
|
|
|
|
substitute _ _ [] = []
|
|
|
|
substitute [] _ lst = lst
|
|
|
|
substitute target replacement lst =
|
|
|
|
if target `isPrefixOf` lst
|
|
|
|
then replacement ++ (substitute target replacement $ drop (length target) lst)
|
|
|
|
else (head lst):(substitute target replacement $ tail lst)
|
|
|
|
|
|
|
|
--
|
|
|
|
-- Text processing
|
|
|
|
--
|
|
|
|
|
|
|
|
-- | Returns an association list of backslash escapes for the
|
|
|
|
-- designated characters.
|
|
|
|
backslashEscapes :: [Char] -- ^ list of special characters to escape
|
|
|
|
-> [(Char, String)]
|
|
|
|
backslashEscapes = map (\ch -> (ch, ['\\',ch]))
|
|
|
|
|
|
|
|
-- | Escape a string of characters, using an association list of
|
|
|
|
-- characters and strings.
|
|
|
|
escapeStringUsing :: [(Char, String)] -> String -> String
|
|
|
|
escapeStringUsing _ [] = ""
|
|
|
|
escapeStringUsing escapeTable (x:xs) =
|
|
|
|
case (lookup x escapeTable) of
|
|
|
|
Just str -> str ++ rest
|
|
|
|
Nothing -> x:rest
|
|
|
|
where rest = escapeStringUsing escapeTable xs
|
|
|
|
|
|
|
|
-- | Strip trailing newlines from string.
|
|
|
|
stripTrailingNewlines :: String -> String
|
|
|
|
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
|
|
|
|
|
|
|
|
-- | Remove leading and trailing space (including newlines) from string.
|
|
|
|
removeLeadingTrailingSpace :: String -> String
|
|
|
|
removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
|
|
|
|
|
|
|
|
-- | Remove leading space (including newlines) from string.
|
|
|
|
removeLeadingSpace :: String -> String
|
|
|
|
removeLeadingSpace = dropWhile (`elem` " \n\t")
|
|
|
|
|
|
|
|
-- | Remove trailing space (including newlines) from string.
|
|
|
|
removeTrailingSpace :: String -> String
|
|
|
|
removeTrailingSpace = reverse . removeLeadingSpace . reverse
|
|
|
|
|
|
|
|
-- | Strip leading and trailing characters from string
|
|
|
|
stripFirstAndLast :: String -> String
|
|
|
|
stripFirstAndLast str =
|
|
|
|
drop 1 $ take ((length str) - 1) str
|
|
|
|
|
|
|
|
-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
|
|
|
|
camelCaseToHyphenated :: String -> String
|
|
|
|
camelCaseToHyphenated [] = ""
|
|
|
|
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
|
|
|
|
a:'-':(toLower b):(camelCaseToHyphenated rest)
|
|
|
|
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
|
|
|
|
|
|
|
|
-- | Convert number < 4000 to uppercase roman numeral.
|
|
|
|
toRomanNumeral :: Int -> String
|
|
|
|
toRomanNumeral x =
|
|
|
|
if x >= 4000 || x < 0
|
|
|
|
then "?"
|
|
|
|
else case x of
|
|
|
|
_ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
|
|
|
|
_ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
|
|
|
|
_ | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
|
|
|
|
_ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
|
|
|
|
_ | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
|
|
|
|
_ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
|
|
|
|
_ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
|
|
|
|
_ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
|
|
|
|
_ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
|
|
|
|
_ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5)
|
|
|
|
_ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
|
|
|
|
_ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4)
|
|
|
|
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
|
|
|
|
_ -> ""
|
|
|
|
|
2010-03-23 13:51:52 -07:00
|
|
|
-- | Escape unicode characters in a URI. Characters that are
|
|
|
|
-- already valid in a URI, including % and ?, are left alone.
|
2010-03-23 15:05:33 -07:00
|
|
|
escapeURI :: String -> String
|
|
|
|
escapeURI = escapeURIString isAllowedInURI . encodeString
|
2010-03-22 19:29:37 -07:00
|
|
|
|
2010-03-23 15:34:53 -07:00
|
|
|
-- | Unescape unicode and some special characters in a URI, but
|
|
|
|
-- without introducing spaces.
|
|
|
|
unescapeURI :: String -> String
|
|
|
|
unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
|
|
|
|
decodeString . unEscapeString
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
-- | Wrap inlines to line length.
|
|
|
|
wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
|
|
|
|
wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>=
|
|
|
|
return . fsep
|
|
|
|
|
2007-11-15 03:09:31 +00:00
|
|
|
-- | Wrap inlines if the text wrap option is selected.
|
2007-11-03 23:27:58 +00:00
|
|
|
wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) ->
|
|
|
|
[Inline] -> m Doc
|
|
|
|
wrapIfNeeded opts = if writerWrapText opts
|
|
|
|
then wrapped
|
|
|
|
else ($)
|
|
|
|
|
2007-11-15 03:09:31 +00:00
|
|
|
-- auxiliary function for wrappedTeX
|
2008-06-17 19:07:11 +00:00
|
|
|
isNote :: Inline -> Bool
|
2007-11-15 03:09:31 +00:00
|
|
|
isNote (Note _) = True
|
|
|
|
isNote _ = False
|
|
|
|
|
|
|
|
-- | Wrap inlines to line length, treating footnotes in a way that
|
|
|
|
-- makes sense in LaTeX and ConTeXt.
|
2007-11-17 18:42:11 +00:00
|
|
|
wrappedTeX :: Monad m
|
2007-11-18 01:44:08 +00:00
|
|
|
=> Bool
|
|
|
|
-> ([Inline] -> m Doc)
|
2007-11-17 18:42:11 +00:00
|
|
|
-> [Inline]
|
|
|
|
-> m Doc
|
|
|
|
wrappedTeX includePercent listWriter sect = do
|
2007-11-15 03:09:31 +00:00
|
|
|
let (firstpart, rest) = break isNote sect
|
|
|
|
firstpartWrapped <- wrapped listWriter firstpart
|
|
|
|
if null rest
|
|
|
|
then return firstpartWrapped
|
|
|
|
else do let (note:rest') = rest
|
2008-10-19 00:33:12 +00:00
|
|
|
let (rest1, rest2) = break (== Space) rest'
|
|
|
|
-- rest1 is whatever comes between the note and a Space.
|
|
|
|
-- if the note is followed directly by a Space, rest1 is null.
|
|
|
|
-- rest1 is printed after the note but before the line break,
|
|
|
|
-- to avoid spurious blank space the note and immediately
|
|
|
|
-- following punctuation.
|
|
|
|
rest1Out <- if null rest1
|
|
|
|
then return empty
|
|
|
|
else listWriter rest1
|
|
|
|
rest2Wrapped <- if null rest2
|
|
|
|
then return empty
|
|
|
|
else wrappedTeX includePercent listWriter (tail rest2)
|
2007-11-15 03:09:31 +00:00
|
|
|
noteText <- listWriter [note]
|
2008-10-19 00:33:12 +00:00
|
|
|
return $ (firstpartWrapped <> if includePercent then PP.char '%' else empty) $$
|
|
|
|
(noteText <> rest1Out) $$
|
|
|
|
rest2Wrapped
|
2007-11-15 03:09:31 +00:00
|
|
|
|
|
|
|
-- | Wrap inlines if the text wrap option is selected, specialized
|
|
|
|
-- for LaTeX and ConTeXt.
|
2007-11-17 18:42:11 +00:00
|
|
|
wrapTeXIfNeeded :: Monad m
|
|
|
|
=> WriterOptions
|
2007-11-18 01:44:08 +00:00
|
|
|
-> Bool
|
|
|
|
-> ([Inline] -> m Doc)
|
2007-11-17 18:42:11 +00:00
|
|
|
-> [Inline]
|
|
|
|
-> m Doc
|
|
|
|
wrapTeXIfNeeded opts includePercent = if writerWrapText opts
|
|
|
|
then wrappedTeX includePercent
|
|
|
|
else ($)
|
2007-11-15 03:09:31 +00:00
|
|
|
|
2008-02-24 05:48:31 +00:00
|
|
|
-- | Indicates whether block should be surrounded by blank lines (@Pad@) or not (@Reg@).
|
|
|
|
data BlockWrapper = Pad Doc | Reg Doc
|
|
|
|
|
|
|
|
-- | Converts a list of wrapped blocks to a Doc, with appropriate spaces around blocks.
|
|
|
|
wrappedBlocksToDoc :: [BlockWrapper] -> Doc
|
|
|
|
wrappedBlocksToDoc = foldr addBlock empty
|
|
|
|
where addBlock (Pad d) accum | isEmpty accum = d
|
|
|
|
addBlock (Pad d) accum = d $$ text "" $$ accum
|
|
|
|
addBlock (Reg d) accum = d $$ accum
|
|
|
|
|
2009-01-31 17:13:41 +00:00
|
|
|
-- | Convert tabs to spaces and filter out DOS line endings.
|
|
|
|
-- Tabs will be preserved if tab stop is set to 0.
|
|
|
|
tabFilter :: Int -- ^ Tab stop
|
|
|
|
-> String -- ^ Input
|
|
|
|
-> String
|
|
|
|
tabFilter tabStop =
|
|
|
|
let go _ [] = ""
|
|
|
|
go _ ('\n':xs) = '\n' : go tabStop xs
|
|
|
|
go _ ('\r':'\n':xs) = '\n' : go tabStop xs
|
|
|
|
go _ ('\r':xs) = '\n' : go tabStop xs
|
|
|
|
go spsToNextStop ('\t':xs) =
|
|
|
|
if tabStop == 0
|
|
|
|
then '\t' : go tabStop xs
|
|
|
|
else replicate spsToNextStop ' ' ++ go tabStop xs
|
|
|
|
go 1 (x:xs) =
|
|
|
|
x : go tabStop xs
|
|
|
|
go spsToNextStop (x:xs) =
|
|
|
|
x : go (spsToNextStop - 1) xs
|
|
|
|
in go tabStop
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
2008-12-17 15:34:25 +00:00
|
|
|
-- Prettyprinting
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
2008-12-17 15:34:25 +00:00
|
|
|
|
|
|
|
-- | A version of hang that works like the version in pretty-1.0.0.0
|
|
|
|
hang' :: Doc -> Int -> Doc -> Doc
|
|
|
|
hang' d1 n d2 = d1 $$ (nest n d2)
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
--
|
|
|
|
-- Pandoc block and inline list processing
|
|
|
|
--
|
|
|
|
|
|
|
|
-- | Generate infinite lazy list of markers for an ordered list,
|
|
|
|
-- depending on list attributes.
|
|
|
|
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
|
|
|
|
orderedListMarkers (start, numstyle, numdelim) =
|
|
|
|
let singleton c = [c]
|
|
|
|
nums = case numstyle of
|
|
|
|
DefaultStyle -> map show [start..]
|
|
|
|
Decimal -> map show [start..]
|
|
|
|
UpperAlpha -> drop (start - 1) $ cycle $
|
|
|
|
map singleton ['A'..'Z']
|
|
|
|
LowerAlpha -> drop (start - 1) $ cycle $
|
|
|
|
map singleton ['a'..'z']
|
|
|
|
UpperRoman -> map toRomanNumeral [start..]
|
|
|
|
LowerRoman -> map (map toLower . toRomanNumeral) [start..]
|
|
|
|
inDelim str = case numdelim of
|
|
|
|
DefaultDelim -> str ++ "."
|
|
|
|
Period -> str ++ "."
|
|
|
|
OneParen -> str ++ ")"
|
|
|
|
TwoParens -> "(" ++ str ++ ")"
|
|
|
|
in map inDelim nums
|
|
|
|
|
|
|
|
-- | Normalize a list of inline elements: remove leading and trailing
|
|
|
|
-- @Space@ elements, collapse double @Space@s into singles, and
|
|
|
|
-- remove empty Str elements.
|
|
|
|
normalizeSpaces :: [Inline] -> [Inline]
|
|
|
|
normalizeSpaces [] = []
|
|
|
|
normalizeSpaces list =
|
|
|
|
let removeDoubles [] = []
|
|
|
|
removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
|
|
|
|
removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest)
|
|
|
|
removeDoubles ((Str ""):rest) = removeDoubles rest
|
|
|
|
removeDoubles (x:rest) = x:(removeDoubles rest)
|
|
|
|
removeLeading (Space:xs) = removeLeading xs
|
|
|
|
removeLeading x = x
|
|
|
|
removeTrailing [] = []
|
|
|
|
removeTrailing lst = if (last lst == Space)
|
|
|
|
then init lst
|
|
|
|
else lst
|
|
|
|
in removeLeading $ removeTrailing $ removeDoubles list
|
|
|
|
|
2009-11-01 02:38:18 +00:00
|
|
|
-- | Change final list item from @Para@ to @Plain@ if the list contains
|
|
|
|
-- no other @Para@ blocks.
|
2007-11-03 23:27:58 +00:00
|
|
|
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
|
|
|
|
-> [[Block]]
|
|
|
|
compactify [] = []
|
|
|
|
compactify items =
|
2009-11-01 02:38:18 +00:00
|
|
|
case (init items, last items) of
|
|
|
|
(_,[]) -> items
|
|
|
|
(others, final) ->
|
|
|
|
case last final of
|
|
|
|
Para a -> case (filter isPara $ concat items) of
|
|
|
|
-- if this is only Para, change to Plain
|
|
|
|
[_] -> others ++ [init final ++ [Plain a]]
|
|
|
|
_ -> items
|
|
|
|
_ -> items
|
|
|
|
|
|
|
|
isPara :: Block -> Bool
|
|
|
|
isPara (Para _) = True
|
|
|
|
isPara _ = False
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- | Data structure for defining hierarchical Pandoc documents
|
|
|
|
data Element = Blk Block
|
2009-12-08 02:36:16 +00:00
|
|
|
| Sec Int [Int] String [Inline] [Element]
|
|
|
|
-- lvl num ident label contents
|
2009-04-25 00:29:58 +00:00
|
|
|
deriving (Eq, Read, Show, Typeable, Data)
|
|
|
|
|
2010-03-28 22:29:31 -07:00
|
|
|
-- | Convert Pandoc inline list to plain text identifier. HTML
|
|
|
|
-- identifiers must start with a letter, and may contain only
|
|
|
|
-- letters, digits, and the characters _-:.
|
2009-04-25 00:29:58 +00:00
|
|
|
inlineListToIdentifier :: [Inline] -> String
|
2010-03-28 22:29:31 -07:00
|
|
|
inlineListToIdentifier =
|
|
|
|
dropWhile (not . isAlpha) . intercalate "-" . words . map toLower .
|
|
|
|
filter (\c -> isLetter c || isDigit c || c `elem` "_-:. ") .
|
|
|
|
concatMap extractText
|
|
|
|
where extractText x = case x of
|
|
|
|
Str s -> s
|
|
|
|
Emph lst -> concatMap extractText lst
|
|
|
|
Strikeout lst -> concatMap extractText lst
|
|
|
|
Superscript lst -> concatMap extractText lst
|
|
|
|
SmallCaps lst -> concatMap extractText lst
|
|
|
|
Subscript lst -> concatMap extractText lst
|
|
|
|
Strong lst -> concatMap extractText lst
|
|
|
|
Quoted _ lst -> concatMap extractText lst
|
|
|
|
Cite _ lst -> concatMap extractText lst
|
|
|
|
Code s -> s
|
|
|
|
Space -> " "
|
|
|
|
EmDash -> "---"
|
|
|
|
EnDash -> "--"
|
|
|
|
Apostrophe -> ""
|
|
|
|
Ellipses -> "..."
|
|
|
|
LineBreak -> " "
|
|
|
|
Math _ s -> s
|
|
|
|
TeX _ -> ""
|
|
|
|
HtmlInline _ -> ""
|
|
|
|
Link lst _ -> concatMap extractText lst
|
|
|
|
Image lst _ -> concatMap extractText lst
|
|
|
|
Note _ -> ""
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
|
|
|
|
hierarchicalize :: [Block] -> [Element]
|
2009-12-08 02:36:16 +00:00
|
|
|
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) ([],[])
|
2009-04-25 00:29:58 +00:00
|
|
|
|
2009-12-08 02:36:16 +00:00
|
|
|
hierarchicalizeWithIds :: [Block] -> S.State ([Int],[String]) [Element]
|
2009-04-25 00:29:58 +00:00
|
|
|
hierarchicalizeWithIds [] = return []
|
|
|
|
hierarchicalizeWithIds ((Header level title'):xs) = do
|
2009-12-08 02:36:16 +00:00
|
|
|
(lastnum, usedIdents) <- S.get
|
2009-04-25 00:29:58 +00:00
|
|
|
let ident = uniqueIdent title' usedIdents
|
2009-12-08 02:36:16 +00:00
|
|
|
let lastnum' = take level lastnum
|
|
|
|
let newnum = if length lastnum' >= level
|
|
|
|
then init lastnum' ++ [last lastnum' + 1]
|
|
|
|
else lastnum ++ replicate (level - length lastnum - 1) 0 ++ [1]
|
|
|
|
S.put (newnum, (ident : usedIdents))
|
2009-04-25 00:29:58 +00:00
|
|
|
let (sectionContents, rest) = break (headerLtEq level) xs
|
|
|
|
sectionContents' <- hierarchicalizeWithIds sectionContents
|
|
|
|
rest' <- hierarchicalizeWithIds rest
|
2009-12-08 02:36:16 +00:00
|
|
|
return $ Sec level newnum ident title' sectionContents' : rest'
|
2009-04-25 00:29:58 +00:00
|
|
|
hierarchicalizeWithIds (x:rest) = do
|
|
|
|
rest' <- hierarchicalizeWithIds rest
|
|
|
|
return $ (Blk x) : rest'
|
|
|
|
|
|
|
|
headerLtEq :: Int -> Block -> Bool
|
|
|
|
headerLtEq level (Header l _) = l <= level
|
|
|
|
headerLtEq _ _ = False
|
|
|
|
|
2010-03-16 06:45:52 +00:00
|
|
|
-- | Generate a unique identifier from a list of inlines.
|
|
|
|
-- Second argument is a list of already used identifiers.
|
2009-04-25 00:29:58 +00:00
|
|
|
uniqueIdent :: [Inline] -> [String] -> String
|
|
|
|
uniqueIdent title' usedIdents =
|
2010-03-28 22:29:31 -07:00
|
|
|
let baseIdent = case inlineListToIdentifier title' of
|
|
|
|
"" -> "section"
|
|
|
|
x -> x
|
2009-04-25 00:29:58 +00:00
|
|
|
numIdent n = baseIdent ++ "-" ++ show n
|
|
|
|
in if baseIdent `elem` usedIdents
|
|
|
|
then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
|
|
|
|
Just x -> numIdent x
|
|
|
|
Nothing -> baseIdent -- if we have more than 60,000, allow repeats
|
|
|
|
else baseIdent
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
-- | True if block is a Header block.
|
|
|
|
isHeaderBlock :: Block -> Bool
|
|
|
|
isHeaderBlock (Header _ _) = True
|
|
|
|
isHeaderBlock _ = False
|
|
|
|
|
|
|
|
--
|
|
|
|
-- Writer options
|
|
|
|
--
|
|
|
|
|
2007-12-01 03:11:44 +00:00
|
|
|
data HTMLMathMethod = PlainMath
|
2008-08-13 03:02:42 +00:00
|
|
|
| LaTeXMathML (Maybe String) -- url of LaTeXMathML.js
|
2008-10-28 21:54:50 +00:00
|
|
|
| JsMath (Maybe String) -- url of jsMath load script
|
2007-12-01 03:11:44 +00:00
|
|
|
| GladTeX
|
|
|
|
| MimeTeX String -- url of mimetex.cgi
|
2010-03-18 06:45:56 +00:00
|
|
|
| MathML (Maybe String) -- url of MathMLinHTML.js
|
2007-12-01 03:11:44 +00:00
|
|
|
deriving (Show, Read, Eq)
|
|
|
|
|
2009-01-24 19:58:48 +00:00
|
|
|
-- | Methods for obfuscating email addresses in HTML.
|
|
|
|
data ObfuscationMethod = NoObfuscation
|
|
|
|
| ReferenceObfuscation
|
|
|
|
| JavascriptObfuscation
|
|
|
|
deriving (Show, Read, Eq)
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
-- | Options for writers
|
|
|
|
data WriterOptions = WriterOptions
|
2009-01-24 19:58:48 +00:00
|
|
|
{ writerStandalone :: Bool -- ^ Include header and footer
|
2009-12-31 01:09:28 +00:00
|
|
|
, writerTemplate :: String -- ^ Template to use in standalone mode
|
|
|
|
, writerVariables :: [(String, String)] -- ^ Variables to set in template
|
2010-07-04 16:56:17 -07:00
|
|
|
, writerEPUBMetadata :: String -- ^ Metadata to include in EPUB
|
2009-01-24 19:58:48 +00:00
|
|
|
, writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
|
|
|
|
, writerTableOfContents :: Bool -- ^ Include table of contents
|
|
|
|
, writerS5 :: Bool -- ^ We're writing S5
|
2009-12-31 01:18:06 +00:00
|
|
|
, writerXeTeX :: Bool -- ^ Create latex suitable for use by xetex
|
2009-01-24 19:58:48 +00:00
|
|
|
, writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
|
|
|
|
, writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
|
|
|
|
, writerIncremental :: Bool -- ^ Incremental S5 lists
|
|
|
|
, writerNumberSections :: Bool -- ^ Number sections in LaTeX
|
|
|
|
, writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
|
|
|
|
, writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
|
|
|
|
, writerWrapText :: Bool -- ^ Wrap text to line length
|
|
|
|
, writerLiterateHaskell :: Bool -- ^ Write as literate haskell
|
|
|
|
, writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
|
2009-12-05 17:56:02 +00:00
|
|
|
, writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML
|
2007-11-03 23:27:58 +00:00
|
|
|
} deriving Show
|
|
|
|
|
|
|
|
-- | Default writer options.
|
|
|
|
defaultWriterOptions :: WriterOptions
|
|
|
|
defaultWriterOptions =
|
2009-01-24 19:58:48 +00:00
|
|
|
WriterOptions { writerStandalone = False
|
2009-12-31 01:09:28 +00:00
|
|
|
, writerTemplate = ""
|
|
|
|
, writerVariables = []
|
2010-07-04 16:56:17 -07:00
|
|
|
, writerEPUBMetadata = ""
|
2009-01-24 19:58:48 +00:00
|
|
|
, writerTabStop = 4
|
|
|
|
, writerTableOfContents = False
|
|
|
|
, writerS5 = False
|
2010-01-02 21:09:29 +00:00
|
|
|
, writerXeTeX = False
|
2009-01-24 19:58:48 +00:00
|
|
|
, writerHTMLMathMethod = PlainMath
|
|
|
|
, writerIgnoreNotes = False
|
|
|
|
, writerIncremental = False
|
|
|
|
, writerNumberSections = False
|
|
|
|
, writerStrictMarkdown = False
|
|
|
|
, writerReferenceLinks = False
|
|
|
|
, writerWrapText = True
|
|
|
|
, writerLiterateHaskell = False
|
|
|
|
, writerEmailObfuscation = JavascriptObfuscation
|
2009-12-05 17:56:02 +00:00
|
|
|
, writerIdentifierPrefix = ""
|
2007-11-03 23:27:58 +00:00
|
|
|
}
|
2008-07-31 23:16:02 +00:00
|
|
|
|
2008-08-02 17:22:55 +00:00
|
|
|
--
|
|
|
|
-- File handling
|
|
|
|
--
|
|
|
|
|
2008-09-04 02:51:28 +00:00
|
|
|
-- | Perform an IO action in a directory, returning to starting directory.
|
|
|
|
inDirectory :: FilePath -> IO a -> IO a
|
|
|
|
inDirectory path action = do
|
|
|
|
oldDir <- getCurrentDirectory
|
|
|
|
setCurrentDirectory path
|
|
|
|
result <- action
|
|
|
|
setCurrentDirectory oldDir
|
|
|
|
return result
|
2009-12-31 01:11:23 +00:00
|
|
|
|
2010-01-14 05:54:38 +00:00
|
|
|
-- | Read file from specified user data directory or, if not found there, from
|
|
|
|
-- Cabal data directory.
|
2010-01-18 07:01:29 +00:00
|
|
|
readDataFile :: Maybe FilePath -> FilePath -> IO String
|
|
|
|
readDataFile userDir fname =
|
|
|
|
case userDir of
|
2010-05-06 20:29:44 -07:00
|
|
|
Nothing -> getDataFileName fname >>= UTF8.readFile
|
|
|
|
Just u -> catch (UTF8.readFile $ u </> fname)
|
|
|
|
(\_ -> getDataFileName fname >>= UTF8.readFile)
|