2006-12-20 20:54:23 +00:00
|
|
|
{-
|
|
|
|
Copyright (C) 2006 John MacFarlane <jgm at berkeley dot edu>
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.Shared
|
|
|
|
Copyright : Copyright (C) 2006 John MacFarlane
|
|
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm at berkeley dot edu>
|
2006-12-20 20:20:10 +00:00
|
|
|
Stability : alpha
|
2006-12-20 06:50:14 +00:00
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
Utility functions and definitions used by the various Pandoc modules.
|
|
|
|
-}
|
2006-10-17 14:22:29 +00:00
|
|
|
module Text.Pandoc.Shared (
|
2007-01-04 22:52:16 +00:00
|
|
|
-- * List processing
|
|
|
|
splitBy,
|
2007-01-15 19:52:42 +00:00
|
|
|
splitByIndices,
|
2007-01-22 21:28:46 +00:00
|
|
|
substitute,
|
2006-10-17 14:22:29 +00:00
|
|
|
-- * Text processing
|
|
|
|
joinWithSep,
|
|
|
|
tabsToSpaces,
|
|
|
|
backslashEscape,
|
|
|
|
endsWith,
|
|
|
|
stripTrailingNewlines,
|
|
|
|
removeLeadingTrailingSpace,
|
|
|
|
removeLeadingSpace,
|
|
|
|
removeTrailingSpace,
|
2006-11-26 07:01:37 +00:00
|
|
|
stripFirstAndLast,
|
2006-10-17 14:22:29 +00:00
|
|
|
-- * Parsing
|
|
|
|
readWith,
|
|
|
|
testStringWith,
|
|
|
|
HeaderType (..),
|
|
|
|
ParserContext (..),
|
2007-01-06 18:41:01 +00:00
|
|
|
QuoteContext (..),
|
2006-10-17 14:22:29 +00:00
|
|
|
ParserState (..),
|
|
|
|
defaultParserState,
|
|
|
|
-- * Native format prettyprinting
|
|
|
|
prettyPandoc,
|
|
|
|
-- * Pandoc block list processing
|
|
|
|
isNoteBlock,
|
|
|
|
normalizeSpaces,
|
|
|
|
compactify,
|
|
|
|
generateReference,
|
|
|
|
WriterOptions (..),
|
|
|
|
KeyTable,
|
|
|
|
keyTable,
|
|
|
|
lookupKeySrc,
|
|
|
|
refsMatch,
|
|
|
|
replaceReferenceLinks,
|
2007-01-04 22:52:16 +00:00
|
|
|
replaceRefLinksBlockList,
|
|
|
|
-- * SGML
|
|
|
|
inTags,
|
|
|
|
selfClosingTag,
|
|
|
|
inTagsSimple,
|
|
|
|
inTagsIndented
|
2006-10-17 14:22:29 +00:00
|
|
|
) where
|
|
|
|
import Text.Pandoc.Definition
|
2007-01-24 23:25:27 +00:00
|
|
|
import Text.ParserCombinators.Parsec as Parsec
|
Changes in entity handling:
+ Entities are parsed (and unicode characters returned) in both
Markdown and HTML readers.
+ Parsers characterEntity, namedEntity, decimalEntity, hexEntity added
to Entities.hs; these parse a string and return a unicode character.
+ Changed 'entity' parser in HTML reader to use the 'characterEntity'
parser from Entities.hs.
+ Added new 'entity' parser to Markdown reader, and added '&' as a
special character. Adjusted test suite accordingly since now we
get 'Str "AT",Str "&",Str "T"' instead of 'Str "AT&T"..
+ stringToSGML moved to Entities.hs. escapeSGML removed as redundant,
given encodeEntities.
+ stringToSGML, encodeEntities, and specialCharToEntity are given a
boolean parameter that causes only numerical entities to be used.
This is used in the docbook writer. The HTML writer uses named
entities where possible, but not all docbook-consumers know about
the named entities without special instructions, so it seems safer
to use numerical entities there.
+ decodeEntities is rewritten in a way that avoids Text.Regex, using
the new parsers.
+ charToEntity and charToNumericalEntity added to Entities.hs.
+ Moved specialCharToEntity from Shared.hs to Entities.hs.
+ Removed unneeded 'decodeEntities' from 'str' parser in HTML and
Markdown readers.
+ Removed sgmlHexEntity, sgmlDecimalEntity, sgmlNamedEntity, and
sgmlCharacterEntity from Shared.hs.
+ Modified Docbook writer so that it doesn't rely on Text.Regex for
detecting "mailto" links.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@515 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-01-27 03:04:40 +00:00
|
|
|
import Text.Pandoc.Entities ( decodeEntities, encodeEntities, stringToSGML )
|
2007-01-24 23:25:27 +00:00
|
|
|
import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>),
|
|
|
|
($$), nest, Doc, isEmpty )
|
|
|
|
import Data.Char ( toLower, ord )
|
2007-01-22 21:28:46 +00:00
|
|
|
import Data.List ( find, groupBy, isPrefixOf )
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Parse a string with a given parser and state.
|
|
|
|
readWith :: GenParser Char ParserState a -- ^ parser
|
|
|
|
-> ParserState -- ^ initial state
|
|
|
|
-> String -- ^ input string
|
|
|
|
-> a
|
|
|
|
readWith parser state input =
|
|
|
|
case runParser parser state "source" input of
|
2006-12-20 06:50:14 +00:00
|
|
|
Left err -> error $ "\nError:\n" ++ show err
|
2006-10-17 14:22:29 +00:00
|
|
|
Right result -> result
|
|
|
|
|
|
|
|
-- | Parse a string with @parser@ (for testing).
|
|
|
|
testStringWith :: (Show a) =>
|
|
|
|
GenParser Char ParserState a
|
2006-12-20 06:50:14 +00:00
|
|
|
-> String
|
|
|
|
-> IO ()
|
|
|
|
testStringWith parser str = putStrLn $ show $
|
|
|
|
readWith parser defaultParserState str
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
data HeaderType
|
|
|
|
= SingleHeader Char -- ^ Single line of characters underneath
|
|
|
|
| DoubleHeader Char -- ^ Lines of characters above and below
|
|
|
|
deriving (Eq, Show)
|
|
|
|
|
|
|
|
data ParserContext
|
2006-12-21 09:02:06 +00:00
|
|
|
= ListItemState -- ^ Used when running parser on list item contents
|
2006-12-20 06:50:14 +00:00
|
|
|
| NullState -- ^ Default state
|
2006-10-17 14:22:29 +00:00
|
|
|
deriving (Eq, Show)
|
|
|
|
|
2007-01-06 18:41:01 +00:00
|
|
|
data QuoteContext
|
|
|
|
= InSingleQuote -- ^ Used when we're parsing inside single quotes
|
|
|
|
| InDoubleQuote -- ^ Used when we're parsing inside double quotes
|
|
|
|
| NoQuote -- ^ Used when we're not parsing inside quotes
|
|
|
|
deriving (Eq, Show)
|
|
|
|
|
2006-10-17 14:22:29 +00:00
|
|
|
data ParserState = ParserState
|
2006-12-20 06:50:14 +00:00
|
|
|
{ stateParseRaw :: Bool, -- ^ Parse untranslatable HTML
|
|
|
|
-- and LaTeX?
|
|
|
|
stateParserContext :: ParserContext, -- ^ What are we parsing?
|
2007-01-06 18:41:01 +00:00
|
|
|
stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
|
2006-12-20 06:50:14 +00:00
|
|
|
stateKeyBlocks :: [Block], -- ^ List of reference key blocks
|
|
|
|
stateKeysUsed :: [[Inline]], -- ^ List of references used
|
|
|
|
stateNoteBlocks :: [Block], -- ^ List of note blocks
|
|
|
|
stateNoteIdentifiers :: [String], -- ^ List of footnote identifiers
|
|
|
|
-- in the order encountered
|
|
|
|
stateTabStop :: Int, -- ^ Tab stop
|
|
|
|
stateStandalone :: Bool, -- ^ If @True@, parse
|
|
|
|
-- bibliographic info
|
|
|
|
stateTitle :: [Inline], -- ^ Title of document
|
|
|
|
stateAuthors :: [String], -- ^ Authors of document
|
|
|
|
stateDate :: String, -- ^ Date of document
|
2006-12-30 22:51:49 +00:00
|
|
|
stateStrict :: Bool, -- ^ Use strict markdown syntax
|
2007-01-06 09:54:58 +00:00
|
|
|
stateSmart :: Bool, -- ^ Use smart typography
|
2007-01-15 19:52:42 +00:00
|
|
|
stateColumns :: Int, -- ^ Number of columns in
|
|
|
|
-- terminal (used for tables)
|
2006-12-20 06:50:14 +00:00
|
|
|
stateHeaderTable :: [HeaderType] -- ^ List of header types used,
|
|
|
|
-- in what order (rst only)
|
2006-11-08 17:50:09 +00:00
|
|
|
}
|
2006-10-17 14:22:29 +00:00
|
|
|
deriving Show
|
|
|
|
|
|
|
|
defaultParserState :: ParserState
|
|
|
|
defaultParserState =
|
2006-12-19 23:13:03 +00:00
|
|
|
ParserState { stateParseRaw = False,
|
|
|
|
stateParserContext = NullState,
|
2007-01-06 18:41:01 +00:00
|
|
|
stateQuoteContext = NoQuote,
|
2006-12-19 23:13:03 +00:00
|
|
|
stateKeyBlocks = [],
|
|
|
|
stateKeysUsed = [],
|
|
|
|
stateNoteBlocks = [],
|
|
|
|
stateNoteIdentifiers = [],
|
|
|
|
stateTabStop = 4,
|
|
|
|
stateStandalone = False,
|
|
|
|
stateTitle = [],
|
|
|
|
stateAuthors = [],
|
|
|
|
stateDate = [],
|
2006-12-30 22:51:49 +00:00
|
|
|
stateStrict = False,
|
2007-01-06 09:54:58 +00:00
|
|
|
stateSmart = False,
|
2007-01-15 19:52:42 +00:00
|
|
|
stateColumns = 80,
|
2006-12-19 23:13:03 +00:00
|
|
|
stateHeaderTable = [] }
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Indent string as a block.
|
2006-12-20 06:50:14 +00:00
|
|
|
indentBy :: Int -- ^ Number of spaces to indent the block
|
|
|
|
-> Int -- ^ Number of spaces (rel to block) to indent first line
|
|
|
|
-> String -- ^ Contents of block to indent
|
2006-10-17 14:22:29 +00:00
|
|
|
-> String
|
|
|
|
indentBy num first [] = ""
|
|
|
|
indentBy num first str =
|
|
|
|
let (firstLine:restLines) = lines str
|
|
|
|
firstLineIndent = num + first in
|
|
|
|
(replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ (joinWithSep "\n" $ map (\line -> (replicate num ' ') ++ line) restLines)
|
|
|
|
|
|
|
|
-- | Prettyprint list of Pandoc blocks elements.
|
|
|
|
prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
|
|
|
|
-> [Block] -- ^ List of blocks
|
|
|
|
-> String
|
|
|
|
prettyBlockList indent [] = indentBy indent 0 "[]"
|
2006-12-20 06:50:14 +00:00
|
|
|
prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
|
|
|
|
(joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Prettyprint Pandoc block element.
|
|
|
|
prettyBlock :: Block -> String
|
2006-12-20 06:50:14 +00:00
|
|
|
prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
|
|
|
|
(prettyBlockList 2 blocks)
|
|
|
|
prettyBlock (Note ref blocks) = "Note " ++ (show ref) ++ "\n " ++
|
|
|
|
(prettyBlockList 2 blocks)
|
|
|
|
prettyBlock (OrderedList blockLists) =
|
|
|
|
"OrderedList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", "
|
|
|
|
(map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
|
|
|
|
prettyBlock (BulletList blockLists) = "BulletList\n" ++
|
|
|
|
indentBy 2 0 ("[ " ++ (joinWithSep ", "
|
|
|
|
(map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
|
2006-10-17 14:22:29 +00:00
|
|
|
prettyBlock block = show block
|
|
|
|
|
|
|
|
-- | Prettyprint Pandoc document.
|
|
|
|
prettyPandoc :: Pandoc -> String
|
2006-12-20 06:50:14 +00:00
|
|
|
prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ (show meta) ++
|
|
|
|
")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Convert tabs to spaces (with adjustable tab stop).
|
|
|
|
tabsToSpaces :: Int -- ^ Tabstop
|
|
|
|
-> String -- ^ String to convert
|
|
|
|
-> String
|
|
|
|
tabsToSpaces tabstop str =
|
|
|
|
unlines (map (tabsInLine tabstop tabstop) (lines str))
|
|
|
|
|
|
|
|
-- | Convert tabs to spaces in one line.
|
|
|
|
tabsInLine :: Int -- ^ Number of spaces to next tab stop
|
|
|
|
-> Int -- ^ Tabstop
|
|
|
|
-> String -- ^ Line to convert
|
|
|
|
-> String
|
|
|
|
tabsInLine num tabstop "" = ""
|
|
|
|
tabsInLine num tabstop (c:cs) =
|
|
|
|
let replacement = (if (c == '\t') then (replicate num ' ') else [c]) in
|
|
|
|
let nextnumraw = (num - (length replacement)) in
|
2006-12-20 06:50:14 +00:00
|
|
|
let nextnum = if (nextnumraw < 1)
|
|
|
|
then (nextnumraw + tabstop)
|
|
|
|
else nextnumraw in
|
2006-10-17 14:22:29 +00:00
|
|
|
replacement ++ (tabsInLine nextnum tabstop cs)
|
|
|
|
|
|
|
|
-- | Escape designated characters with backslash.
|
|
|
|
backslashEscape :: [Char] -- ^ list of special characters to escape
|
|
|
|
-> String -- ^ string input
|
|
|
|
-> String
|
|
|
|
backslashEscape special [] = []
|
2006-12-20 06:50:14 +00:00
|
|
|
backslashEscape special (x:xs) = if x `elem` special
|
|
|
|
then '\\':x:(backslashEscape special xs)
|
|
|
|
else x:(backslashEscape special xs)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Returns @True@ if string ends with given character.
|
|
|
|
endsWith :: Char -> [Char] -> Bool
|
|
|
|
endsWith char [] = False
|
|
|
|
endsWith char str = (char == last str)
|
|
|
|
|
|
|
|
-- | Returns @True@ if block is a @Note@ block
|
|
|
|
isNoteBlock :: Block -> Bool
|
|
|
|
isNoteBlock (Note ref blocks) = True
|
|
|
|
isNoteBlock _ = False
|
|
|
|
|
|
|
|
-- | Joins a list of lists, separated by another list.
|
|
|
|
joinWithSep :: [a] -- ^ List to use as separator
|
|
|
|
-> [[a]] -- ^ Lists to join
|
|
|
|
-> [a]
|
|
|
|
joinWithSep sep [] = []
|
|
|
|
joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
|
|
|
|
|
|
|
|
-- | Strip trailing newlines from string.
|
|
|
|
stripTrailingNewlines :: String -> String
|
|
|
|
stripTrailingNewlines "" = ""
|
|
|
|
stripTrailingNewlines str =
|
2006-12-20 06:50:14 +00:00
|
|
|
if (last str) == '\n'
|
|
|
|
then stripTrailingNewlines (init str)
|
|
|
|
else str
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Remove leading and trailing space (including newlines) from string.
|
|
|
|
removeLeadingTrailingSpace :: String -> String
|
|
|
|
removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
|
|
|
|
|
|
|
|
-- | Remove leading space (including newlines) from string.
|
|
|
|
removeLeadingSpace :: String -> String
|
2006-12-20 06:50:14 +00:00
|
|
|
removeLeadingSpace = dropWhile (\x -> (x == ' ') || (x == '\n') ||
|
|
|
|
(x == '\t'))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Remove trailing space (including newlines) from string.
|
|
|
|
removeTrailingSpace :: String -> String
|
|
|
|
removeTrailingSpace = reverse . removeLeadingSpace . reverse
|
|
|
|
|
2006-11-26 07:01:37 +00:00
|
|
|
-- | Strip leading and trailing characters from string
|
|
|
|
stripFirstAndLast str =
|
|
|
|
drop 1 $ take ((length str) - 1) str
|
|
|
|
|
2007-01-22 21:28:46 +00:00
|
|
|
-- | Replace each occurrence of one sublist in a list with another.
|
|
|
|
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
|
|
|
|
substitute _ _ [] = []
|
|
|
|
substitute [] _ lst = lst
|
|
|
|
substitute target replacement lst =
|
|
|
|
if isPrefixOf target lst
|
|
|
|
then replacement ++ (substitute target replacement $ drop (length target) lst)
|
|
|
|
else (head lst):(substitute target replacement $ tail lst)
|
|
|
|
|
2007-01-04 01:04:56 +00:00
|
|
|
-- | Split list into groups separated by sep.
|
|
|
|
splitBy :: (Eq a) => a -> [a] -> [[a]]
|
|
|
|
splitBy _ [] = []
|
|
|
|
splitBy sep lst =
|
|
|
|
let (first, rest) = break (== sep) lst
|
|
|
|
rest' = dropWhile (== sep) rest in
|
|
|
|
first:(splitBy sep rest')
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-01-15 19:52:42 +00:00
|
|
|
-- | Split list into chunks divided at specified indices.
|
|
|
|
splitByIndices :: [Int] -> [a] -> [[a]]
|
|
|
|
splitByIndices [] lst = [lst]
|
|
|
|
splitByIndices (x:xs) lst =
|
|
|
|
let (first, rest) = splitAt x lst in
|
|
|
|
first:(splitByIndices (map (\y -> y - x) xs) rest)
|
|
|
|
|
2006-10-17 14:22:29 +00:00
|
|
|
-- | Normalize a list of inline elements: remove leading and trailing
|
2007-01-24 08:14:43 +00:00
|
|
|
-- @Space@ elements, collapse double @Space@s into singles, and
|
|
|
|
-- remove empty Str elements.
|
2006-10-17 14:22:29 +00:00
|
|
|
normalizeSpaces :: [Inline] -> [Inline]
|
|
|
|
normalizeSpaces [] = []
|
|
|
|
normalizeSpaces list =
|
|
|
|
let removeDoubles [] = []
|
|
|
|
removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
|
2007-01-24 08:14:43 +00:00
|
|
|
removeDoubles ((Str ""):rest) = removeDoubles rest
|
2006-10-17 14:22:29 +00:00
|
|
|
removeDoubles (x:rest) = x:(removeDoubles rest) in
|
|
|
|
let removeLeading [] = []
|
2006-12-20 06:50:14 +00:00
|
|
|
removeLeading lst = if ((head lst) == Space)
|
|
|
|
then tail lst
|
|
|
|
else lst in
|
2006-10-17 14:22:29 +00:00
|
|
|
let removeTrailing [] = []
|
2006-12-20 06:50:14 +00:00
|
|
|
removeTrailing lst = if ((last lst) == Space)
|
|
|
|
then init lst
|
|
|
|
else lst in
|
2006-10-17 14:22:29 +00:00
|
|
|
removeLeading $ removeTrailing $ removeDoubles list
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
-- | Change final list item from @Para@ to @Plain@ if the list should
|
|
|
|
-- be compact.
|
2006-10-17 14:22:29 +00:00
|
|
|
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
|
|
|
|
-> [[Block]]
|
|
|
|
compactify [] = []
|
|
|
|
compactify items =
|
|
|
|
let final = last items
|
|
|
|
others = init items in
|
|
|
|
case final of
|
2006-12-20 06:50:14 +00:00
|
|
|
[Para a] -> if any containsPara others
|
|
|
|
then items
|
|
|
|
else others ++ [[Plain a]]
|
2006-10-17 14:22:29 +00:00
|
|
|
otherwise -> items
|
|
|
|
|
|
|
|
containsPara :: [Block] -> Bool
|
|
|
|
containsPara [] = False
|
|
|
|
containsPara ((Para a):rest) = True
|
2006-12-20 06:50:14 +00:00
|
|
|
containsPara ((BulletList items):rest) = (any containsPara items) ||
|
|
|
|
(containsPara rest)
|
|
|
|
containsPara ((OrderedList items):rest) = (any containsPara items) ||
|
|
|
|
(containsPara rest)
|
2006-10-17 14:22:29 +00:00
|
|
|
containsPara (x:rest) = containsPara rest
|
|
|
|
|
|
|
|
-- | Options for writers
|
|
|
|
data WriterOptions = WriterOptions
|
2007-01-01 21:08:12 +00:00
|
|
|
{ writerStandalone :: Bool -- ^ Include header and footer
|
2006-12-20 06:50:14 +00:00
|
|
|
, writerTitlePrefix :: String -- ^ Prefix for HTML titles
|
|
|
|
, writerHeader :: String -- ^ Header for the document
|
|
|
|
, writerIncludeBefore :: String -- ^ String to include before the body
|
|
|
|
, writerIncludeAfter :: String -- ^ String to include after the body
|
2006-12-30 22:51:49 +00:00
|
|
|
, writerS5 :: Bool -- ^ We're writing S5
|
|
|
|
, writerIncremental :: Bool -- ^ Incremental S5 lists
|
|
|
|
, writerNumberSections :: Bool -- ^ Number sections in LaTeX
|
|
|
|
, writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
|
2006-12-20 06:50:14 +00:00
|
|
|
, writerTabStop :: Int -- ^ Tabstop for conversion between
|
|
|
|
-- spaces and tabs
|
2007-01-01 21:08:12 +00:00
|
|
|
, writerNotes :: [Block] -- ^ List of note blocks
|
2006-12-20 06:50:14 +00:00
|
|
|
} deriving Show
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- Functions for constructing lists of reference keys
|
|
|
|
--
|
|
|
|
|
|
|
|
-- | Returns @Just@ numerical key reference if there's already a key
|
|
|
|
-- for the specified target in the list of blocks, otherwise @Nothing@.
|
|
|
|
keyFoundIn :: [Block] -- ^ List of key blocks to search
|
|
|
|
-> Target -- ^ Target to search for
|
|
|
|
-> Maybe String
|
|
|
|
keyFoundIn [] src = Nothing
|
2006-12-20 06:50:14 +00:00
|
|
|
keyFoundIn ((Key [Str num] src1):rest) src = if (src1 == src)
|
|
|
|
then Just num
|
|
|
|
else keyFoundIn rest src
|
2006-10-17 14:22:29 +00:00
|
|
|
keyFoundIn (_:rest) src = keyFoundIn rest src
|
|
|
|
|
|
|
|
-- | Return next unique numerical key, given keyList
|
|
|
|
nextUniqueKey :: [[Inline]] -> String
|
|
|
|
nextUniqueKey keys =
|
|
|
|
let nums = [1..10000]
|
|
|
|
notAKey n = not (any (== [Str (show n)]) keys) in
|
|
|
|
case (find notAKey nums) of
|
2006-12-20 06:50:14 +00:00
|
|
|
Just x -> show x
|
2006-10-17 14:22:29 +00:00
|
|
|
Nothing -> error "Could not find unique key for reference link"
|
|
|
|
|
|
|
|
-- | Generate a reference for a URL (either an existing reference, if
|
|
|
|
-- there is one, or a new one, if there isn't) and update parser state.
|
|
|
|
generateReference :: String -- ^ URL
|
|
|
|
-> String -- ^ Title
|
|
|
|
-> GenParser tok ParserState Target
|
|
|
|
generateReference url title = do
|
|
|
|
let src = Src (decodeEntities url) (decodeEntities title)
|
|
|
|
state <- getState
|
|
|
|
let keyBlocks = stateKeyBlocks state
|
|
|
|
let keysUsed = stateKeysUsed state
|
|
|
|
case (keyFoundIn keyBlocks src) of
|
|
|
|
Just num -> return (Ref [Str num])
|
|
|
|
Nothing -> do
|
|
|
|
let nextNum = nextUniqueKey keysUsed
|
2006-12-20 06:50:14 +00:00
|
|
|
updateState (\st -> st { stateKeyBlocks =
|
|
|
|
(Key [Str nextNum] src):keyBlocks,
|
|
|
|
stateKeysUsed =
|
|
|
|
[Str nextNum]:keysUsed })
|
2006-10-17 14:22:29 +00:00
|
|
|
return (Ref [Str nextNum])
|
|
|
|
|
|
|
|
--
|
|
|
|
-- code to replace reference links with real links and remove unneeded key blocks
|
|
|
|
--
|
|
|
|
|
|
|
|
type KeyTable = [([Inline], Target)]
|
|
|
|
|
|
|
|
-- | Returns @True@ if block is a Key block
|
|
|
|
isRefBlock :: Block -> Bool
|
|
|
|
isRefBlock (Key _ _) = True
|
|
|
|
isRefBlock _ = False
|
|
|
|
|
|
|
|
-- | Returns a pair of a list of pairs of keys and associated sources, and a new
|
|
|
|
-- list of blocks with the included key blocks deleted.
|
|
|
|
keyTable :: [Block] -> (KeyTable, [Block])
|
|
|
|
keyTable [] = ([],[])
|
|
|
|
keyTable ((Key ref target):lst) = (((ref, target):table), rest)
|
|
|
|
where (table, rest) = keyTable lst
|
|
|
|
keyTable (Null:lst) = keyTable lst -- get rid of Nulls
|
|
|
|
keyTable (Blank:lst) = keyTable lst -- get rid of Blanks
|
2006-12-20 06:50:14 +00:00
|
|
|
keyTable ((BlockQuote blocks):lst) = ((table1 ++ table2),
|
|
|
|
((BlockQuote rest1):rest2))
|
2006-10-17 14:22:29 +00:00
|
|
|
where (table1, rest1) = keyTable blocks
|
|
|
|
(table2, rest2) = keyTable lst
|
2006-12-20 06:50:14 +00:00
|
|
|
keyTable ((Note ref blocks):lst) = ((table1 ++ table2),
|
|
|
|
((Note ref rest1):rest2))
|
2006-10-17 14:22:29 +00:00
|
|
|
where (table1, rest1) = keyTable blocks
|
|
|
|
(table2, rest2) = keyTable lst
|
2006-12-20 06:50:14 +00:00
|
|
|
keyTable ((OrderedList blockLists):lst) = ((table1 ++ table2),
|
|
|
|
((OrderedList rest1):rest2))
|
|
|
|
where results = map keyTable blockLists
|
|
|
|
rest1 = map snd results
|
|
|
|
table1 = concatMap fst results
|
2006-10-17 14:22:29 +00:00
|
|
|
(table2, rest2) = keyTable lst
|
2006-12-20 06:50:14 +00:00
|
|
|
keyTable ((BulletList blockLists):lst) = ((table1 ++ table2),
|
|
|
|
((BulletList rest1):rest2))
|
|
|
|
where results = map keyTable blockLists
|
|
|
|
rest1 = map snd results
|
|
|
|
table1 = concatMap fst results
|
2006-10-17 14:22:29 +00:00
|
|
|
(table2, rest2) = keyTable lst
|
|
|
|
keyTable (other:lst) = (table, (other:rest))
|
|
|
|
where (table, rest) = keyTable lst
|
|
|
|
|
|
|
|
-- | Look up key in key table and return target object.
|
|
|
|
lookupKeySrc :: KeyTable -- ^ Key table
|
|
|
|
-> [Inline] -- ^ Key
|
|
|
|
-> Maybe Target
|
|
|
|
lookupKeySrc table key = case table of
|
2006-12-20 06:50:14 +00:00
|
|
|
[] -> Nothing
|
|
|
|
(k, src):rest -> if (refsMatch k key)
|
|
|
|
then Just src
|
|
|
|
else lookupKeySrc rest key
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Returns @True@ if keys match (case insensitive).
|
|
|
|
refsMatch :: [Inline] -> [Inline] -> Bool
|
2006-12-20 06:50:14 +00:00
|
|
|
refsMatch ((Str x):restx) ((Str y):resty) =
|
|
|
|
((map toLower x) == (map toLower y)) && refsMatch restx resty
|
|
|
|
refsMatch ((Code x):restx) ((Code y):resty) =
|
|
|
|
((map toLower x) == (map toLower y)) && refsMatch restx resty
|
|
|
|
refsMatch ((TeX x):restx) ((TeX y):resty) =
|
|
|
|
((map toLower x) == (map toLower y)) && refsMatch restx resty
|
|
|
|
refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
|
|
|
|
((map toLower x) == (map toLower y)) && refsMatch restx resty
|
|
|
|
refsMatch ((NoteRef x):restx) ((NoteRef y):resty) =
|
|
|
|
((map toLower x) == (map toLower y)) && refsMatch restx resty
|
|
|
|
refsMatch ((Emph x):restx) ((Emph y):resty) =
|
|
|
|
refsMatch x y && refsMatch restx resty
|
|
|
|
refsMatch ((Strong x):restx) ((Strong y):resty) =
|
|
|
|
refsMatch x y && refsMatch restx resty
|
2007-01-06 09:54:58 +00:00
|
|
|
refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
|
|
|
|
t == u && refsMatch x y && refsMatch restx resty
|
2006-10-17 14:22:29 +00:00
|
|
|
refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
|
|
|
|
refsMatch [] x = null x
|
|
|
|
refsMatch x [] = null x
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
-- | Replace reference links with explicit links in list of blocks,
|
|
|
|
-- removing key blocks.
|
2006-10-17 14:22:29 +00:00
|
|
|
replaceReferenceLinks :: [Block] -> [Block]
|
|
|
|
replaceReferenceLinks blocks =
|
|
|
|
let (keytable, purged) = keyTable blocks in
|
|
|
|
replaceRefLinksBlockList keytable purged
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
-- | Use key table to replace reference links with explicit links in a list
|
|
|
|
-- of blocks
|
2006-10-17 14:22:29 +00:00
|
|
|
replaceRefLinksBlockList :: KeyTable -> [Block] -> [Block]
|
2006-12-20 06:50:14 +00:00
|
|
|
replaceRefLinksBlockList keytable lst =
|
|
|
|
map (replaceRefLinksBlock keytable) lst
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Use key table to replace reference links with explicit links in a block
|
|
|
|
replaceRefLinksBlock :: KeyTable -> Block -> Block
|
2006-12-20 06:50:14 +00:00
|
|
|
replaceRefLinksBlock keytable (Plain lst) =
|
|
|
|
Plain (map (replaceRefLinksInline keytable) lst)
|
|
|
|
replaceRefLinksBlock keytable (Para lst) =
|
|
|
|
Para (map (replaceRefLinksInline keytable) lst)
|
|
|
|
replaceRefLinksBlock keytable (Header lvl lst) =
|
|
|
|
Header lvl (map (replaceRefLinksInline keytable) lst)
|
|
|
|
replaceRefLinksBlock keytable (BlockQuote lst) =
|
|
|
|
BlockQuote (map (replaceRefLinksBlock keytable) lst)
|
|
|
|
replaceRefLinksBlock keytable (Note ref lst) =
|
|
|
|
Note ref (map (replaceRefLinksBlock keytable) lst)
|
|
|
|
replaceRefLinksBlock keytable (OrderedList lst) =
|
|
|
|
OrderedList (map (replaceRefLinksBlockList keytable) lst)
|
|
|
|
replaceRefLinksBlock keytable (BulletList lst) =
|
|
|
|
BulletList (map (replaceRefLinksBlockList keytable) lst)
|
2006-10-17 14:22:29 +00:00
|
|
|
replaceRefLinksBlock keytable other = other
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
-- | Use key table to replace reference links with explicit links in an
|
|
|
|
-- inline element.
|
2006-10-17 14:22:29 +00:00
|
|
|
replaceRefLinksInline :: KeyTable -> Inline -> Inline
|
|
|
|
replaceRefLinksInline keytable (Link text (Ref ref)) = (Link newText newRef)
|
2006-12-20 06:50:14 +00:00
|
|
|
where newRef = case lookupKeySrc keytable
|
|
|
|
(if (null ref) then text else ref) of
|
2006-10-17 14:22:29 +00:00
|
|
|
Nothing -> (Ref ref)
|
|
|
|
Just src -> src
|
|
|
|
newText = map (replaceRefLinksInline keytable) text
|
|
|
|
replaceRefLinksInline keytable (Image text (Ref ref)) = (Image newText newRef)
|
2006-12-20 06:50:14 +00:00
|
|
|
where newRef = case lookupKeySrc keytable
|
|
|
|
(if (null ref) then text else ref) of
|
2006-10-17 14:22:29 +00:00
|
|
|
Nothing -> (Ref ref)
|
|
|
|
Just src -> src
|
|
|
|
newText = map (replaceRefLinksInline keytable) text
|
2006-12-20 06:50:14 +00:00
|
|
|
replaceRefLinksInline keytable (Emph lst) =
|
|
|
|
Emph (map (replaceRefLinksInline keytable) lst)
|
|
|
|
replaceRefLinksInline keytable (Strong lst) =
|
|
|
|
Strong (map (replaceRefLinksInline keytable) lst)
|
2007-01-06 09:54:58 +00:00
|
|
|
replaceRefLinksInline keytable (Quoted t lst) =
|
|
|
|
Quoted t (map (replaceRefLinksInline keytable) lst)
|
2006-10-17 14:22:29 +00:00
|
|
|
replaceRefLinksInline keytable other = other
|
2007-01-04 22:52:16 +00:00
|
|
|
|
|
|
|
-- | Return a text object with a string of formatted SGML attributes.
|
2007-01-06 09:54:58 +00:00
|
|
|
attributeList :: [(String, String)] -> Doc
|
|
|
|
attributeList = text . concatMap
|
2007-01-27 22:13:11 +00:00
|
|
|
(\(a, b) -> " " ++ stringToSGML a ++ "=\"" ++
|
|
|
|
stringToSGML b ++ "\"")
|
2007-01-04 22:52:16 +00:00
|
|
|
|
|
|
|
-- | Put the supplied contents between start and end tags of tagType,
|
|
|
|
-- with specified attributes and (if specified) indentation.
|
2007-01-06 09:54:58 +00:00
|
|
|
inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
|
|
|
|
inTags isIndented tagType attribs contents =
|
|
|
|
let openTag = PP.char '<' <> text tagType <> attributeList attribs <>
|
2007-01-04 22:52:16 +00:00
|
|
|
PP.char '>'
|
|
|
|
closeTag = text "</" <> text tagType <> PP.char '>' in
|
|
|
|
if isIndented
|
|
|
|
then openTag $$ nest 2 contents $$ closeTag
|
|
|
|
else openTag <> contents <> closeTag
|
|
|
|
|
|
|
|
-- | Return a self-closing tag of tagType with specified attributes
|
2007-01-06 09:54:58 +00:00
|
|
|
selfClosingTag :: String -> [(String, String)] -> Doc
|
|
|
|
selfClosingTag tagType attribs =
|
|
|
|
PP.char '<' <> text tagType <> attributeList attribs <> text " />"
|
2007-01-04 22:52:16 +00:00
|
|
|
|
|
|
|
-- | Put the supplied contents between start and end tags of tagType.
|
2007-01-06 09:54:58 +00:00
|
|
|
inTagsSimple :: String -> Doc -> Doc
|
|
|
|
inTagsSimple tagType = inTags False tagType []
|
2007-01-04 22:52:16 +00:00
|
|
|
|
|
|
|
-- | Put the supplied contents in indented block btw start and end tags.
|
2007-01-06 09:54:58 +00:00
|
|
|
inTagsIndented :: String -> Doc -> Doc
|
|
|
|
inTagsIndented tagType = inTags True tagType []
|
2007-01-04 22:52:16 +00:00
|
|
|
|