Reverted changes in r1086 (implicit section header references).
This caused too much of a performance hit. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1093 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
66efec1670
commit
7deee9c874
4 changed files with 126 additions and 169 deletions
26
README
26
README
|
@ -299,7 +299,7 @@ For further documentation, see the `pandoc(1)` man page.
|
|||
: includes *string* as a prefix at the beginning of the title that
|
||||
appears in the HTML header (but not in the title as it appears at
|
||||
the beginning of the HTML body). (See below on
|
||||
[Title Blocks].)
|
||||
[Title Blocks](#title-blocks).)
|
||||
|
||||
`-S` or `--smart`
|
||||
: causes `pandoc` to produce typographically correct output, along the
|
||||
|
@ -797,28 +797,8 @@ another. A link to this section, for example, might look like this:
|
|||
|
||||
See the section on [header identifiers](#header-identifiers-in-html).
|
||||
|
||||
Note that this method of providing links to sections works only in
|
||||
HTML.
|
||||
|
||||
Shortcut links to section headers
|
||||
---------------------------------
|
||||
|
||||
You may use the following shortcut to link to a section header:
|
||||
|
||||
See the section on [Header identifiers in HTML].
|
||||
|
||||
Pandoc will behave as if the document contains
|
||||
|
||||
[Header identifiers in HTML]: #header-identifiers-in-html
|
||||
|
||||
If there are multiple headers labeled "Header identifiers in HTML",
|
||||
this will link to the first one. Note that implicit references are always
|
||||
overridden by explicit ones, so the link in the following text
|
||||
will be to `/url`, not `#header-identifiers-in-html`:
|
||||
|
||||
See the section on [Header identifiers in HTML].
|
||||
|
||||
[Header identifiers in HTML]: /url
|
||||
Note, however, that this method of providing links to sections works
|
||||
only in HTML.
|
||||
|
||||
Box-style blockquotes
|
||||
---------------------
|
||||
|
|
|
@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
{- |
|
||||
Module : Text.Pandoc.Readers.Markdown
|
||||
Copyright : Copyright (C) 2006-7 John MacFarlane
|
||||
License : GNU GPL, version 2 or above
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||
Stability : alpha
|
||||
|
@ -27,8 +27,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
|
||||
Conversion of markdown-formatted plain text to 'Pandoc' document.
|
||||
-}
|
||||
module Text.Pandoc.Readers.Markdown (
|
||||
readMarkdown
|
||||
module Text.Pandoc.Readers.Markdown (
|
||||
readMarkdown
|
||||
) where
|
||||
|
||||
import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex )
|
||||
|
@ -37,9 +37,9 @@ import Data.Char ( isAlphaNum )
|
|||
import Data.Maybe ( fromMaybe )
|
||||
import Network.URI ( isURI )
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
|
||||
import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
|
||||
import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
|
||||
anyHtmlInlineTag, anyHtmlTag,
|
||||
anyHtmlEndTag, htmlEndTag, extractTagType,
|
||||
htmlBlockElement )
|
||||
|
@ -69,14 +69,14 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221"
|
|||
indentSpaces = try $ do
|
||||
state <- getState
|
||||
let tabStop = stateTabStop state
|
||||
try (count tabStop (char ' ')) <|>
|
||||
try (count tabStop (char ' ')) <|>
|
||||
(many (char ' ') >> string "\t") <?> "indentation"
|
||||
|
||||
nonindentSpaces = do
|
||||
state <- getState
|
||||
let tabStop = stateTabStop state
|
||||
sps <- many (char ' ')
|
||||
if length sps < tabStop
|
||||
if length sps < tabStop
|
||||
then return sps
|
||||
else unexpected "indented line"
|
||||
|
||||
|
@ -91,8 +91,8 @@ failUnlessSmart = do
|
|||
if stateSmart state then return () else fail "Smart typography feature"
|
||||
|
||||
-- | Parse an inline Str element with a given content.
|
||||
inlineString str = try $ do
|
||||
(Str res) <- inline
|
||||
inlineString str = try $ do
|
||||
(Str res) <- inline
|
||||
if res == str then return res else fail $ "unexpected Str content"
|
||||
|
||||
-- | Parse a sequence of inline elements between a string
|
||||
|
@ -103,9 +103,9 @@ inlinesInBalanced opener closer = try $ do
|
|||
string opener
|
||||
result <- manyTill ( (do lookAhead (inlineString opener)
|
||||
-- because it might be a link...
|
||||
bal <- inlinesInBalanced opener closer
|
||||
bal <- inlinesInBalanced opener closer
|
||||
return $ [Str opener] ++ bal ++ [Str closer])
|
||||
<|> (count 1 inline))
|
||||
<|> (count 1 inline))
|
||||
(try (string closer))
|
||||
return $ concat result
|
||||
|
||||
|
@ -115,7 +115,7 @@ inlinesInBalanced opener closer = try $ do
|
|||
|
||||
titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
|
||||
|
||||
authorsLine = try $ do
|
||||
authorsLine = try $ do
|
||||
char '%'
|
||||
skipSpaces
|
||||
authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
|
||||
|
@ -143,24 +143,15 @@ parseMarkdown = do
|
|||
startPos <- getPosition
|
||||
-- go through once just to get list of reference keys
|
||||
-- docMinusKeys is the raw document with blanks where the keys were...
|
||||
docMinusKeys <- manyTill (referenceKey <|> headerReference <|>
|
||||
lineClump) eof >>= return . concat
|
||||
docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=
|
||||
return . concat
|
||||
setInput docMinusKeys
|
||||
setPosition startPos
|
||||
st <- getState
|
||||
-- get headers and construct implicit references unless strict
|
||||
if stateStrict st
|
||||
then return ()
|
||||
else do let oldkeys = stateKeys st
|
||||
let headers = reverse $ stateHeaders st
|
||||
let idents = uniqueIdentifiers headers
|
||||
let implicitRefs = zipWith (\hd ident -> (hd, ("#" ++ ident, "")))
|
||||
headers idents
|
||||
updateState $ \st -> st { stateKeys = oldkeys ++ implicitRefs }
|
||||
-- go through again for notes unless strict...
|
||||
if stateStrict st
|
||||
then return ()
|
||||
else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
|
||||
else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
|
||||
return . concat
|
||||
st <- getState
|
||||
let reversedNotes = stateNotes st
|
||||
|
@ -169,10 +160,10 @@ parseMarkdown = do
|
|||
setPosition startPos
|
||||
-- now parse it for real...
|
||||
(title, author, date) <- option ([],[],"") titleBlock
|
||||
blocks <- parseBlocks
|
||||
blocks <- parseBlocks
|
||||
return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
|
||||
|
||||
--
|
||||
--
|
||||
-- initial pass for references and notes
|
||||
--
|
||||
|
||||
|
@ -195,17 +186,7 @@ referenceKey = try $ do
|
|||
-- return blanks so line count isn't affected
|
||||
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
|
||||
|
||||
headerReference = try $ do
|
||||
failIfStrict
|
||||
startPos <- getPosition
|
||||
(Header level text) <- lookAhead $ atxHeader <|> setextHeader
|
||||
st <- getState
|
||||
let headers = stateHeaders st
|
||||
updateState $ \st -> st { stateHeaders = text:headers }
|
||||
endPos <- getPosition
|
||||
lineClump -- return the raw header, because we need to parse it later
|
||||
|
||||
referenceTitle = try $ do
|
||||
referenceTitle = try $ do
|
||||
(many1 spaceChar >> option '\n' newline) <|> newline
|
||||
skipSpaces
|
||||
tit <- (charsInBalanced '(' ')' >>= return . unwords . words)
|
||||
|
@ -220,7 +201,7 @@ rawLine = do
|
|||
notFollowedBy blankline
|
||||
notFollowedBy' noteMarker
|
||||
contents <- many1 nonEndline
|
||||
end <- option "" (newline >> optional indentSpaces >> return "\n")
|
||||
end <- option "" (newline >> optional indentSpaces >> return "\n")
|
||||
return $ contents ++ end
|
||||
|
||||
rawLines = many1 rawLine >>= return . concat
|
||||
|
@ -249,7 +230,7 @@ noteBlock = try $ do
|
|||
|
||||
parseBlocks = manyTill block eof
|
||||
|
||||
block = choice [ header
|
||||
block = choice [ header
|
||||
, table
|
||||
, codeBlock
|
||||
, hrule
|
||||
|
@ -304,7 +285,7 @@ hrule = try $ do
|
|||
indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
|
||||
|
||||
codeBlock = do
|
||||
contents <- many1 (indentedLine <|>
|
||||
contents <- many1 (indentedLine <|>
|
||||
try (do b <- blanklines
|
||||
l <- indentedLine
|
||||
return $ b ++ l))
|
||||
|
@ -319,7 +300,7 @@ emacsBoxQuote = try $ do
|
|||
failIfStrict
|
||||
string ",----"
|
||||
manyTill anyChar newline
|
||||
raw <- manyTill
|
||||
raw <- manyTill
|
||||
(try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
|
||||
(try (string "`----"))
|
||||
blanklines
|
||||
|
@ -329,7 +310,7 @@ emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
|
|||
|
||||
emailBlockQuote = try $ do
|
||||
emailBlockQuoteStart
|
||||
raw <- sepBy (many (nonEndline <|>
|
||||
raw <- sepBy (many (nonEndline <|>
|
||||
(try (endline >> notFollowedBy emailBlockQuoteStart >>
|
||||
return '\n'))))
|
||||
(try (newline >> emailBlockQuoteStart))
|
||||
|
@ -337,12 +318,12 @@ emailBlockQuote = try $ do
|
|||
optional blanklines
|
||||
return raw
|
||||
|
||||
blockQuote = do
|
||||
blockQuote = do
|
||||
raw <- emailBlockQuote <|> emacsBoxQuote
|
||||
-- parse the extracted block, which may contain various block elements:
|
||||
contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
|
||||
return $ BlockQuote contents
|
||||
|
||||
|
||||
--
|
||||
-- list blocks
|
||||
--
|
||||
|
@ -377,7 +358,7 @@ orderedListStart style delim = try $ do
|
|||
then do many1 digit
|
||||
char '.'
|
||||
return 1
|
||||
else orderedListMarker style delim
|
||||
else orderedListMarker style delim
|
||||
if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
|
||||
num `elem` [1, 5, 10, 50, 100, 500, 1000]))
|
||||
then char '\t' <|> (spaceChar >> spaceChar)
|
||||
|
@ -401,7 +382,7 @@ rawListItem start = try $ do
|
|||
blanks <- many blankline
|
||||
return $ concat result ++ blanks
|
||||
|
||||
-- continuation of a list item - indented and separated by blankline
|
||||
-- continuation of a list item - indented and separated by blankline
|
||||
-- or (in compact lists) endline.
|
||||
-- note: nested lists are parsed as continuations
|
||||
listContinuation start = try $ do
|
||||
|
@ -417,7 +398,7 @@ listContinuationLine start = try $ do
|
|||
result <- manyTill anyChar newline
|
||||
return $ result ++ "\n"
|
||||
|
||||
listItem start = try $ do
|
||||
listItem start = try $ do
|
||||
first <- rawListItem start
|
||||
continuations <- many (listContinuation start)
|
||||
-- parsing with ListItemState forces markers at beginning of lines to
|
||||
|
@ -437,7 +418,7 @@ orderedList = try $ do
|
|||
items <- many1 (listItem (orderedListStart style delim))
|
||||
return $ OrderedList (start, style, delim) $ compactify items
|
||||
|
||||
bulletList = many1 (listItem bulletListStart) >>=
|
||||
bulletList = many1 (listItem bulletListStart) >>=
|
||||
return . BulletList . compactify
|
||||
|
||||
-- definition lists
|
||||
|
@ -478,7 +459,7 @@ definitionList = do
|
|||
-- paragraph block
|
||||
--
|
||||
|
||||
para = try $ do
|
||||
para = try $ do
|
||||
result <- many1 inline
|
||||
newline
|
||||
blanklines <|> do st <- getState
|
||||
|
@ -487,9 +468,9 @@ para = try $ do
|
|||
else lookAhead emacsBoxQuote >> return ""
|
||||
return $ Para $ normalizeSpaces result
|
||||
|
||||
plain = many1 inline >>= return . Plain . normalizeSpaces
|
||||
plain = many1 inline >>= return . Plain . normalizeSpaces
|
||||
|
||||
--
|
||||
--
|
||||
-- raw html
|
||||
--
|
||||
|
||||
|
@ -506,25 +487,25 @@ htmlBlock = do
|
|||
else rawHtmlBlocks
|
||||
|
||||
-- True if tag is self-closing
|
||||
isSelfClosing tag =
|
||||
isSelfClosing tag =
|
||||
isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
|
||||
|
||||
strictHtmlBlock = try $ do
|
||||
tag <- anyHtmlBlockTag
|
||||
tag <- anyHtmlBlockTag
|
||||
let tag' = extractTagType tag
|
||||
if isSelfClosing tag || tag' == "hr"
|
||||
if isSelfClosing tag || tag' == "hr"
|
||||
then return tag
|
||||
else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
|
||||
else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
|
||||
(htmlElement <|> (count 1 anyChar)))
|
||||
end <- htmlEndTag tag'
|
||||
return $ tag ++ concat contents ++ end
|
||||
|
||||
rawHtmlBlocks = do
|
||||
htmlBlocks <- many1 rawHtmlBlock
|
||||
htmlBlocks <- many1 rawHtmlBlock
|
||||
let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
|
||||
let combined' = if not (null combined) && last combined == '\n'
|
||||
then init combined -- strip extra newline
|
||||
else combined
|
||||
then init combined -- strip extra newline
|
||||
else combined
|
||||
return $ RawHtml combined'
|
||||
|
||||
--
|
||||
|
@ -535,7 +516,7 @@ rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
|
|||
|
||||
--
|
||||
-- Tables
|
||||
--
|
||||
--
|
||||
|
||||
-- Parse a dashed line with optional trailing spaces; return its length
|
||||
-- and the length including trailing space.
|
||||
|
@ -544,7 +525,7 @@ dashedLine ch = do
|
|||
sp <- many spaceChar
|
||||
return $ (length dashes, length $ dashes ++ sp)
|
||||
|
||||
-- Parse a table header with dashed lines of '-' preceded by
|
||||
-- Parse a table header with dashed lines of '-' preceded by
|
||||
-- one line of text.
|
||||
simpleTableHeader = try $ do
|
||||
rawContent <- anyLine
|
||||
|
@ -567,7 +548,7 @@ tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
|
|||
rawTableLine indices = do
|
||||
notFollowedBy' (blanklines <|> tableFooter)
|
||||
line <- many1Till anyChar newline
|
||||
return $ map removeLeadingTrailingSpace $ tail $
|
||||
return $ map removeLeadingTrailingSpace $ tail $
|
||||
splitByIndices (init indices) line
|
||||
|
||||
-- Parse a table line and return a list of lists of blocks (columns).
|
||||
|
@ -584,8 +565,8 @@ multilineRow indices = do
|
|||
widthsFromIndices :: Int -- Number of columns on terminal
|
||||
-> [Int] -- Indices
|
||||
-> [Float] -- Fractional relative sizes of columns
|
||||
widthsFromIndices _ [] = []
|
||||
widthsFromIndices numColumns indices =
|
||||
widthsFromIndices _ [] = []
|
||||
widthsFromIndices numColumns indices =
|
||||
let lengths = zipWith (-) indices (0:indices)
|
||||
totLength = sum lengths
|
||||
quotient = if totLength > numColumns
|
||||
|
@ -624,14 +605,14 @@ simpleTable = tableWith simpleTableHeader tableLine blanklines
|
|||
multilineTable = tableWith multilineTableHeader multilineRow tableFooter
|
||||
|
||||
multilineTableHeader = try $ do
|
||||
tableSep
|
||||
tableSep
|
||||
rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
|
||||
initSp <- nonindentSpaces
|
||||
dashes <- many1 (dashedLine '-')
|
||||
newline
|
||||
let (lengths, lines) = unzip dashes
|
||||
let indices = scanl (+) (length initSp) lines
|
||||
let rawHeadsList = transpose $ map
|
||||
let rawHeadsList = transpose $ map
|
||||
(\ln -> tail $ splitByIndices (init indices) ln)
|
||||
rawContent
|
||||
let rawHeads = map (joinWithSep " ") rawHeadsList
|
||||
|
@ -644,7 +625,7 @@ multilineTableHeader = try $ do
|
|||
alignType :: [String] -> Int -> Alignment
|
||||
alignType [] len = AlignDefault
|
||||
alignType strLst len =
|
||||
let str = head $ sortBy (comparing length) $
|
||||
let str = head $ sortBy (comparing length) $
|
||||
map removeTrailingSpace strLst
|
||||
leftSpace = if null str then False else (str !! 0) `elem` " \t"
|
||||
rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
|
||||
|
@ -656,7 +637,7 @@ alignType strLst len =
|
|||
|
||||
table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
|
||||
|
||||
--
|
||||
--
|
||||
-- inline
|
||||
--
|
||||
|
||||
|
@ -686,7 +667,7 @@ inline = choice [ str
|
|||
escapedChar = do
|
||||
char '\\'
|
||||
state <- getState
|
||||
result <- option '\\' $ if stateStrict state
|
||||
result <- option '\\' $ if stateStrict state
|
||||
then oneOf "\\`*_{}[]()>#+-.!~"
|
||||
else satisfy (not . isAlphaNum)
|
||||
return $ Str [result]
|
||||
|
@ -700,17 +681,17 @@ ltSign = do
|
|||
|
||||
specialCharsMinusLt = filter (/= '<') specialChars
|
||||
|
||||
symbol = do
|
||||
symbol = do
|
||||
result <- oneOf specialCharsMinusLt
|
||||
return $ Str [result]
|
||||
|
||||
-- parses inline code, between n `s and n `s
|
||||
code = try $ do
|
||||
code = try $ do
|
||||
starts <- many1 (char '`')
|
||||
skipSpaces
|
||||
result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
|
||||
(char '\n' >> return " "))
|
||||
(try (skipSpaces >> count (length starts) (char '`') >>
|
||||
(char '\n' >> return " "))
|
||||
(try (skipSpaces >> count (length starts) (char '`') >>
|
||||
notFollowedBy (char '`')))
|
||||
return $ Code $ removeLeadingTrailingSpace $ concat result
|
||||
|
||||
|
@ -726,30 +707,30 @@ math = try $ do
|
|||
return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
|
||||
|
||||
emph = ((enclosed (char '*') (char '*') inline) <|>
|
||||
(enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>=
|
||||
(enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>=
|
||||
return . Emph . normalizeSpaces
|
||||
|
||||
strong = ((enclosed (string "**") (try $ string "**") inline) <|>
|
||||
strong = ((enclosed (string "**") (try $ string "**") inline) <|>
|
||||
(enclosed (string "__") (try $ string "__") inline)) >>=
|
||||
return . Strong . normalizeSpaces
|
||||
|
||||
strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
|
||||
return . Strikeout . normalizeSpaces
|
||||
|
||||
superscript = failIfStrict >> enclosed (char '^') (char '^')
|
||||
superscript = failIfStrict >> enclosed (char '^') (char '^')
|
||||
(notFollowedBy' whitespace >> inline) >>= -- may not contain Space
|
||||
return . Superscript
|
||||
|
||||
subscript = failIfStrict >> enclosed (char '~') (char '~')
|
||||
(notFollowedBy' whitespace >> inline) >>= -- may not contain Space
|
||||
return . Subscript
|
||||
return . Subscript
|
||||
|
||||
smartPunctuation = failUnlessSmart >>
|
||||
smartPunctuation = failUnlessSmart >>
|
||||
choice [ quoted, apostrophe, dash, ellipses ]
|
||||
|
||||
apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
|
||||
|
||||
quoted = doubleQuoted <|> singleQuoted
|
||||
quoted = doubleQuoted <|> singleQuoted
|
||||
|
||||
withQuoteContext context parser = do
|
||||
oldState <- getState
|
||||
|
@ -765,7 +746,7 @@ singleQuoted = try $ do
|
|||
withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
|
||||
return . Quoted SingleQuote . normalizeSpaces
|
||||
|
||||
doubleQuoted = try $ do
|
||||
doubleQuoted = try $ do
|
||||
doubleQuoteStart
|
||||
withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
|
||||
return . Quoted DoubleQuote . normalizeSpaces
|
||||
|
@ -776,13 +757,13 @@ failIfInQuoteContext context = do
|
|||
then fail "already inside quotes"
|
||||
else return ()
|
||||
|
||||
singleQuoteStart = do
|
||||
singleQuoteStart = do
|
||||
failIfInQuoteContext InSingleQuote
|
||||
char '\8216' <|>
|
||||
(try $ do char '\''
|
||||
char '\8216' <|>
|
||||
(try $ do char '\''
|
||||
notFollowedBy (oneOf ")!],.;:-? \t\n")
|
||||
notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
|
||||
satisfy (not . isAlphaNum)))
|
||||
satisfy (not . isAlphaNum)))
|
||||
-- possess/contraction
|
||||
return '\'')
|
||||
|
||||
|
@ -826,13 +807,13 @@ endline = try $ do
|
|||
newline
|
||||
notFollowedBy blankline
|
||||
st <- getState
|
||||
if stateStrict st
|
||||
if stateStrict st
|
||||
then do notFollowedBy emailBlockQuoteStart
|
||||
notFollowedBy (char '#') -- atx header
|
||||
else return ()
|
||||
else return ()
|
||||
-- parse potential list-starts differently if in a list:
|
||||
if stateParserContext st == ListItemState
|
||||
then notFollowedBy' (bulletListStart <|>
|
||||
then notFollowedBy' (bulletListStart <|>
|
||||
(anyOrderedListStart >> return ()))
|
||||
else return ()
|
||||
return Space
|
||||
|
@ -846,7 +827,7 @@ reference = notFollowedBy' (string "[^") >> -- footnote reference
|
|||
inlinesInBalanced "[" "]" >>= (return . normalizeSpaces)
|
||||
|
||||
-- source for a link, with optional title
|
||||
source = try $ do
|
||||
source = try $ do
|
||||
char '('
|
||||
optional (char '<')
|
||||
src <- many (noneOf ")> \t\n")
|
||||
|
@ -856,7 +837,7 @@ source = try $ do
|
|||
char ')'
|
||||
return (removeTrailingSpace src, tit)
|
||||
|
||||
linkTitle = try $ do
|
||||
linkTitle = try $ do
|
||||
(many1 spaceChar >> option '\n' newline) <|> newline
|
||||
skipSpaces
|
||||
delim <- char '\'' <|> char '"'
|
||||
|
@ -871,13 +852,13 @@ link = try $ do
|
|||
|
||||
-- a link like [this][ref] or [this][] or [this]
|
||||
referenceLink label = do
|
||||
ref <- option [] (try (optional (char ' ') >>
|
||||
ref <- option [] (try (optional (char ' ') >>
|
||||
optional (newline >> skipSpaces) >> reference))
|
||||
let ref' = if null ref then label else ref
|
||||
state <- getState
|
||||
case lookupKeySrc (stateKeys state) ref' of
|
||||
Nothing -> fail "no corresponding key"
|
||||
Just target -> return target
|
||||
Nothing -> fail "no corresponding key"
|
||||
Just target -> return target
|
||||
|
||||
emailAddress = try $ do
|
||||
name <- many1 (alphaNum <|> char '+')
|
||||
|
@ -898,7 +879,7 @@ autoLink = try $ do
|
|||
char '>'
|
||||
let src' = if "mailto:" `isPrefixOf` src
|
||||
then drop 7 src
|
||||
else src
|
||||
else src
|
||||
st <- getState
|
||||
return $ if stateStrict st
|
||||
then Link [Str src'] (src, "")
|
||||
|
@ -929,7 +910,7 @@ rawLaTeXInline' = failIfStrict >> rawLaTeXInline
|
|||
rawHtmlInline' = do
|
||||
st <- getState
|
||||
result <- choice $ if stateStrict st
|
||||
then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
|
||||
then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
|
||||
else [htmlBlockElement, anyHtmlInlineTag]
|
||||
return $ HtmlInline result
|
||||
|
||||
|
|
|
@ -34,7 +34,6 @@ module Text.Pandoc.Shared (
|
|||
substitute,
|
||||
joinWithSep,
|
||||
-- * Text processing
|
||||
isPunctuation,
|
||||
backslashEscapes,
|
||||
escapeStringUsing,
|
||||
stripTrailingNewlines,
|
||||
|
@ -92,7 +91,6 @@ module Text.Pandoc.Shared (
|
|||
Element (..),
|
||||
hierarchicalize,
|
||||
isHeaderBlock,
|
||||
uniqueIdentifiers,
|
||||
-- * Writer options
|
||||
WriterOptions (..),
|
||||
defaultWriterOptions
|
||||
|
@ -104,7 +102,7 @@ import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty )
|
|||
import qualified Text.PrettyPrint.HughesPJ as PP
|
||||
import Text.Pandoc.CharacterReferences ( characterReference )
|
||||
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
|
||||
import Data.List ( find, isPrefixOf, intersperse )
|
||||
import Data.List ( find, isPrefixOf )
|
||||
import Control.Monad ( join )
|
||||
|
||||
--
|
||||
|
@ -146,15 +144,6 @@ joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
|
|||
-- Text processing
|
||||
--
|
||||
|
||||
-- | True if character is a punctuation character (unicode).
|
||||
isPunctuation :: Char -> Bool
|
||||
isPunctuation c =
|
||||
let c' = ord c
|
||||
in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
|
||||
c' >= 0xE000 && c' <= 0xE0FF
|
||||
then True
|
||||
else False
|
||||
|
||||
-- | Returns an association list of backslash escapes for the
|
||||
-- designated characters.
|
||||
backslashEscapes :: [Char] -- ^ list of special characters to escape
|
||||
|
@ -580,8 +569,7 @@ data ParserState = ParserState
|
|||
stateStrict :: Bool, -- ^ Use strict markdown syntax?
|
||||
stateSmart :: Bool, -- ^ Use smart typography?
|
||||
stateColumns :: Int, -- ^ Number of columns in terminal
|
||||
stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
|
||||
stateHeaders :: [[Inline]] -- ^ List of header texts used
|
||||
stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used
|
||||
}
|
||||
deriving Show
|
||||
|
||||
|
@ -600,8 +588,7 @@ defaultParserState =
|
|||
stateStrict = False,
|
||||
stateSmart = False,
|
||||
stateColumns = 80,
|
||||
stateHeaderTable = [],
|
||||
stateHeaders = [] }
|
||||
stateHeaderTable = [] }
|
||||
|
||||
data HeaderType
|
||||
= SingleHeader Char -- ^ Single line of characters underneath
|
||||
|
@ -803,43 +790,6 @@ isHeaderBlock :: Block -> Bool
|
|||
isHeaderBlock (Header _ _) = True
|
||||
isHeaderBlock _ = False
|
||||
|
||||
-- | Convert Pandoc inline list to plain text identifier.
|
||||
inlineListToIdentifier :: [Inline] -> String
|
||||
inlineListToIdentifier [] = ""
|
||||
inlineListToIdentifier (x:xs) =
|
||||
xAsText ++ inlineListToIdentifier xs
|
||||
where xAsText = case x of
|
||||
Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
|
||||
concat $ intersperse "-" $ words $ map toLower s
|
||||
Emph lst -> inlineListToIdentifier lst
|
||||
Strikeout lst -> inlineListToIdentifier lst
|
||||
Superscript lst -> inlineListToIdentifier lst
|
||||
Subscript lst -> inlineListToIdentifier lst
|
||||
Strong lst -> inlineListToIdentifier lst
|
||||
Quoted _ lst -> inlineListToIdentifier lst
|
||||
Code s -> s
|
||||
Space -> "-"
|
||||
EmDash -> "-"
|
||||
EnDash -> "-"
|
||||
Apostrophe -> ""
|
||||
Ellipses -> ""
|
||||
LineBreak -> "-"
|
||||
TeX _ -> ""
|
||||
HtmlInline _ -> ""
|
||||
Link lst _ -> inlineListToIdentifier lst
|
||||
Image lst _ -> inlineListToIdentifier lst
|
||||
Note _ -> ""
|
||||
|
||||
-- | Return unique identifiers for list of inline lists.
|
||||
uniqueIdentifiers :: [[Inline]] -> [String]
|
||||
uniqueIdentifiers ls =
|
||||
let addIdentifier (nonuniqueIds, uniqueIds) l =
|
||||
let new = inlineListToIdentifier l
|
||||
matches = length $ filter (== new) nonuniqueIds
|
||||
new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
|
||||
in (new:nonuniqueIds, new':uniqueIds)
|
||||
in reverse $ snd $ foldl addIdentifier ([],[]) ls
|
||||
|
||||
--
|
||||
-- Writer options
|
||||
--
|
||||
|
|
|
@ -193,6 +193,15 @@ obfuscateChar char =
|
|||
obfuscateString :: String -> String
|
||||
obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
|
||||
|
||||
-- | True if character is a punctuation character (unicode).
|
||||
isPunctuation :: Char -> Bool
|
||||
isPunctuation c =
|
||||
let c' = ord c
|
||||
in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
|
||||
c' >= 0xE000 && c' <= 0xE0FF
|
||||
then True
|
||||
else False
|
||||
|
||||
-- | Add CSS for document header.
|
||||
addToCSS :: String -> State WriterState ()
|
||||
addToCSS item = do
|
||||
|
@ -200,6 +209,43 @@ addToCSS item = do
|
|||
let current = stCSS st
|
||||
put $ st {stCSS = S.insert item current}
|
||||
|
||||
-- | Convert Pandoc inline list to plain text identifier.
|
||||
inlineListToIdentifier :: [Inline] -> String
|
||||
inlineListToIdentifier [] = ""
|
||||
inlineListToIdentifier (x:xs) =
|
||||
xAsText ++ inlineListToIdentifier xs
|
||||
where xAsText = case x of
|
||||
Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
|
||||
concat $ intersperse "-" $ words $ map toLower s
|
||||
Emph lst -> inlineListToIdentifier lst
|
||||
Strikeout lst -> inlineListToIdentifier lst
|
||||
Superscript lst -> inlineListToIdentifier lst
|
||||
Subscript lst -> inlineListToIdentifier lst
|
||||
Strong lst -> inlineListToIdentifier lst
|
||||
Quoted _ lst -> inlineListToIdentifier lst
|
||||
Code s -> s
|
||||
Space -> "-"
|
||||
EmDash -> "-"
|
||||
EnDash -> "-"
|
||||
Apostrophe -> ""
|
||||
Ellipses -> ""
|
||||
LineBreak -> "-"
|
||||
TeX _ -> ""
|
||||
HtmlInline _ -> ""
|
||||
Link lst _ -> inlineListToIdentifier lst
|
||||
Image lst _ -> inlineListToIdentifier lst
|
||||
Note _ -> ""
|
||||
|
||||
-- | Return unique identifiers for list of inline lists.
|
||||
uniqueIdentifiers :: [[Inline]] -> [String]
|
||||
uniqueIdentifiers ls =
|
||||
let addIdentifier (nonuniqueIds, uniqueIds) l =
|
||||
let new = inlineListToIdentifier l
|
||||
matches = length $ filter (== new) nonuniqueIds
|
||||
new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
|
||||
in (new:nonuniqueIds, new':uniqueIds)
|
||||
in reverse $ snd $ foldl addIdentifier ([],[]) ls
|
||||
|
||||
-- | Convert Pandoc block element to HTML.
|
||||
blockToHtml :: WriterOptions -> Block -> State WriterState Html
|
||||
blockToHtml opts Null = return $ noHtml
|
||||
|
|
Loading…
Add table
Reference in a new issue