2006-12-20 20:54:23 +00:00
|
|
|
{-
|
2007-07-07 22:51:55 +00:00
|
|
|
Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
|
2006-12-20 20:54:23 +00:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.Readers.Markdown
|
2007-07-07 22:51:55 +00:00
|
|
|
Copyright : Copyright (C) 2006-7 John MacFarlane
|
2006-12-20 06:50:14 +00:00
|
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
2007-07-07 22:51:55 +00:00
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
2006-12-20 20:20:10 +00:00
|
|
|
Stability : alpha
|
2006-12-20 06:50:14 +00:00
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
Conversion of markdown-formatted plain text to 'Pandoc' document.
|
|
|
|
-}
|
2006-10-17 14:22:29 +00:00
|
|
|
module Text.Pandoc.Readers.Markdown (
|
|
|
|
readMarkdown
|
|
|
|
) where
|
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
import Data.List ( transpose, isSuffixOf, lookup, sortBy )
|
|
|
|
import Data.Ord ( comparing )
|
2007-03-07 20:53:37 +00:00
|
|
|
import Data.Char ( isAlphaNum )
|
2006-10-17 14:22:29 +00:00
|
|
|
import Text.Pandoc.Definition
|
|
|
|
import Text.Pandoc.Shared
|
2007-08-15 06:00:58 +00:00
|
|
|
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
|
2007-08-15 21:20:02 +00:00
|
|
|
import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
|
|
|
|
anyHtmlInlineTag, anyHtmlTag,
|
|
|
|
anyHtmlEndTag, htmlEndTag, extractTagType,
|
2006-12-30 22:51:49 +00:00
|
|
|
htmlBlockElement )
|
2007-08-15 06:00:58 +00:00
|
|
|
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
|
2006-10-17 14:22:29 +00:00
|
|
|
import Text.ParserCombinators.Parsec
|
|
|
|
|
|
|
|
-- | Read markdown from an input string and return a 'Pandoc' document.
-- The given 'ParserState' is used as the initial state of the parser.
readMarkdown :: ParserState -> String -> Pandoc
readMarkdown state str = readWith parseMarkdown state paddedInput
  where
    -- Two newlines are appended to the raw input before parsing
    -- (presumably so the final block is always followed by a blank line).
    paddedInput = str ++ "\n\n"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- Constants and data structure definitions
|
|
|
|
--
|
|
|
|
|
|
|
|
-- | Space and tab.
spaceChars :: String
spaceChars = " \t"

-- | Characters that can serve as bullet list markers.
bulletListMarkers :: String
bulletListMarkers = "*+-"

-- | Characters that can make up a horizontal rule.
hruleChars :: String
hruleChars = "*-_"

-- | Characters that can open a link title.
titleOpeners :: String
titleOpeners = "\"'("

-- | Characters used to underline setext-style headers.
setextHChars :: String
setextHChars = "=-"

-- | Characters treated as potentially non-text when parsing inline content.
specialChars :: String
specialChars = "\\[]*_~`<>$!^-.&'\""
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- auxiliary functions
|
|
|
|
--
|
|
|
|
|
2007-07-28 19:14:50 +00:00
|
|
|
-- | Parse one unit of indentation: either exactly 'stateTabStop' spaces,
-- or any number of spaces followed by a tab character.  Returns the
-- string matched by the successful alternative.  Failures are reported
-- under the label @indentation@.
indentSpaces = try $ do
  tabStop <- getState >>= return . stateTabStop
  let fullWidthSpaces = try (count tabStop (char ' '))
      spacesThenTab   = many (char ' ') >> string "\t"
  (fullWidthSpaces <|> spacesThenTab) <?> "indentation"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-01-15 19:52:42 +00:00
|
|
|
-- | Parse a run of spaces strictly shorter than one tab stop.  Widths are
-- tried from @tabStop - 1@ down to @0@, so the longest run that fits is
-- the one consumed.
nonindentSpaces = do
  tabStop <- getState >>= return . stateTabStop
  let spacesOfWidth n = try (count n (char ' '))
  choice [ spacesOfWidth n | n <- [tabStop - 1, tabStop - 2 .. 0] ]
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2006-12-30 22:51:49 +00:00
|
|
|
-- | Succeed (consuming nothing) only when the current source column is 1;
-- otherwise fail with the message @not beginning of line@.
failUnlessBeginningOfLine = getPosition >>= checkColumn
  where
    checkColumn pos
      | sourceColumn pos == 1 = return ()
      | otherwise             = fail "not beginning of line"
|
|
|
|
|
2007-01-06 09:54:58 +00:00
|
|
|
-- | Succeed (consuming nothing) only when 'stateSmart' is set in the
-- parser state, i.e. "smart typography" mode is on; otherwise fail.
failUnlessSmart = getState >>= checkSmart
  where
    checkSmart st
      | stateSmart st = return ()
      | otherwise     = fail "Smart typography feature"
|
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | Parse a single inline element and succeed only if it is a 'Str' whose
-- content equals @str@; the matched content is returned.  The whole
-- parser is wrapped in 'try', so no input is consumed on failure.
inlineString str = try $ do
  -- A non-'Str' inline makes this pattern bind fail, which fails the parser.
  Str content <- inline
  case content == str of
    True  -> return content
    False -> fail "unexpected Str content"
|
|
|
|
|
2007-07-15 23:53:22 +00:00
|
|
|
-- | Parse the inline elements enclosed between the string @opener@ and
-- the string @closer@.  Balanced nested @opener@ ... @closer@ pairs are
-- parsed recursively and kept in the result, delimiters included (as
-- 'Str' elements).  The outermost delimiters are not part of the result.
inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline]
inlinesInBalanced opener closer = try $ do
  string opener
  pieces <- manyTill (nestedPair <|> singleInline) (try (string closer))
  return $ concat pieces
  where
    -- A nested balanced group, re-wrapped in its delimiters.
    nestedPair = do
      -- Only look ahead at the opener rather than consuming it here,
      -- because it might be a link...
      lookAhead (inlineString opener)
      inner <- inlinesInBalanced opener closer
      return $ Str opener : inner ++ [Str closer]
    -- Any other single inline, as a one-element list.
    singleInline = count 1 inline
|
|
|
|
|
2006-10-17 14:22:29 +00:00
|
|
|
--
|
|
|
|
-- document structure
|
|
|
|
--
|
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | Parse a title line: a @%@, then 'skipSpaces', then inline elements up
-- to (and consuming) the newline.  Returns the parsed inlines.
titleLine = try $ do
  char '%'
  skipSpaces
  manyTill inline newline
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | Parse an authors line: a @%@, then 'skipSpaces', then author names
-- separated (and optionally terminated) by @,@ or @;@, then a newline.
-- Each name is passed through 'removeLeadingTrailingSpace' and
-- 'decodeCharacterReferences' before being returned.
authorsLine = try $ do
  char '%'
  skipSpaces
  rawNames <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
  newline
  return $ map cleanName rawNames
  where
    cleanName = decodeCharacterReferences . removeLeadingTrailingSpace
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | Parse a date line: a @%@, then 'skipSpaces', then everything up to the
-- newline.  The text is passed through 'removeTrailingSpace' and
-- 'decodeCharacterReferences' before being returned.
dateLine = try $ do
  char '%'
  skipSpaces
  rawDate <- many (noneOf "\n")
  newline
  return $ (decodeCharacterReferences . removeTrailingSpace) rawDate
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | Parse the optional title block at the start of a document and return
-- a @(title, authors, date)@ triple.  Each of the three lines is
-- optional and defaults to an empty value; trailing blank lines are
-- skipped.  Runs 'failIfStrict' first.
titleBlock = try $ do
  failIfStrict
  docTitle   <- option [] titleLine
  docAuthors <- option [] authorsLine
  docDate    <- option "" dateLine
  optional blanklines
  return (docTitle, docAuthors, docDate)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Top-level markdown parser.  Works in three passes over the input:
--
-- 1. collect reference keys and store them in 'stateKeys';
-- 2. collect note blocks (which may themselves contain references, hence
--    the separate pass) and store them in 'stateNotes';
-- 3. parse the remaining text, with keys and notes stripped out, into
--    blocks.
--
-- Returns a 'Pandoc' document whose metadata comes from the optional
-- title block and whose body has all 'Null' blocks removed.
parseMarkdown = do
  -- markdown allows raw HTML
  updateState (\state -> state { stateParseRaw = True })
  (title, author, date) <- option ([], [], "") titleBlock
  -- Anything that is not a key or note is kept as a raw clump of lines.
  let rawClump = lineClump >>= return . LineClump
  -- go through once just to get list of reference keys
  keyPass <- manyTill (referenceKey <|> rawClump) eof
  -- Pattern generators skip non-matching elements, so no partial lambda
  -- (and no separate filter) is needed to pick out each constructor.
  let keys = [ (label, target) | KeyBlock label target <- keyPass ]
  setInput $ concat [ ln | LineClump ln <- keyPass ]  -- with keys stripped out
  updateState (\state -> state { stateKeys = keys })
  -- now go through for notes (which may contain references - hence 2nd pass)
  notePass <- manyTill (noteBlock <|> rawClump) eof
  let notes = [ (label, contents) | NoteBlock label contents <- notePass ]
  -- go through a 3rd time, with note blocks and keys stripped out
  setInput $ concat [ ln | LineClump ln <- notePass ]
  updateState (\state -> state { stateNotes = notes })
  blocks <- parseBlocks
  return $ Pandoc (Meta title author date) (filter (/= Null) blocks)
|
Extensive changes stemming from a rethinking of the Pandoc data
structure. Key and Note blocks have been removed. Link and image URLs
are now stored directly in Link and Image inlines, and note blocks
are stored in Note inlines. This requires changes in both parsers
and writers. Markdown and RST parsers need to extract data from key
and note blocks and insert them into the relevant inline elements.
Other parsers can be simplified, since there is no longer any need to
construct separate key and note blocks. Markdown, RST, and HTML writers
need to construct lists of notes; Markdown and RST writers need to
construct lists of link references (when the --reference-links option
is specified); and the RST writer needs to construct a list of image
substitution references. All writers have been rewritten to use the
State monad when state is required. This rewrite yields a small speed
boost and considerably cleaner code.
* Text/Pandoc/Definition.hs:
+ blocks: removed Key and Note
+ inlines: removed NoteRef, added Note
+ modified Target: there is no longer a 'Ref' target; all targets
are explicit URL, title pairs
* Text/Pandoc/Shared.hs:
+ Added 'Reference', 'isNoteBlock', 'isKeyBlock', 'isLineClump',
used in some of the readers.
+ Removed 'generateReference', 'keyTable', 'replaceReferenceLinks',
'replaceRefLinksBlockList', along with some auxiliary functions
used only by them. These are no longer needed, since
reference links are resolved in the Markdown and RST readers.
+ Moved 'inTags', 'selfClosingTag', 'inTagsSimple', and 'inTagsIndented'
to the Docbook writer, since that is now the only module that uses
them.
+ Changed name of 'escapeSGMLString' to 'escapeStringForXML'
+ Added KeyTable and NoteTable types
+ Removed fields from ParserState; 'stateKeyBlocks', 'stateKeysUsed',
'stateNoteBlocks', 'stateNoteIdentifiers', 'stateInlineLinks'.
Added 'stateKeys' and 'stateNotes'.
+ Added clause for Note to 'prettyBlock'.
+ Added 'writerNotes', 'writerReferenceLinks' fields to WriterOptions.
* Text/Pandoc/Entities.hs: Renamed 'escapeSGMLChar' and
'escapeSGMLString' to 'escapeCharForXML' and 'escapeStringForXML'
* Text/ParserCombinators/Pandoc.hs: Added lineClump parser: parses a raw
line block up to and including following blank lines.
* Main.hs: Replaced --inline-links with --reference-links.
* README:
+ Documented --reference-links and removed description of --inline-links.
+ Added note that footnotes may occur anywhere in the document, but must
be at the outer level, not embedded in block elements.
* man/man1/pandoc.1, man/man1/html2markdown.1: Removed --inline-links
option, added --reference-links option
* Markdown and RST readers:
+ Rewrote to fit new Pandoc definition. Since there are no longer
Note or Key blocks, all note and key blocks are parsed on a first pass
through the document. Once tables of notes and keys have been constructed,
the remaining parts of the document are reassembled and parsed.
+ Refactored link parsers.
* LaTeX and HTML readers: Rewrote to fit new Pandoc definition. Since
there are no longer Note or Key blocks, notes and references can be
parsed in a single pass through the document.
* RST, Markdown, and HTML writers: Rewrote using state monad and new
Pandoc definition. State is used to hold lists of references and
footnotes to be printed at the end of the document.
* RTF and LaTeX writers: Rewrote using new Pandoc definition. (Because
of the different treatment of footnotes, the "notes" parameter is no
longer needed in the block and inline conversion functions.)
* Docbook writer:
+ Moved the functions 'attributeList', 'inTags', 'selfClosingTag',
'inTagsSimple', 'inTagsIndented' from Text/Pandoc/Shared, since
they are now used only by the Docbook writer.
+ Rewrote using new Pandoc definition. (Because of the different
treatment of footnotes, the "notes" parameter is no longer needed
in the block and inline conversion functions.)
* Updated test suite
* Throughout: old haskell98 module names replaced by hierarchical module
names, e.g. List by Data.List.
* debian/control: Include libghc6-xhtml-dev instead of libghc6-html-dev
in "Build-Depends."
* cabalize:
+ Remove haskell98 from BASE_DEPENDS (since now the new hierarchical
module names are being used throughout)
+ Added mtl to BASE_DEPENDS (needed for state monad)
+ Removed html from GHC66_DEPENDS (not needed since xhtml is now used)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@580 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-04-10 01:56:50 +00:00
|
|
|
|
|
|
|
--
|
2007-08-15 06:00:58 +00:00
|
|
|
-- initial pass for references and notes
|
Extensive changes stemming from a rethinking of the Pandoc data
structure. Key and Note blocks have been removed. Link and image URLs
are now stored directly in Link and Image inlines, and note blocks
are stored in Note inlines. This requires changes in both parsers
and writers. Markdown and RST parsers need to extract data from key
and note blocks and insert them into the relevant inline elements.
Other parsers can be simplified, since there is no longer any need to
construct separate key and note blocks. Markdown, RST, and HTML writers
need to construct lists of notes; Markdown and RST writers need to
construct lists of link references (when the --reference-links option
is specified); and the RST writer needs to construct a list of image
substitution references. All writers have been rewritten to use the
State monad when state is required. This rewrite yields a small speed
boost and considerably cleaner code.
* Text/Pandoc/Definition.hs:
+ blocks: removed Key and Note
+ inlines: removed NoteRef, added Note
+ modified Target: there is no longer a 'Ref' target; all targets
are explicit URL, title pairs
* Text/Pandoc/Shared.hs:
+ Added 'Reference', 'isNoteBlock', 'isKeyBlock', 'isLineClump',
used in some of the readers.
+ Removed 'generateReference', 'keyTable', 'replaceReferenceLinks',
'replaceRefLinksBlockList', along with some auxiliary functions
used only by them. These are no longer needed, since
reference links are resolved in the Markdown and RST readers.
+ Moved 'inTags', 'selfClosingTag', 'inTagsSimple', and 'inTagsIndented'
to the Docbook writer, since that is now the only module that uses
them.
+ Changed name of 'escapeSGMLString' to 'escapeStringForXML'
+ Added KeyTable and NoteTable types
+ Removed fields from ParserState; 'stateKeyBlocks', 'stateKeysUsed',
'stateNoteBlocks', 'stateNoteIdentifiers', 'stateInlineLinks'.
Added 'stateKeys' and 'stateNotes'.
+ Added clause for Note to 'prettyBlock'.
+ Added 'writerNotes', 'writerReferenceLinks' fields to WriterOptions.
* Text/Pandoc/Entities.hs: Renamed 'escapeSGMLChar' and
'escapeSGMLString' to 'escapeCharForXML' and 'escapeStringForXML'
* Text/ParserCombinators/Pandoc.hs: Added lineClump parser: parses a raw
line block up to and including following blank lines.
* Main.hs: Replaced --inline-links with --reference-links.
* README:
+ Documented --reference-links and removed description of --inline-links.
+ Added note that footnotes may occur anywhere in the document, but must
be at the outer level, not embedded in block elements.
* man/man1/pandoc.1, man/man1/html2markdown.1: Removed --inline-links
option, added --reference-links option
* Markdown and RST readers:
+ Rewrote to fit new Pandoc definition. Since there are no longer
Note or Key blocks, all note and key blocks are parsed on a first pass
through the document. Once tables of notes and keys have been constructed,
the remaining parts of the document are reassembled and parsed.
+ Refactored link parsers.
* LaTeX and HTML readers: Rewrote to fit new Pandoc definition. Since
there are no longer Note or Key blocks, notes and references can be
parsed in a single pass through the document.
* RST, Markdown, and HTML writers: Rewrote using state monad and new
Pandoc definition. State is used to hold lists of references and
footnotes to be printed at the end of the document.
* RTF and LaTeX writers: Rewrote using new Pandoc definition. (Because
of the different treatment of footnotes, the "notes" parameter is no
longer needed in the block and inline conversion functions.)
* Docbook writer:
+ Moved the functions 'attributeList', 'inTags', 'selfClosingTag',
'inTagsSimple', 'inTagsIndented' from Text/Pandoc/Shared, since
they are now used only by the Docbook writer.
+ Rewrote using new Pandoc definition. (Because of the different
treatment of footnotes, the "notes" parameter is no longer needed
in the block and inline conversion functions.)
* Updated test suite
* Throughout: old haskell98 module names replaced by hierarchical module
names, e.g. List by Data.List.
* debian/control: Include libghc6-xhtml-dev instead of libghc6-html-dev
in "Build-Depends."
* cabalize:
+ Remove haskell98 from BASE_DEPENDS (since now the new hierarchical
module names are being used throughout)
+ Added mtl to BASE_DEPENDS (needed for state monad)
+ Removed html from GHC66_DEPENDS (not needed since xhtml is now used)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@580 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-04-10 01:56:50 +00:00
|
|
|
--
|
|
|
|
|
|
|
|
referenceKey = try $ do
|
|
|
|
nonindentSpaces
|
|
|
|
label <- reference
|
2007-07-21 22:52:07 +00:00
|
|
|
char ':'
|
Extensive changes stemming from a rethinking of the Pandoc data
structure. Key and Note blocks have been removed. Link and image URLs
are now stored directly in Link and Image inlines, and note blocks
are stored in Note inlines. This requires changes in both parsers
and writers. Markdown and RST parsers need to extract data from key
and note blocks and insert them into the relevant inline elements.
Other parsers can be simplified, since there is no longer any need to
construct separate key and note blocks. Markdown, RST, and HTML writers
need to construct lists of notes; Markdown and RST writers need to
construct lists of link references (when the --reference-links option
is specified); and the RST writer needs to construct a list of image
substitution references. All writers have been rewritten to use the
State monad when state is required. This rewrite yields a small speed
boost and considerably cleaner code.
* Text/Pandoc/Definition.hs:
+ blocks: removed Key and Note
+ inlines: removed NoteRef, added Note
+ modified Target: there is no longer a 'Ref' target; all targets
are explicit URL, title pairs
* Text/Pandoc/Shared.hs:
+ Added 'Reference', 'isNoteBlock', 'isKeyBlock', 'isLineClump',
used in some of the readers.
+ Removed 'generateReference', 'keyTable', 'replaceReferenceLinks',
'replaceRefLinksBlockList', along with some auxiliary functions
used only by them. These are no longer needed, since
reference links are resolved in the Markdown and RST readers.
+ Moved 'inTags', 'selfClosingTag', 'inTagsSimple', and 'inTagsIndented'
to the Docbook writer, since that is now the only module that uses
them.
+ Changed name of 'escapeSGMLString' to 'escapeStringForXML'
+ Added KeyTable and NoteTable types
+ Removed fields from ParserState; 'stateKeyBlocks', 'stateKeysUsed',
'stateNoteBlocks', 'stateNoteIdentifiers', 'stateInlineLinks'.
Added 'stateKeys' and 'stateNotes'.
+ Added clause for Note to 'prettyBlock'.
+ Added 'writerNotes', 'writerReferenceLinks' fields to WriterOptions.
* Text/Pandoc/Entities.hs: Renamed 'escapeSGMLChar' and
'escapeSGMLString' to 'escapeCharForXML' and 'escapeStringForXML'
* Text/ParserCombinators/Pandoc.hs: Added lineClump parser: parses a raw
line block up to and including following blank lines.
* Main.hs: Replaced --inline-links with --reference-links.
* README:
+ Documented --reference-links and removed description of --inline-links.
+ Added note that footnotes may occur anywhere in the document, but must
be at the outer level, not embedded in block elements.
* man/man1/pandoc.1, man/man1/html2markdown.1: Removed --inline-links
option, added --reference-links option
* Markdown and RST readers:
+ Rewrote to fit new Pandoc definition. Since there are no longer
Note or Key blocks, all note and key blocks are parsed on a first pass
through the document. Once tables of notes and keys have been constructed,
the remaining parts of the document are reassembled and parsed.
+ Refactored link parsers.
* LaTeX and HTML readers: Rewrote to fit new Pandoc definition. Since
there are no longer Note or Key blocks, notes and references can be
parsed in a single pass through the document.
* RST, Markdown, and HTML writers: Rewrote using state monad and new
Pandoc definition. State is used to hold lists of references and
footnotes to be printed at the end of the document.
* RTF and LaTeX writers: Rewrote using new Pandoc definition. (Because
of the different treatment of footnotes, the "notes" parameter is no
longer needed in the block and inline conversion functions.)
* Docbook writer:
+ Moved the functions 'attributeList', 'inTags', 'selfClosingTag',
'inTagsSimple', 'inTagsIndented' from Text/Pandoc/Shared, since
they are now used only by the Docbook writer.
+ Rewrote using new Pandoc definition. (Because of the different
treatment of footnotes, the "notes" parameter is no longer needed
in the block and inline conversion functions.)
* Updated test suite
* Throughout: old haskell98 module names replaced by hierarchical module
names, e.g. List by Data.List.
* debian/control: Include libghc6-xhtml-dev instead of libghc6-html-dev
in "Build-Depends."
* cabalize:
+ Remove haskell98 from BASE_DEPENDS (since now the new hierarchical
module names are being used throughout)
+ Added mtl to BASE_DEPENDS (needed for state monad)
+ Removed html from GHC66_DEPENDS (not needed since xhtml is now used)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@580 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-04-10 01:56:50 +00:00
|
|
|
skipSpaces
|
2007-08-15 06:00:58 +00:00
|
|
|
optional (char '<')
|
2007-07-21 22:52:07 +00:00
|
|
|
src <- many (noneOf "> \n\t")
|
2007-08-15 06:00:58 +00:00
|
|
|
optional (char '>')
|
2007-08-29 19:57:01 +00:00
|
|
|
tit <- option "" referenceTitle
|
|
|
|
blanklines
|
Extensive changes stemming from a rethinking of the Pandoc data
structure. Key and Note blocks have been removed. Link and image URLs
are now stored directly in Link and Image inlines, and note blocks
are stored in Note inlines. This requires changes in both parsers
and writers. Markdown and RST parsers need to extract data from key
and note blocks and insert them into the relevant inline elements.
Other parsers can be simplified, since there is no longer any need to
construct separate key and note blocks. Markdown, RST, and HTML writers
need to construct lists of notes; Markdown and RST writers need to
construct lists of link references (when the --reference-links option
is specified); and the RST writer needs to construct a list of image
substitution references. All writers have been rewritten to use the
State monad when state is required. This rewrite yields a small speed
boost and considerably cleaner code.
* Text/Pandoc/Definition.hs:
+ blocks: removed Key and Note
+ inlines: removed NoteRef, added Note
+ modified Target: there is no longer a 'Ref' target; all targets
are explicit URL, title pairs
* Text/Pandoc/Shared.hs:
+ Added 'Reference', 'isNoteBlock', 'isKeyBlock', 'isLineClump',
used in some of the readers.
+ Removed 'generateReference', 'keyTable', 'replaceReferenceLinks',
'replaceRefLinksBlockList', along with some auxiliary functions
used only by them. These are no longer needed, since
reference links are resolved in the Markdown and RST readers.
+ Moved 'inTags', 'selfClosingTag', 'inTagsSimple', and 'inTagsIndented'
to the Docbook writer, since that is now the only module that uses
them.
+ Changed name of 'escapeSGMLString' to 'escapeStringForXML'
+ Added KeyTable and NoteTable types
+ Removed fields from ParserState; 'stateKeyBlocks', 'stateKeysUsed',
'stateNoteBlocks', 'stateNoteIdentifiers', 'stateInlineLinks'.
Added 'stateKeys' and 'stateNotes'.
+ Added clause for Note to 'prettyBlock'.
+ Added 'writerNotes', 'writerReferenceLinks' fields to WriterOptions.
* Text/Pandoc/Entities.hs: Renamed 'escapeSGMLChar' and
'escapeSGMLString' to 'escapeCharForXML' and 'escapeStringForXML'
* Text/ParserCombinators/Pandoc.hs: Added lineClump parser: parses a raw
line block up to and including following blank lines.
* Main.hs: Replaced --inline-links with --reference-links.
* README:
+ Documented --reference-links and removed description of --inline-links.
+ Added note that footnotes may occur anywhere in the document, but must
be at the outer level, not embedded in block elements.
* man/man1/pandoc.1, man/man1/html2markdown.1: Removed --inline-links
option, added --reference-links option
* Markdown and RST readers:
+ Rewrote to fit new Pandoc definition. Since there are no longer
Note or Key blocks, all note and key blocks are parsed on a first pass
through the document. Once tables of notes and keys have been constructed,
the remaining parts of the document are reassembled and parsed.
+ Refactored link parsers.
* LaTeX and HTML readers: Rewrote to fit new Pandoc definition. Since
there are no longer Note or Key blocks, notes and references can be
parsed in a single pass through the document.
* RST, Markdown, and HTML writers: Rewrote using state monad and new Pandoc
definition. State is used to hold lists of references and footnotes to
be printed at the end of the document.
* RTF and LaTeX writers: Rewrote using new Pandoc definition. (Because
of the different treatment of footnotes, the "notes" parameter is no
longer needed in the block and inline conversion functions.)
* Docbook writer:
+ Moved the functions 'attributeList', 'inTags', 'selfClosingTag',
'inTagsSimple', 'inTagsIndented' from Text/Pandoc/Shared, since
they are now used only by the Docbook writer.
+ Rewrote using new Pandoc definition. (Because of the different
treatment of footnotes, the "notes" parameter is no longer needed
in the block and inline conversion functions.)
* Updated test suite
* Throughout: old haskell98 module names replaced by hierarchical module
names, e.g. List by Data.List.
* debian/control: Include libghc6-xhtml-dev instead of libghc6-html-dev
in "Build-Depends."
* cabalize:
+ Remove haskell98 from BASE_DEPENDS (since now the new hierarchical
module names are being used throughout)
+ Added mtl to BASE_DEPENDS (needed for state monad)
+ Removed html from GHC66_DEPENDS (not needed since xhtml is now used)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@580 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-04-10 01:56:50 +00:00
|
|
|
return $ KeyBlock label (removeTrailingSpace src, tit)
|
|
|
|
|
2007-08-29 01:51:03 +00:00
|
|
|
-- | Parse the title part of a reference definition.  Leading spaces and
-- at most one newline are skipped first.  Two title forms are accepted:
--
-- * text matched by @charsInBalanced '(' ')'@, with every run of
--   whitespace collapsed to a single space;
--
-- * text opened by a single or double quote, running up to the first
--   matching quote that is followed (after optional spaces) only by
--   @)@, a newline, or the end of input.
--
-- The result is passed through 'decodeCharacterReferences'.  The whole
-- parser is wrapped in 'try', so it consumes nothing on failure.
referenceTitle = try $ do
  skipSpaces >> optional newline >> skipSpaces
  rawTitle <- parenthesized <|> quoted
  return (decodeCharacterReferences rawTitle)
  where
    -- Title in balanced parentheses, whitespace-normalized.
    parenthesized = do
      inner <- charsInBalanced '(' ')'
      return (unwords (words inner))
    -- Title in quotes; the opening quote decides the closing one.
    quoted = do
      quoteChar <- char '\'' <|> char '"'
      manyTill anyChar (try (closing quoteChar))
    -- A quote only closes the title when nothing but ')' or a newline
    -- (or end of input) follows it, so embedded quotes are allowed.
    closing quoteChar = do
      char quoteChar
      skipSpaces
      notFollowedBy (noneOf ")\n")
|
|
|
|
|
2007-08-29 00:16:50 +00:00
|
|
|
-- | Parse a footnote marker of the form @[^label]@ and return the label.
-- The label may not contain spaces, tabs or newlines and must be
-- non-empty: a bare @[^]@ is rejected so that it can never define or
-- reference a note with an empty identifier.
--
-- The parser may consume input before failing (e.g. after reading
-- @[^@), so callers that need backtracking should wrap it in 'try'.
noteMarker = do
  string "[^"
  -- Require at least one label character before the closing bracket.
  notFollowedBy (char ']')
  manyTill (noneOf " \t\n") (char ']')
|
Extensive changes stemming from a rethinking of the Pandoc data
structure. Key and Note blocks have been removed. Link and image URLs
are now stored directly in Link and Image inlines, and note blocks
are stored in Note inlines. This requires changes in both parsers
and writers. Markdown and RST parsers need to extract data from key
and note blocks and insert them into the relevant inline elements.
Other parsers can be simplified, since there is no longer any need to
construct separate key and note blocks. Markdown, RST, and HTML writers
need to construct lists of notes; Markdown and RST writers need to
construct lists of link references (when the --reference-links option
is specified); and the RST writer needs to construct a list of image
substitution references. All writers have been rewritten to use the
State monad when state is required. This rewrite yields a small speed
boost and considerably cleaner code.
* Text/Pandoc/Definition.hs:
+ blocks: removed Key and Note
+ inlines: removed NoteRef, added Note
+ modified Target: there is no longer a 'Ref' target; all targets
are explicit URL, title pairs
* Text/Pandoc/Shared.hs:
+ Added 'Reference', 'isNoteBlock', 'isKeyBlock', 'isLineClump',
used in some of the readers.
+ Removed 'generateReference', 'keyTable', 'replaceReferenceLinks',
'replaceRefLinksBlockList', along with some auxiliary functions
used only by them. These are no longer needed, since
reference links are resolved in the Markdown and RST readers.
+ Moved 'inTags', 'selfClosingTag', 'inTagsSimple', and 'inTagsIndented'
to the Docbook writer, since that is now the only module that uses
them.
+ Changed name of 'escapeSGMLString' to 'escapeStringForXML'
+ Added KeyTable and NoteTable types
+ Removed fields from ParserState; 'stateKeyBlocks', 'stateKeysUsed',
'stateNoteBlocks', 'stateNoteIdentifiers', 'stateInlineLinks'.
Added 'stateKeys' and 'stateNotes'.
+ Added clause for Note to 'prettyBlock'.
+ Added 'writerNotes', 'writerReferenceLinks' fields to WriterOptions.
* Text/Pandoc/Entities.hs: Renamed 'escapeSGMLChar' and
'escapeSGMLString' to 'escapeCharForXML' and 'escapeStringForXML'
* Text/ParserCombinators/Pandoc.hs: Added lineClump parser: parses a raw
line block up to and including following blank lines.
* Main.hs: Replaced --inline-links with --reference-links.
* README:
+ Documented --reference-links and removed description of --inline-links.
+ Added note that footnotes may occur anywhere in the document, but must
be at the outer level, not embedded in block elements.
* man/man1/pandoc.1, man/man1/html2markdown.1: Removed --inline-links
option, added --reference-links option
* Markdown and RST readers:
+ Rewrote to fit new Pandoc definition. Since there are no longer
Note or Key blocks, all note and key blocks are parsed on a first pass
through the document. Once tables of notes and keys have been constructed,
the remaining parts of the document are reassembled and parsed.
+ Refactored link parsers.
* LaTeX and HTML readers: Rewrote to fit new Pandoc definition. Since
there are no longer Note or Key blocks, notes and references can be
parsed in a single pass through the document.
* RST, Markdown, and HTML writers: Rewrote using state monad and new Pandoc
definition. State is used to hold lists of references and footnotes to
be printed at the end of the document.
* RTF and LaTeX writers: Rewrote using new Pandoc definition. (Because
of the different treatment of footnotes, the "notes" parameter is no
longer needed in the block and inline conversion functions.)
* Docbook writer:
+ Moved the functions 'attributeList', 'inTags', 'selfClosingTag',
'inTagsSimple', 'inTagsIndented' from Text/Pandoc/Shared, since
they are now used only by the Docbook writer.
+ Rewrote using new Pandoc definition. (Because of the different
treatment of footnotes, the "notes" parameter is no longer needed
in the block and inline conversion functions.)
* Updated test suite
* Throughout: old haskell98 module names replaced by hierarchical module
names, e.g. List by Data.List.
* debian/control: Include libghc6-xhtml-dev instead of libghc6-html-dev
in "Build-Depends."
* cabalize:
+ Remove haskell98 from BASE_DEPENDS (since now the new hierarchical
module names are being used throughout)
+ Added mtl to BASE_DEPENDS (needed for state monad)
+ Removed html from GHC66_DEPENDS (not needed since xhtml is now used)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@580 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-04-10 01:56:50 +00:00
|
|
|
|
2007-07-21 22:52:07 +00:00
|
|
|
-- | Parse one raw (unparsed) line of note text.  The line must not be
-- blank and must not begin with a note marker.  If the line is ended by
-- a newline, that newline is consumed together with any following
-- 'indentSpaces', and the result ends in a single @"\n"@; otherwise the
-- result is just the line's characters.
rawLine = try $ do
  notFollowedBy blankline
  notFollowedBy' noteMarker
  body <- many1 nonEndline
  terminator <- option "" lineEnd
  return (body ++ terminator)
  where
    -- Newline plus optional indentation of the following line; the
    -- indentation itself is dropped from the result.
    lineEnd = do
      newline
      optional indentSpaces
      return "\n"
|
Extensive changes stemming from a rethinking of the Pandoc data
structure. Key and Note blocks have been removed. Link and image URLs
are now stored directly in Link and Image inlines, and note blocks
are stored in Note inlines. This requires changes in both parsers
and writers. Markdown and RST parsers need to extract data from key
and note blocks and insert them into the relevant inline elements.
Other parsers can be simplified, since there is no longer any need to
construct separate key and note blocks. Markdown, RST, and HTML writers
need to construct lists of notes; Markdown and RST writers need to
construct lists of link references (when the --reference-links option
is specified); and the RST writer needs to construct a list of image
substitution references. All writers have been rewritten to use the
State monad when state is required. This rewrite yields a small speed
boost and considerably cleaner code.
* Text/Pandoc/Definition.hs:
+ blocks: removed Key and Note
+ inlines: removed NoteRef, added Note
+ modified Target: there is no longer a 'Ref' target; all targets
are explicit URL, title pairs
* Text/Pandoc/Shared.hs:
+ Added 'Reference', 'isNoteBlock', 'isKeyBlock', 'isLineClump',
used in some of the readers.
+ Removed 'generateReference', 'keyTable', 'replaceReferenceLinks',
'replaceRefLinksBlockList', along with some auxiliary functions
used only by them. These are no longer needed, since
reference links are resolved in the Markdown and RST readers.
+ Moved 'inTags', 'selfClosingTag', 'inTagsSimple', and 'inTagsIndented'
to the Docbook writer, since that is now the only module that uses
them.
+ Changed name of 'escapeSGMLString' to 'escapeStringForXML'
+ Added KeyTable and NoteTable types
+ Removed fields from ParserState; 'stateKeyBlocks', 'stateKeysUsed',
'stateNoteBlocks', 'stateNoteIdentifiers', 'stateInlineLinks'.
Added 'stateKeys' and 'stateNotes'.
+ Added clause for Note to 'prettyBlock'.
+ Added 'writerNotes', 'writerReferenceLinks' fields to WriterOptions.
* Text/Pandoc/Entities.hs: Renamed 'escapeSGMLChar' and
'escapeSGMLString' to 'escapeCharForXML' and 'escapeStringForXML'
* Text/ParserCombinators/Pandoc.hs: Added lineClump parser: parses a raw
line block up to and including following blank lines.
* Main.hs: Replaced --inline-links with --reference-links.
* README:
+ Documented --reference-links and removed description of --inline-links.
+ Added note that footnotes may occur anywhere in the document, but must
be at the outer level, not embedded in block elements.
* man/man1/pandoc.1, man/man1/html2markdown.1: Removed --inline-links
option, added --reference-links option
* Markdown and RST readers:
+ Rewrote to fit new Pandoc definition. Since there are no longer
Note or Key blocks, all note and key blocks are parsed on a first pass
through the document. Once tables of notes and keys have been constructed,
the remaining parts of the document are reassembled and parsed.
+ Refactored link parsers.
* LaTeX and HTML readers: Rewrote to fit new Pandoc definition. Since
there are no longer Note or Key blocks, notes and references can be
parsed in a single pass through the document.
* RST, Markdown, and HTML writers: Rewrote using state monad and new Pandoc
definition. State is used to hold lists of references and footnotes to
be printed at the end of the document.
* RTF and LaTeX writers: Rewrote using new Pandoc definition. (Because
of the different treatment of footnotes, the "notes" parameter is no
longer needed in the block and inline conversion functions.)
* Docbook writer:
+ Moved the functions 'attributeList', 'inTags', 'selfClosingTag',
'inTagsSimple', 'inTagsIndented' from Text/Pandoc/Shared, since
they are now used only by the Docbook writer.
+ Rewrote using new Pandoc definition. (Because of the different
treatment of footnotes, the "notes" parameter is no longer needed
in the block and inline conversion functions.)
* Updated test suite
* Throughout: old haskell98 module names replaced by hierarchical module
names, e.g. List by Data.List.
* debian/control: Include libghc6-xhtml-dev instead of libghc6-html-dev
in "Build-Depends."
* cabalize:
+ Remove haskell98 from BASE_DEPENDS (since now the new hierarchical
module names are being used throughout)
+ Added mtl to BASE_DEPENDS (needed for state monad)
+ Removed html from GHC66_DEPENDS (not needed since xhtml is now used)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@580 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-04-10 01:56:50 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | Parse one or more consecutive raw lines and return them concatenated
-- into a single string.
rawLines = do
  pieces <- many1 rawLine
  return (concat pieces)
|
Extensive changes stemming from a rethinking of the Pandoc data
structure. Key and Note blocks have been removed. Link and image URLs
are now stored directly in Link and Image inlines, and note blocks
are stored in Note inlines. This requires changes in both parsers
and writers. Markdown and RST parsers need to extract data from key
and note blocks and insert them into the relevant inline elements.
Other parsers can be simplified, since there is no longer any need to
construct separate key and note blocks. Markdown, RST, and HTML writers
need to construct lists of notes; Markdown and RST writers need to
construct lists of link references (when the --reference-links option
is specified); and the RST writer needs to construct a list of image
substitution references. All writers have been rewritten to use the
State monad when state is required. This rewrite yields a small speed
boost and considerably cleaner code.
* Text/Pandoc/Definition.hs:
+ blocks: removed Key and Note
+ inlines: removed NoteRef, added Note
+ modified Target: there is no longer a 'Ref' target; all targets
are explicit URL, title pairs
* Text/Pandoc/Shared.hs:
+ Added 'Reference', 'isNoteBlock', 'isKeyBlock', 'isLineClump',
used in some of the readers.
+ Removed 'generateReference', 'keyTable', 'replaceReferenceLinks',
'replaceRefLinksBlockList', along with some auxiliary functions
used only by them. These are no longer needed, since
reference links are resolved in the Markdown and RST readers.
+ Moved 'inTags', 'selfClosingTag', 'inTagsSimple', and 'inTagsIndented'
to the Docbook writer, since that is now the only module that uses
them.
+ Changed name of 'escapeSGMLString' to 'escapeStringForXML'
+ Added KeyTable and NoteTable types
+ Removed fields from ParserState; 'stateKeyBlocks', 'stateKeysUsed',
'stateNoteBlocks', 'stateNoteIdentifiers', 'stateInlineLinks'.
Added 'stateKeys' and 'stateNotes'.
+ Added clause for Note to 'prettyBlock'.
+ Added 'writerNotes', 'writerReferenceLinks' fields to WriterOptions.
* Text/Pandoc/Entities.hs: Renamed 'escapeSGMLChar' and
'escapeSGMLString' to 'escapeCharForXML' and 'escapeStringForXML'
* Text/ParserCombinators/Pandoc.hs: Added lineClump parser: parses a raw
line block up to and including following blank lines.
* Main.hs: Replaced --inline-links with --reference-links.
* README:
+ Documented --reference-links and removed description of --inline-links.
+ Added note that footnotes may occur anywhere in the document, but must
be at the outer level, not embedded in block elements.
* man/man1/pandoc.1, man/man1/html2markdown.1: Removed --inline-links
option, added --reference-links option
* Markdown and RST readers:
+ Rewrote to fit new Pandoc definition. Since there are no longer
Note or Key blocks, all note and key blocks are parsed on a first pass
through the document. Once tables of notes and keys have been constructed,
the remaining parts of the document are reassembled and parsed.
+ Refactored link parsers.
* LaTeX and HTML readers: Rewrote to fit new Pandoc definition. Since
there are no longer Note or Key blocks, notes and references can be
parsed in a single pass through the document.
* RST, Markdown, and HTML writers: Rewrote using state monad and new Pandoc
definition. State is used to hold lists of references and footnotes to
be printed at the end of the document.
* RTF and LaTeX writers: Rewrote using new Pandoc definition. (Because
of the different treatment of footnotes, the "notes" parameter is no
longer needed in the block and inline conversion functions.)
* Docbook writer:
+ Moved the functions 'attributeList', 'inTags', 'selfClosingTag',
'inTagsSimple', 'inTagsIndented' from Text/Pandoc/Shared, since
they are now used only by the Docbook writer.
+ Rewrote using new Pandoc definition. (Because of the different
treatment of footnotes, the "notes" parameter is no longer needed
in the block and inline conversion functions.)
* Updated test suite
* Throughout: old haskell98 module names replaced by hierarchical module
names, e.g. List by Data.List.
* debian/control: Include libghc6-xhtml-dev instead of libghc6-html-dev
in "Build-Depends."
* cabalize:
+ Remove haskell98 from BASE_DEPENDS (since now the new hierarchical
module names are being used throughout)
+ Added mtl to BASE_DEPENDS (needed for state monad)
+ Removed html from GHC66_DEPENDS (not needed since xhtml is now used)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@580 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-04-10 01:56:50 +00:00
|
|
|
|
|
|
|
-- | Parse a footnote definition: a note marker followed by a colon and
-- the note's text.  The text consists of groups of raw lines; a further
-- group belongs to the note only if it follows a blank line and starts
-- with 'indentSpaces'.  The collected text is then re-parsed with
-- 'parseBlocks', so a note may contain arbitrary block elements.
--
-- Runs 'failIfStrict' first (presumably disabling notes in strict
-- mode; see its definition).  Wrapped in 'try', so nothing is consumed
-- on failure.
noteBlock = try $ do
  failIfStrict
  label <- noteMarker
  char ':'
  optional blankline
  optional indentSpaces
  chunks <- rawLines `sepBy` try continuation
  optional blanklines
  -- Join the chunks with newlines and end with a blank line so that
  -- the block parsers see a properly terminated document.
  let noteText = joinWithSep "\n" chunks ++ "\n\n"
  blocks <- parseFromString parseBlocks noteText
  return (NoteBlock label blocks)
  where
    -- Separator between chunks: a blank line, then an indented line.
    continuation = blankline >> indentSpaces
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- parsing blocks
|
|
|
|
--
|
|
|
|
|
2006-12-31 16:46:48 +00:00
|
|
|
-- | Parse a sequence of blocks up to the end of the input.
parseBlocks = block `manyTill` eof
|
2006-10-17 14:22:29 +00:00
|
|
|
|
Extensive changes stemming from a rethinking of the Pandoc data
structure. Key and Note blocks have been removed. Link and image URLs
are now stored directly in Link and Image inlines, and note blocks
are stored in Note inlines. This requires changes in both parsers
and writers. Markdown and RST parsers need to extract data from key
and note blocks and insert them into the relevant inline elements.
Other parsers can be simplified, since there is no longer any need to
construct separate key and note blocks. Markdown, RST, and HTML writers
need to construct lists of notes; Markdown and RST writers need to
construct lists of link references (when the --reference-links option
is specified); and the RST writer needs to construct a list of image
substitution references. All writers have been rewritten to use the
State monad when state is required. This rewrite yields a small speed
boost and considerably cleaner code.
* Text/Pandoc/Definition.hs:
+ blocks: removed Key and Note
+ inlines: removed NoteRef, added Note
+ modified Target: there is no longer a 'Ref' target; all targets
are explicit URL, title pairs
* Text/Pandoc/Shared.hs:
+ Added 'Reference', 'isNoteBlock', 'isKeyBlock', 'isLineClump',
used in some of the readers.
+ Removed 'generateReference', 'keyTable', 'replaceReferenceLinks',
'replaceRefLinksBlockList', along with some auxiliary functions
used only by them. These are no longer needed, since
reference links are resolved in the Markdown and RST readers.
+ Moved 'inTags', 'selfClosingTag', 'inTagsSimple', and 'inTagsIndented'
to the Docbook writer, since that is now the only module that uses
them.
+ Changed name of 'escapeSGMLString' to 'escapeStringForXML'
+ Added KeyTable and NoteTable types
+ Removed fields from ParserState; 'stateKeyBlocks', 'stateKeysUsed',
'stateNoteBlocks', 'stateNoteIdentifiers', 'stateInlineLinks'.
Added 'stateKeys' and 'stateNotes'.
+ Added clause for Note to 'prettyBlock'.
+ Added 'writerNotes', 'writerReferenceLinks' fields to WriterOptions.
* Text/Pandoc/Entities.hs: Renamed 'escapeSGMLChar' and
'escapeSGMLString' to 'escapeCharForXML' and 'escapeStringForXML'
* Text/ParserCombinators/Pandoc.hs: Added lineClump parser: parses a raw
line block up to and including following blank lines.
* Main.hs: Replaced --inline-links with --reference-links.
* README:
+ Documented --reference-links and removed description of --inline-links.
+ Added note that footnotes may occur anywhere in the document, but must
be at the outer level, not embedded in block elements.
* man/man1/pandoc.1, man/man1/html2markdown.1: Removed --inline-links
option, added --reference-links option
* Markdown and RST readers:
+ Rewrote to fit new Pandoc definition. Since there are no longer
Note or Key blocks, all note and key blocks are parsed on a first pass
through the document. Once tables of notes and keys have been constructed,
the remaining parts of the document are reassembled and parsed.
+ Refactored link parsers.
* LaTeX and HTML readers: Rewrote to fit new Pandoc definition. Since
there are no longer Note or Key blocks, notes and references can be
parsed in a single pass through the document.
* RST, Markdown, and HTML writers: Rewrote using state monad and new Pandoc
definition. State is used to hold lists of references and footnotes to
be printed at the end of the document.
* RTF and LaTeX writers: Rewrote using new Pandoc definition. (Because
of the different treatment of footnotes, the "notes" parameter is no
longer needed in the block and inline conversion functions.)
* Docbook writer:
+ Moved the functions 'attributeList', 'inTags', 'selfClosingTag',
'inTagsSimple', 'inTagsIndented' from Text/Pandoc/Shared, since
they are now used only by the Docbook writer.
+ Rewrote using new Pandoc definition. (Because of the different
treatment of footnotes, the "notes" parameter is no longer needed
in the block and inline conversion functions.)
* Updated test suite
* Throughout: old haskell98 module names replaced by hierarchical module
names, e.g. List by Data.List.
* debian/control: Include libghc6-xhtml-dev instead of libghc6-html-dev
in "Build-Depends."
* cabalize:
+ Remove haskell98 from BASE_DEPENDS (since now the new hierarchical
module names are being used throughout)
+ Added mtl to BASE_DEPENDS (needed for state monad)
+ Removed html from GHC66_DEPENDS (not needed since xhtml is now used)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@580 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-04-10 01:56:50 +00:00
|
|
|
-- Parse one block-level element.  The alternatives are tried in the
-- order listed, so an earlier parser wins over a later one.
block = choice blockParsers <?> "block"
  where
    blockParsers =
      [ header
      , table
      , codeBlock
      , hrule
      , list
      , blockQuote
      , htmlBlock
      , rawLaTeXEnvironment'
      , para
      , plain
      , nullBlock
      ]
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- header blocks
|
|
|
|
--
|
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse a header: the setext form is tried first, then the ATX form.
header = choice [setextHeader, atxHeader] <?> "header"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse an ATX-style header: a run of '#' characters (whose length is the
-- header level), the header text, and whatever 'atxClosing' accepts.
atxHeader = try $ do
  hashes <- many1 (char '#')
  -- "#." and "#)" would be a list marker, not a header
  notFollowedBy (char '.' <|> char ')')
  skipSpaces
  contents <- manyTill inline atxClosing
  return $ Header (length hashes) (normalizeSpaces contents)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-20 19:21:18 +00:00
|
|
|
-- End of an ATX header: optional closing '#' characters, then blank lines.
atxClosing = try $ do
  skipMany (char '#')
  blanklines
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-28 06:58:20 +00:00
|
|
|
-- Parse a setext-style header: one line of text followed by a line made
-- of a single underline character.  The position of that character in
-- 'setextHChars' (counting from 1) gives the header level.
setextHeader = try $ do
  contents <- many1Till inline newline
  let underline ch lev = try (many1 (char ch)) >> blanklines >> return lev
  level <- choice (zipWith underline setextHChars [1..])
  return $ Header level (normalizeSpaces contents)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- hrule block
|
|
|
|
--
|
|
|
|
|
2007-08-28 07:04:47 +00:00
|
|
|
-- Parse a horizontal rule: at least three occurrences of the same
-- character from 'hruleChars', optionally separated by spaces, alone on
-- a line.
hrule = try $ do
  skipSpaces
  marker <- oneOf hruleChars
  let spacedMarker = skipSpaces >> char marker
  count 2 spacedMarker
  skipMany spacedMarker
  newline
  optional blanklines
  return HorizontalRule
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- code blocks
|
|
|
|
--
|
|
|
|
|
2007-08-20 19:21:18 +00:00
|
|
|
-- Parse one indented line and return its text (without the indentation),
-- terminated by a newline character.
indentedLine = do
  indentSpaces
  contents <- manyTill anyChar newline
  return (contents ++ "\n")
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-20 19:21:18 +00:00
|
|
|
-- Parse an indented code block.  Blank lines are kept only when another
-- indented line follows them; the block must be followed by blank lines.
codeBlock = try $ do
  let blanksThenLine = try $ do
        blanks <- blanklines
        line <- indentedLine
        return (blanks ++ line)
  chunks <- many1 (indentedLine <|> blanksThenLine)
  blanklines
  return $ CodeBlock (stripTrailingNewlines (concat chunks))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- block quotes
|
|
|
|
--
|
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse an Emacs-style boxed quote: a ",----" line, lines prefixed with
-- '|', and a closing "`----".  Returns the raw quoted lines.  Runs
-- 'failIfStrict' first.
emacsBoxQuote = try $ do
  failIfStrict
  string ",----"
  manyTill anyChar newline               -- rest of the opening border
  let boxLine = try $ do
        char '|'
        optional (char ' ')
        manyTill anyChar newline
      boxEnd = try (string "`----")
  quotedLines <- manyTill boxLine boxEnd
  blanklines
  return quotedLines
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse the marker that opens an email-style quoted line: '>' (after
-- 'nonindentSpaces'), optionally followed by one space.
emailBlockQuoteStart = try $ do
  nonindentSpaces
  char '>' >>~ optional (char ' ')
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse an email-style block quote and return its raw lines with the '>'
-- markers removed.  A line without a marker that directly continues a
-- quoted line is folded into it, separated by a newline character.
emailBlockQuote = try $ do
  emailBlockQuoteStart
  let lazyBreak = try $ do
        endline
        notFollowedBy emailBlockQuoteStart
        return '\n'
      quotedLine = many (nonEndline <|> lazyBreak)
      nextMarker = try (newline >> emailBlockQuoteStart)
  quotedLines <- sepBy quotedLine nextMarker
  newline <|> (eof >> return '\n')
  optional blanklines
  return quotedLines
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- Parse a block quote in either supported syntax, then re-parse the
-- extracted text as a sequence of blocks.
blockQuote = do
  rawLines <- emailBlockQuote <|> emacsBoxQuote
  -- the extracted text may itself contain various block elements
  let rawText = joinWithSep "\n" rawLines ++ "\n\n"
  contents <- parseFromString parseBlocks rawText
  return (BlockQuote contents)
|
2006-12-21 09:02:06 +00:00
|
|
|
|
2006-10-17 14:22:29 +00:00
|
|
|
--
|
|
|
|
-- list blocks
|
|
|
|
--
|
|
|
|
|
2007-03-11 07:56:29 +00:00
|
|
|
-- Parse a list: bullet, ordered, or definition list, tried in that order.
list = (bulletList <|> orderedList <|> definitionList) <?> "list"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-08 02:43:15 +00:00
|
|
|
-- Parse the start of a bullet list item: a marker from
-- 'bulletListMarkers' followed by at least one space character.
--   * the optional newline is for an item preceded by a Plain block in
--     a list context;
--   * the 'hrule' check is needed because hrules start out just like lists.
bulletListStart = try $
  optional newline >>
  nonindentSpaces >>
  notFollowedBy' hrule >>
  oneOf bulletListMarkers >>
  spaceChar >>
  skipSpaces
|
2007-03-09 02:37:49 +00:00
|
|
|
|
2007-08-08 02:43:15 +00:00
|
|
|
-- Parse the start of an ordered list item of any style and return its
-- (start number, style, delimiter) attributes.  When 'stateStrict' is set
-- only "digits." is accepted and default attributes are returned;
-- otherwise 'anyOrderedListMarker' decides.
anyOrderedListStart = try $ do
  optional newline -- if preceded by a Plain block in a list context
  nonindentSpaces
  -- "p. 23" is a page number, not a list item.  The multi-token check is
  -- wrapped in 'try' so that a partial match (e.g. "p)" or "p. x") fails
  -- without consuming input and does not make 'notFollowedBy' itself fail.
  notFollowedBy $ try (string "p." >> spaceChar >> digit)
  state <- getState
  if stateStrict state
     then do many1 digit
             char '.'
             spaceChar
             return (1, DefaultStyle, DefaultDelim)
     else anyOrderedListMarker >>~ spaceChar
|
2007-03-09 02:37:49 +00:00
|
|
|
|
2007-08-08 02:43:15 +00:00
|
|
|
-- Parse the start of an ordered list item with the given style and
-- delimiter.  When 'stateStrict' is set, only "digits." is accepted.
-- With a period delimiter, an upper-alpha marker, or an upper-roman
-- marker whose value is one of 1, 5, 10, 50, 100, 500, 1000, must be
-- followed by a tab or two spaces instead of a single space
-- (presumably to avoid mistaking initials for list markers -- confirm).
orderedListStart style delim = try $ do
  optional newline -- if preceded by a Plain block in a list context
  nonindentSpaces
  st <- getState
  num <- if stateStrict st
            then many1 digit >> char '.' >> return 1
            else orderedListMarker style delim
  let singleNumeralValues = [1, 5, 10, 50, 100, 500, 1000]
      needsWideGap = style == UpperAlpha ||
                     (style == UpperRoman && num `elem` singleNumeralValues)
  if delim == Period && needsWideGap
     then char '\t' <|> (spaceChar >> spaceChar)
     else spaceChar
  skipSpaces
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- Parse one line of a list item ('start' is the parser for the beginning
-- of a list item).  The line must not begin a new item, must not be
-- blank, and must not be an indented start of a nested list item.
listLine start = try $ do
  let nestedItemStart = do
        indentSpaces
        many spaceChar
        bulletListStart <|> (anyOrderedListStart >> return ())
  notFollowedBy' start
  notFollowedBy blankline
  notFollowedBy' nestedItemStart
  contents <- manyTill anyChar newline
  return (contents ++ "\n")
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- Parse the raw text of one list item, excluding the start marker and
-- any continuations; trailing blank lines are kept in the result.
rawListItem start = try $ do
  start
  itemLines <- many1 (listLine start)
  trailing <- many blankline
  return (concat itemLines ++ trailing)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- Continuation of a list item: indented, and separated from the item by
-- a blank line or (in compact lists) an endline.
-- Note: nested lists are parsed as continuations.
listContinuation start = try $ do
  lookAhead indentSpaces
  contLines <- many1 (listContinuationLine start)
  trailing <- many blankline
  return (concat contLines ++ trailing)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse one line of a list continuation: not blank, not the start of a
-- new item; one level of indentation is stripped when present.
listContinuationLine start = try $ do
  notFollowedBy blankline
  notFollowedBy' start
  optional indentSpaces
  contents <- manyTill anyChar newline
  return (contents ++ "\n")
|
2006-12-20 06:50:14 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse a complete list item (first part plus continuations) and return
-- its contents as blocks.
listItem start = try $ do
  firstPart <- rawListItem start
  rest <- many (listContinuation start)
  -- Parsing with ListItemState forces markers at the beginning of lines
  -- to count as list item markers, even if not separated by blank space
  -- (see the definition of "endline").
  st <- getState
  let savedContext = stateParserContext st
      rawItem = concat (firstPart : rest)
  setState st { stateParserContext = ListItemState }
  -- the extracted text may contain various block elements
  blocks <- parseFromString parseBlocks rawItem
  updateState $ \s -> s { stateParserContext = savedContext }
  return blocks
|
2006-12-20 06:50:14 +00:00
|
|
|
|
2007-08-22 20:19:37 +00:00
|
|
|
-- Parse an ordered list.  The attributes are read (without consuming
-- input) from the first item; every item must use that style/delimiter.
orderedList = try $ do
  attribs@(_, style, delim) <- lookAhead anyOrderedListStart
  items <- many1 (listItem (orderedListStart style delim))
  return $ OrderedList attribs (compactify items)
|
2006-12-20 06:50:14 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse a bullet list: one or more bullet items, passed through
-- 'compactify'.
bulletList = do
  items <- many1 (listItem bulletListStart)
  return (BulletList (compactify items))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-03-10 17:48:16 +00:00
|
|
|
-- definition lists
|
|
|
|
|
|
|
|
-- Parse one definition list item: a term on a single unindented line,
-- followed by one or more raw definition blocks.  Returns the normalized
-- term paired with the definition parsed as blocks.
definitionListItem = try $ do
  notFollowedBy blankline
  notFollowedBy' indentSpaces
  term <- manyTill inline newline
  rawDefs <- many1 defRawBlock
  st <- getState
  let savedContext = stateParserContext st
  -- the extracted text may contain various block elements
  blocks <- parseFromString parseBlocks (concat rawDefs)
  -- put back the parser context that was current before the nested parse
  updateState $ \s -> s { stateParserContext = savedContext }
  return (normalizeSpaces term, blocks)
|
|
|
|
|
|
|
|
-- Parse the raw text of one definition: a ':' marker padded to the tab
-- stop with spaces (or ended by a tab), the first line, any indented
-- non-blank lines that follow, and trailing blank lines.
defRawBlock = try $ do
  char ':'
  st <- getState
  let padding = stateTabStop st - 1
  try (count padding (char ' ')) <|> (many (char ' ') >> string "\t")
  firstLine <- anyLine
  moreLines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
  trailingBlanks <- option "" blanklines
  return (firstLine ++ "\n" ++ unlines moreLines ++ trailingBlanks)
|
2007-03-10 17:48:16 +00:00
|
|
|
|
|
|
|
-- Parse a definition list ('failIfStrict' runs first).  The definitions
-- are passed through 'compactify' and re-paired with their terms.
definitionList = do
  failIfStrict
  items <- many1 definitionListItem
  let (terms, defs) = unzip items
  return $ DefinitionList (zip terms (compactify defs))
|
2007-03-10 17:48:16 +00:00
|
|
|
|
2006-10-17 14:22:29 +00:00
|
|
|
--
|
|
|
|
-- paragraph block
|
|
|
|
--
|
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse a paragraph: inlines ending in a newline, followed either by
-- blank lines or (without consuming it) by a block that may interrupt a
-- paragraph: a block quote or header when 'stateStrict' is set, an Emacs
-- box quote otherwise.
para = try $ do
  inlines <- many1 inline
  newline
  let interruptingBlock = do
        st <- getState
        if stateStrict st
           then lookAhead (blockQuote <|> header) >> return ""
           else lookAhead emacsBoxQuote >> return ""
  blanklines <|> interruptingBlock
  return (Para (normalizeSpaces inlines))
|
|
|
|
|
|
|
|
-- Parse a run of inlines as a Plain block (no terminating blank line
-- is required).
plain = do
  inlines <- many1 inline
  return (Plain (normalizeSpaces inlines))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- raw html
|
|
|
|
--
|
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse an HTML element: 'strictHtmlBlock' first, then 'htmlBlockElement'.
htmlElement = choice [strictHtmlBlock, htmlBlockElement] <?> "html element"
|
2006-12-30 22:51:49 +00:00
|
|
|
|
|
|
|
-- Parse a raw HTML block.  When 'stateStrict' is set, a single HTML
-- element at the beginning of a line is taken together with its trailing
-- spaces and newlines; otherwise 'rawHtmlBlocks' is used.
htmlBlock = getState >>= \st ->
  if stateStrict st then strictBlock else rawHtmlBlocks
  where
    strictBlock = try $ do
      failUnlessBeginningOfLine
      element <- htmlElement
      trailingSpace <- many (oneOf spaceChars)
      trailingNewlines <- many newline
      return (RawHtml (concat [element, trailingSpace, trailingNewlines]))
|
|
|
|
|
|
|
|
-- True if the tag is self-closing, i.e. it ends in "/>" once all spaces,
-- tabs and newlines have been removed from it.
isSelfClosing tag = "/>" `isSuffixOf` stripped
  where stripped = filter (`notElem` " \n\t") tag
|
2006-12-30 22:51:49 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse an HTML block element together with everything up to its
-- matching end tag, and return the raw text.  Self-closing tags and
-- "hr" are returned as they are, without looking for an end tag.
strictHtmlBlock = try $ do
  openTag <- anyHtmlBlockTag
  let tagType = extractTagType openTag
      closeTag = htmlEndTag tagType
      -- nested element, or any single character that is not the end tag
      innerChunk = notFollowedBy' closeTag >>
                   (htmlElement <|> count 1 anyChar)
      withBody = do
        chunks <- many innerChunk
        end <- closeTag
        return (concat (openTag : chunks ++ [end]))
  if isSelfClosing openTag || tagType == "hr"
     then return openTag
     else withBody
|
2006-12-30 22:51:49 +00:00
|
|
|
|
2007-08-28 07:19:07 +00:00
|
|
|
-- Parse one or more consecutive raw HTML blocks and merge them into a
-- single RawHtml block, dropping one trailing newline if there is one.
rawHtmlBlocks = do
  htmlBlocks <- many1 rawHtmlBlock
  let rawText (RawHtml s) = s
      joined = concatMap rawText htmlBlocks
      trimmed = case reverse joined of
                  ('\n':rest) -> reverse rest   -- strip extra newline
                  _           -> joined
  return (RawHtml trimmed)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2006-12-30 22:51:49 +00:00
|
|
|
--
|
|
|
|
-- LaTeX
|
|
|
|
--
|
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- 'rawLaTeXEnvironment' from the LaTeX reader, preceded by 'failIfStrict'.
rawLaTeXEnvironment' = do
  failIfStrict
  rawLaTeXEnvironment
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-01-15 19:52:42 +00:00
|
|
|
--
|
|
|
|
-- Tables
|
|
|
|
--
|
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Parse a run of the character 'ch' with optional trailing spaces.
-- Returns the length of the run, and the length including the trailing
-- spaces.
dashedLine ch = do
  dashes <- many1 (char ch)
  trailing <- many spaceChar
  let dashCount = length dashes
  return (dashCount, dashCount + length trailing)
|
2007-01-15 19:52:42 +00:00
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Parse a simple table header: one line of text followed by a line of
-- '-' segments.  Returns the raw header cells, the column alignments,
-- and the column boundary indices.
simpleTableHeader = try $ do
  headerLine <- anyLine
  indent <- nonindentSpaces
  segments <- many1 (dashedLine '-')
  newline
  let (dashLengths, segmentWidths) = unzip segments
      -- running column boundaries, starting after the initial indent
      indices = scanl (+) (length indent) segmentWidths
      rawHeads = tail (splitByIndices (init indices) headerLine)
      aligns = zipWith (\cell len -> alignType [cell] len) rawHeads dashLengths
  return (rawHeads, aligns, indices)
|
2007-01-15 19:52:42 +00:00
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Parse a table footer: a line of '-' segments followed by blank lines.
tableFooter = try $ do
  nonindentSpaces
  many1 (dashedLine '-')
  blanklines
|
2007-01-15 19:52:42 +00:00
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Parse a table separator: a line of '-' segments ended by a newline.
tableSep = try $ do
  nonindentSpaces
  many1 (dashedLine '-')
  string "\n"
|
2007-01-15 19:52:42 +00:00
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Parse a raw table line and split it into cells at the given indices;
-- each cell has its leading and trailing space removed.  Fails at blank
-- lines and at a table footer.
rawTableLine indices = do
  notFollowedBy' (blanklines <|> tableFooter)
  line <- many1Till anyChar newline
  let cells = tail (splitByIndices (init indices) line)
  return (map removeLeadingTrailingSpace cells)
|
2007-01-15 19:52:42 +00:00
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Parse a table line and return a list of lists of blocks (columns).
tableLine indices = do
  cells <- rawTableLine indices
  mapM (parseFromString (many plain)) cells
|
2007-01-15 19:52:42 +00:00
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Parse a multiline table row: one or more raw table lines, optionally
-- followed by blank lines.  The lines of each column are joined with
-- 'unlines' and parsed into blocks; one list of blocks per column.
multilineRow indices = do
  rowLines <- many1 (rawTableLine indices)
  optional blanklines
  mapM (parseFromString (many plain) . unlines) (transpose rowLines)
|
2007-01-15 19:52:42 +00:00
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Calculate relative widths of table columns, based on indices.
-- Each width is the column's length divided by the larger of the total
-- table length and the number of terminal columns.  The first index only
-- marks the left edge, so its (leading) width is dropped.
widthsFromIndices :: Int     -- Number of columns on terminal
                  -> [Int]   -- Indices
                  -> [Float] -- Fractional relative sizes of columns
widthsFromIndices _ [] = []
widthsFromIndices numColumns indices = tail (map toFraction lengths)
  where
    lengths      = zipWith (-) indices (0 : indices)
    totLength    = sum lengths
    denominator  = fromIntegral (max totLength numColumns)
    toFraction l = fromIntegral l / denominator
|
2007-01-15 19:52:42 +00:00
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Parse a table caption: inlines beginning with "Table:" and followed
-- by blank lines.  Returns the normalized caption inlines.
tableCaption = try $ do
  nonindentSpaces
  string "Table:"
  inlines <- many1 inline
  blanklines
  return (normalizeSpaces inlines)
|
2007-01-15 19:52:42 +00:00
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-- The header parser supplies the raw header cells, the alignments and
-- the column indices; rows are read with the line parser until the
-- footer parser succeeds; a caption may follow.
tableWith headerParser lineParser footerParser = try $ do
  (rawHeads, aligns, indices) <- headerParser
  rows <- many1Till (lineParser indices) footerParser
  caption <- option [] tableCaption
  heads <- mapM (parseFromString (many plain)) rawHeads
  st <- getState
  let widths = widthsFromIndices (stateColumns st) indices
  return (Table caption aligns widths heads rows)
|
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Parse a simple table: a '---' header, one line per row, and blank
-- lines as the footer.
simpleTable = tableWith headerParser rowParser footerParser
  where
    headerParser = simpleTableHeader
    rowParser    = tableLine
    footerParser = blanklines
|
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Parse a multiline table: a row of '-' on top, then the header (which
-- may be multiline), then the rows (which may be multiline and are
-- separated by blank lines), ending with a footer (a dashed line
-- followed by a blank line).
multilineTable = tableWith headerParser rowParser footerParser
  where
    headerParser = multilineTableHeader
    rowParser    = multilineRow
    footerParser = tableFooter
|
|
|
|
|
|
|
|
-- Parse the header of a multiline table: a separator line, one or more
-- lines of header text, and a line of '-' segments.  Returns the header
-- cells (the rows of each column joined with a space), the column
-- alignments, and the column boundary indices.
multilineTableHeader = try $ do
  tableSep
  headerLines <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
  indent <- nonindentSpaces
  segments <- many1 (dashedLine '-')
  newline
  let (dashLengths, segmentWidths) = unzip segments
      indices = scanl (+) (length indent) segmentWidths
      splitLine ln = tail (splitByIndices (init indices) ln)
      -- one list of raw rows per column
      columns = transpose (map splitLine headerLines)
      rawHeads = map (removeLeadingTrailingSpace . joinWithSep " ") columns
      aligns = zipWith alignType columns dashLengths
  return (rawHeads, aligns, indices)
|
2007-01-15 19:52:42 +00:00
|
|
|
|
2007-01-16 00:07:42 +00:00
|
|
|
-- Returns an alignment type for a table column, based on a list of
-- strings (the rows of the column header) and a number (the length of
-- the dashed line under the rows).
--
-- The decision is made on the shortest header row, after trailing space
-- has been removed.  Rows that are blank are ignored when at least one
-- row carries text: a blank row says nothing about alignment, and it
-- would otherwise always be the shortest row and force AlignLeft.
--
--   * text starts with space and reaches the end of the dashes -> right
--   * text starts at the left and stops short of the end       -> left
--   * both                                                     -> center
--   * neither                                                  -> default
alignType :: [String] -> Int -> Alignment
alignType [] _ = AlignDefault
alignType strLst len =
  let trimmed = map removeTrailingSpace strLst
      candidates = case filter (not . null) trimmed of
                     []   -> trimmed   -- every row is blank: keep them all
                     rows -> rows
      str = case sortBy (comparing length) candidates of
              (s:_) -> s
              []    -> ""              -- unreachable: strLst is non-empty
      leftSpace = case str of
                    (c:_) -> c `elem` " \t"
                    []    -> False
      -- True when the row stops before column 'len', or has space there.
      -- Written with 'drop' so that no index can be out of range.
      rightSpace = case drop (len - 1) str of
                     (c:_) -> c `elem` " \t"
                     []    -> True
  in  case (leftSpace, rightSpace) of
        (True,  False) -> AlignRight
        (False, True)  -> AlignLeft
        (True,  True)  -> AlignCenter
        (False, False) -> AlignDefault
|
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse a table ('failIfStrict' runs first): a simple table is tried
-- before a multiline table.
table = tableParser <?> "table"
  where
    tableParser = do
      failIfStrict
      simpleTable <|> multilineTable
|
2007-01-15 19:52:42 +00:00
|
|
|
|
2006-10-17 14:22:29 +00:00
|
|
|
--
|
|
|
|
-- inline
|
|
|
|
--
|
|
|
|
|
2007-08-28 06:38:38 +00:00
|
|
|
-- Parse one inline element.  The alternatives are tried in the order
-- listed, so an earlier parser wins over a later one.
inline = choice inlineParsers <?> "inline"
  where
    inlineParsers =
      [ str
      , smartPunctuation
      , linebreak
      , endline
      , whitespace
      , code
      , charRef
      , strong
      , emph
      , note
      , inlineNote
      , link
      , image
      , math
      , strikeout
      , superscript
      , subscript
      , autoLink
      , rawHtmlInline'
      , rawLaTeXInline'
      , escapedChar
      , symbol
      , ltSign
      ]
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-28 07:24:56 +00:00
|
|
|
-- Parse a backslash escape.  When 'stateStrict' is set only a fixed set
-- of punctuation characters can be escaped; otherwise any
-- non-alphanumeric character can.  A backslash that escapes nothing is
-- returned as a literal backslash.
escapedChar = do
  char '\\'
  st <- getState
  let escapable = if stateStrict st
                     then oneOf "\\`*_{}[]()>#+-.!~"
                     else satisfy (not . isAlphaNum)
  escaped <- option '\\' escapable
  return (Str [escaped])
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-28 02:33:53 +00:00
|
|
|
-- Parse a literal '<'.  Unless 'stateStrict' is set, the '<' is rejected
-- when raw HTML blocks could be parsed at this position.
ltSign = do
  st <- getState
  if stateStrict st
     then return ()
     else notFollowedBy' rawHtmlBlocks >> return ()  -- unless it starts html
  char '<'
  return (Str "<")
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- The characters of 'specialChars', without '<'.
specialCharsMinusLt = [ c | c <- specialChars, c /= '<' ]
|
|
|
|
|
|
|
|
-- Parse a single special character (other than '<') as a literal Str.
symbol = oneOf specialCharsMinusLt >>= \c -> return (Str [c])
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-07-21 22:52:07 +00:00
|
|
|
-- Parse inline code, between n backticks and n backticks.  Newlines
-- inside the code become spaces; a backtick run is only the closing
-- delimiter if it has exactly the opening length.
code = try $ do
  opening <- many1 (char '`')
  skipSpaces
  let chunk = choice [ many1 (noneOf "`\n")
                     , many1 (char '`')
                     , char '\n' >> return " " ]
      closing = try $ do
        skipSpaces
        count (length opening) (char '`')
        notFollowedBy (char '`')
  chunks <- many1Till chunk closing
  return (Code (removeLeadingTrailingSpace (concat chunks)))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse one word of inline math: characters other than space, tab,
-- newline, backslash and '$', or a backslash not followed by '$'.
mathWord = many1 (plainChar <|> backslash)
  where
    plainChar = noneOf " \t\n\\$"
    backslash = try (char '\\') >>~ notFollowedBy (char '$')
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse inline TeX math between '$' delimiters ('failIfStrict' runs
-- first).  The opening '$' may not be followed by space; runs of space
-- between words are collapsed to a single space.
math = try $ do
  failIfStrict
  char '$'
  notFollowedBy space
  mathWords <- sepBy1 mathWord (many1 space)
  char '$'
  return (TeX (concat ["$", joinWithSep " " mathWords, "$"]))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse emphasized text, enclosed in a pair of '*' or a pair of '_'.
emph = do
  inlines <- enclosed (char '*') (char '*') inline <|>
             enclosed (char '_') (char '_') inline
  return (Emph (normalizeSpaces inlines))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-28 05:58:21 +00:00
|
|
|
-- Parse strongly emphasized text, enclosed in "**" or in "__".
strong = do
  let delimitedBy d = enclosed (string d) (try (string d)) inline
  inlines <- delimitedBy "**" <|> delimitedBy "__"
  return (Strong (normalizeSpaces inlines))
|
2007-07-21 22:52:07 +00:00
|
|
|
|
2007-08-28 05:58:21 +00:00
|
|
|
-- Parse struck-out text, enclosed in "~~" ('failIfStrict' runs first).
strikeout = do
  failIfStrict
  inlines <- enclosed (string "~~") (try (string "~~")) inline
  return (Strikeout (normalizeSpaces inlines))
|
2007-07-21 22:52:07 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse superscript text, enclosed in '^' ('failIfStrict' runs first).
-- The contents may not contain Space.
superscript = do
  failIfStrict
  inlines <- enclosed (char '^') (char '^') (notFollowedBy' whitespace >> inline)
  return (Superscript inlines)
|
2007-07-21 22:52:07 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse subscript text, enclosed in '~' ('failIfStrict' runs first).
-- The contents may not contain Space.
subscript = do
  failIfStrict
  inlines <- enclosed (char '~') (char '~') (notFollowedBy' whitespace >> inline)
  return (Subscript inlines)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse "smart" punctuation ('failUnlessSmart' runs first): quoted
-- text, an apostrophe, a dash, or ellipses, tried in that order.
smartPunctuation = do
  failUnlessSmart
  choice [ quoted, apostrophe, dash, ellipses ]
|
2007-01-06 09:54:58 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse an apostrophe: a straight single quote or the character '\8217'.
apostrophe = do
  char '\'' <|> char '\8217'
  return Apostrophe
|
2007-01-06 09:54:58 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse quoted text: double-quoted is tried before single-quoted.
quoted = choice [doubleQuoted, singleQuoted]
|
2007-01-06 09:54:58 +00:00
|
|
|
|
2007-01-06 18:41:01 +00:00
|
|
|
-- Run 'parser' with 'stateQuoteContext' set to 'context', then put the
-- previous quote context back.  Any other state changes made by the
-- parser are kept.
withQuoteContext context parser = do
  outer <- getState
  setState outer { stateQuoteContext = context }
  result <- parser
  updateState $ \st -> st { stateQuoteContext = stateQuoteContext outer }
  return result
|
|
|
|
|
|
|
|
-- Parse single-quoted text; the contents are parsed in the
-- InSingleQuote context.
singleQuoted = try $ do
  singleQuoteStart
  withQuoteContext InSingleQuote $ do
    inlines <- many1Till inline singleQuoteEnd
    return (Quoted SingleQuote (normalizeSpaces inlines))
|
2007-01-06 18:41:01 +00:00
|
|
|
|
|
|
|
-- Parse double-quoted text; the contents are parsed in the
-- InDoubleQuote context.
doubleQuoted = try $ do
  doubleQuoteStart
  withQuoteContext InDoubleQuote $ do
    inlines <- many1Till inline doubleQuoteEnd
    return (Quoted DoubleQuote (normalizeSpaces inlines))
|
2007-01-06 18:41:01 +00:00
|
|
|
|
|
|
|
-- Fail if the current quote context equals 'context'; otherwise succeed
-- without consuming input.
failIfInQuoteContext context = do
  st <- getState
  if stateQuoteContext st /= context
     then return ()
     else fail "already inside quotes"
|
2007-01-06 09:54:58 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- Parse an opening single quote (fails when already inside single
-- quotes).  '\8216' always opens a quote.  A straight quote opens one
-- only if it is not followed by closing punctuation or whitespace, and
-- is not the apostrophe of a possessive or contraction ('s, 't, 'm,
-- 've, 'll, 're followed by a non-alphanumeric character).
singleQuoteStart = do
  failIfInQuoteContext InSingleQuote
  let contractionSuffix = try $ do
        oneOfStrings ["s","t","m","ve","ll","re"]
        satisfy (not . isAlphaNum)
      straightQuote = do
        char '\''
        notFollowedBy (oneOf ")!],.;:-? \t\n")
        notFollowedBy contractionSuffix
        return '\''
  char '\8216' <|> straightQuote
|
|
|
|
|
|
|
|
-- | Closing single quote: a straight quote or U+2019 that is not
-- immediately followed by an alphanumeric character.
--
-- The whole parser is wrapped in 'try'.  Without it, a quote character
-- followed by a letter (as in a contraction inside a quotation) would be
-- consumed before the 'notFollowedBy' check failed; a failure that has
-- consumed input stops the enclosing @many1Till inline singleQuoteEnd@
-- from falling back to @inline@, so the entire single-quoted span was
-- rejected.  With 'try' the quote character is handed back and can be
-- parsed as an ordinary inline (e.g. an apostrophe).
singleQuoteEnd = try $ do
  char '\'' <|> char '\8217'
  notFollowedBy alphaNum
|
|
|
|
|
|
|
|
-- | Opening double quote: a straight double quote or U+201C that is not
-- followed by a space, tab or newline.  Fails when already inside double
-- quotes.
doubleQuoteStart = do
  failIfInQuoteContext InDoubleQuote
  char '"' <|> char '\8220'
  notFollowedBy (oneOf " \t\n")
|
2007-01-06 09:54:58 +00:00
|
|
|
|
|
|
|
-- | Closing double quote: a straight double quote or U+201D.
doubleQuoteEnd = choice [ char '"', char '\8221' ]
|
|
|
|
|
2007-08-29 19:59:08 +00:00
|
|
|
-- | Three dots, optionally spaced out, read as 'Ellipses'.
ellipses = do
  oneOfStrings ["...", " . . . ", ". . .", " . . ."]
  return Ellipses
|
2007-01-06 09:54:58 +00:00
|
|
|
|
|
|
|
-- | A dash: an en dash is tried first, then an em dash.
dash = choice [ enDash, emDash ]
|
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | En dash: a single hyphen that is not followed by a non-digit
-- character (so the next character, if there is one, must be a digit).
enDash = try $ do
  char '-'
  notFollowedBy (noneOf "0123456789")
  return EnDash
|
2007-01-06 09:54:58 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | Em dash: two or three hyphens; spaces on either side are swallowed.
emDash = try $ do
  skipSpaces
  oneOfStrings ["---", "--"]
  skipSpaces
  return EmDash
|
2007-01-06 09:54:58 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | One or more characters from 'spaceChars', collapsed to a single 'Space'.
whitespace = spaceRun <?> "whitespace"
  where spaceRun = many1 (oneOf spaceChars) >> return Space
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Hard line break: at least two characters from 'spaceChars' followed
-- by an 'endline'.
linebreak = try $ do
  oneOf spaceChars
  many1 (oneOf spaceChars)
  endline
  return LineBreak
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-07-21 22:52:07 +00:00
|
|
|
-- | Any single character other than a newline.
nonEndline = noneOf "\n"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-07-21 22:52:07 +00:00
|
|
|
-- | A character that may appear in a plain 'Str': anything that is not
-- in 'specialChars', not in 'spaceChars', and not a newline.
strChar = satisfy (`notElem` excluded)
  where excluded = specialChars ++ spaceChars ++ "\n"
|
2007-02-15 02:27:36 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | A run of one or more 'strChar' characters, wrapped as 'Str'.
str = do
  chars <- many1 strChar
  return (Str chars)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | An end-of-line that can be treated as a space rather than as a
-- structural break.  Fails (without consuming input) when the newline is
-- followed by a blank line, or by something that the current mode/context
-- treats as the start of a new block.
endline = try $ do
  newline
  notFollowedBy blankline
  st <- getState
  -- in strict mode a following email-style blockquote or atx header
  -- ends the paragraph
  onlyIf (stateStrict st) $ do
    notFollowedBy emailBlockQuoteStart
    notFollowedBy (char '#') -- atx header
  -- parse potential list-starts differently if in a list:
  onlyIf (stateParserContext st == ListItemState) $
    notFollowedBy' (bulletListStart <|> (anyOrderedListStart >> return ()))
  return Space
  where
    -- run the check only when the condition holds
    onlyIf cond check = if cond then check else return ()
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- links
|
|
|
|
--
|
|
|
|
|
2007-01-06 20:47:00 +00:00
|
|
|
-- | A reference label for a link: inlines in balanced square brackets,
-- with spaces normalized.  Input starting with @[^@ is rejected.
reference = do
  notFollowedBy' (string "[^") -- footnote reference
  label <- inlinesInBalanced "[" "]"
  return (normalizeSpaces label)
|
2007-07-15 23:53:22 +00:00
|
|
|
|
2006-10-17 14:22:29 +00:00
|
|
|
-- | Source for a link, with optional title: a parenthesized URL that may
-- be wrapped in angle brackets, optionally followed by a 'linkTitle'.
-- Returns the (URL, title) pair; the title is empty when none is given.
source = try $ do
  char '('
  src <- inOptionalAngles (many (noneOf ")> \t\n"))
  tit <- option "" linkTitle
  skipSpaces >> char ')'
  return (removeTrailingSpace src, tit)
  where
    -- the opening and closing angle brackets are each optional on their own
    inOptionalAngles p = do
      optional (char '<')
      x <- p
      optional (char '>')
      return x
|
2006-12-20 06:50:14 +00:00
|
|
|
|
2007-08-29 01:51:03 +00:00
|
|
|
-- | A link title: optional spaces and at most one newline, then text
-- delimited by matching single or double quotes.  A delimiter character
-- only closes the title when nothing but spaces separates it from a
-- closing parenthesis, a newline, or the end of input.  Character
-- references in the title are decoded.
linkTitle = try $ do
  skipSpaces >> optional newline >> skipSpaces
  delim <- char '\'' <|> char '"'
  let closing = try (char delim >> skipSpaces >> notFollowedBy (noneOf ")\n"))
  rawTitle <- manyTill anyChar closing
  return (decodeCharacterReferences rawTitle)
|
2006-12-20 06:50:14 +00:00
|
|
|
|
2007-08-28 22:41:05 +00:00
|
|
|
-- | A link: a bracketed label followed either by an inline 'source' or by
-- a reference that 'referenceLink' can resolve.
link = try $ do
  label <- reference
  target <- choice [ source, referenceLink label ]
  return (Link label target)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
Extensive changes stemming from a rethinking of the Pandoc data
structure. Key and Note blocks have been removed. Link and image URLs
are now stored directly in Link and Image inlines, and note blocks
are stored in Note inlines. This requires changes in both parsers
and writers. Markdown and RST parsers need to extract data from key
and note blocks and insert them into the relevant inline elements.
Other parsers can be simplified, since there is no longer any need to
construct separate key and note blocks. Markdown, RST, and HTML writers
need to construct lists of notes; Markdown and RST writers need to
construct lists of link references (when the --reference-links option
is specified); and the RST writer needs to construct a list of image
substitution references. All writers have been rewritten to use the
State monad when state is required. This rewrite yields a small speed
boost and considerably cleaner code.
* Text/Pandoc/Definition.hs:
+ blocks: removed Key and Note
+ inlines: removed NoteRef, added Note
+ modified Target: there is no longer a 'Ref' target; all targets
are explicit URL, title pairs
* Text/Pandoc/Shared.hs:
+ Added 'Reference', 'isNoteBlock', 'isKeyBlock', 'isLineClump',
used in some of the readers.
+ Removed 'generateReference', 'keyTable', 'replaceReferenceLinks',
'replaceRefLinksBlockList', along with some auxiliary functions
used only by them. These are no longer needed, since
reference links are resolved in the Markdown and RST readers.
+ Moved 'inTags', 'selfClosingTag', 'inTagsSimple', and 'inTagsIndented'
to the Docbook writer, since that is now the only module that uses
them.
+ Changed name of 'escapeSGMLString' to 'escapeStringForXML'
+ Added KeyTable and NoteTable types
+ Removed fields from ParserState; 'stateKeyBlocks', 'stateKeysUsed',
'stateNoteBlocks', 'stateNoteIdentifiers', 'stateInlineLinks'.
Added 'stateKeys' and 'stateNotes'.
+ Added clause for Note to 'prettyBlock'.
+ Added 'writerNotes', 'writerReferenceLinks' fields to WriterOptions.
* Text/Pandoc/Entities.hs: Renamed 'escapeSGMLChar' and
'escapeSGMLString' to 'escapeCharForXML' and 'escapeStringForXML'
* Text/ParserCombinators/Pandoc.hs: Added lineClump parser: parses a raw
line block up to and including following blank lines.
* Main.hs: Replaced --inline-links with --reference-links.
* README:
+ Documented --reference-links and removed description of --inline-links.
+ Added note that footnotes may occur anywhere in the document, but must
be at the outer level, not embedded in block elements.
* man/man1/pandoc.1, man/man1/html2markdown.1: Removed --inline-links
option, added --reference-links option
* Markdown and RST readers:
+ Rewrote to fit new Pandoc definition. Since there are no longer
Note or Key blocks, all note and key blocks are parsed on a first pass
through the document. Once tables of notes and keys have been constructed,
the remaining parts of the document are reassembled and parsed.
+ Refactored link parsers.
* LaTeX and HTML readers: Rewrote to fit new Pandoc definition. Since
there are no longer Note or Key blocks, notes and references can be
parsed in a single pass through the document.
* RST, Markdown, and HTML writers: Rewrote using state monad and new Pandoc
definition. State is used to hold lists of references and footnotes to
be printed at the end of the document.
* RTF and LaTeX writers: Rewrote using new Pandoc definition. (Because
of the different treatment of footnotes, the "notes" parameter is no
longer needed in the block and inline conversion functions.)
* Docbook writer:
+ Moved the functions 'attributeList', 'inTags', 'selfClosingTag',
'inTagsSimple', 'inTagsIndented' from Text/Pandoc/Shared, since
they are now used only by the Docbook writer.
+ Rewrote using new Pandoc definition. (Because of the different
treatment of footnotes, the "notes" parameter is no longer needed
in the block and inline conversion functions.)
* Updated test suite
* Throughout: old haskell98 module names replaced by hierarchical module
names, e.g. List by Data.List.
* debian/control: Include libghc6-xhtml-dev instead of libghc6-html-dev
in "Build-Depends."
* cabalize:
+ Remove haskell98 from BASE_DEPENDS (since now the new hierarchical
module names are being used throughout)
+ Added mtl to BASE_DEPENDS (needed for state monad)
+ Removed html from GHC66_DEPENDS (not needed since xhtml is now used)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@580 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-04-10 01:56:50 +00:00
|
|
|
-- | Target of a link like @[this][ref]@, @[this][]@ or @[this]@.
-- An optional second bracketed reference (possibly after one space and/or
-- a line break) names the key; when it is missing or empty the link's own
-- @label@ is used.  Fails with "no corresponding key" when the key is not
-- found in the state's key table.
referenceLink label = do
  explicit <- option [] $ try $ do
    optional (char ' ')
    optional (newline >> skipSpaces)
    reference
  let key = if null explicit then label else explicit
  st <- getState
  maybe (fail "no corresponding key") return (lookupKeySrc (stateKeys st) key)
|
2006-12-20 06:50:14 +00:00
|
|
|
|
2007-01-24 20:55:27 +00:00
|
|
|
-- | An autolink in angle brackets: an email address is tried first, then
-- a URL with a recognized protocol.
autoLink = choice [ autoLinkEmail, autoLinkRegular ]
|
2007-01-24 19:44:43 +00:00
|
|
|
|
|
|
|
-- | An email autolink: an address in angle brackets.  The link text is
-- the bare address (see 'autoLinkText'); the target is the address
-- prefixed with @mailto:@.
autoLinkEmail = try $ do
  char '<'
  user <- many1Till (noneOf "/:<> \t\n") (char '@')
  hostParts <- many1 (noneOf "/:.@<> \t\n") `sepBy1` char '.'
  char '>'
  let address = user ++ "@" ++ joinWithSep "." hostParts
  label <- autoLinkText address
  return (Link label ("mailto:" ++ address, ""))
|
2007-01-24 19:44:43 +00:00
|
|
|
|
2007-01-24 20:55:27 +00:00
|
|
|
-- | A URL autolink: a URL in angle brackets that starts with one of the
-- recognized protocols.  The URL serves both as the link text (see
-- 'autoLinkText') and as the target; the title is empty.
--
-- @https:@ is accepted alongside @http:@, @ftp:@ and @mailto:@, so that
-- secure URLs in angle brackets become links too.
autoLinkRegular = try $ do
  char '<'
  prot <- oneOfStrings ["http:", "https:", "ftp:", "mailto:"]
  -- the rest of the URL: at least one character, no whitespace or angle
  -- brackets, up to the closing '>'
  rest <- many1Till (noneOf " \t\n<>") (char '>')
  let src = prot ++ rest
  txt <- autoLinkText src
  return $ Link txt (src, "")
|
|
|
|
|
|
|
|
-- | Link text for an autolink: a plain 'Str' when the state's strict flag
-- is set, otherwise 'Code'.
autoLinkText src = getState >>= return . render . stateStrict
  where
    render strict
      | strict    = [Str src]
      | otherwise = [Code src]
|
2006-12-20 06:50:14 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | An image: an exclamation mark followed by a 'link', whose label and
-- target are reused for the 'Image'.
image = try $ do
  char '!'
  Link lab target <- link
  return (Image lab target)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
Extensive changes stemming from a rethinking of the Pandoc data
structure. Key and Note blocks have been removed. Link and image URLs
are now stored directly in Link and Image inlines, and note blocks
are stored in Note inlines. This requires changes in both parsers
and writers. Markdown and RST parsers need to extract data from key
and note blocks and insert them into the relevant inline elements.
Other parsers can be simplified, since there is no longer any need to
construct separate key and note blocks. Markdown, RST, and HTML writers
need to construct lists of notes; Markdown and RST writers need to
construct lists of link references (when the --reference-links option
is specified); and the RST writer needs to construct a list of image
substitution references. All writers have been rewritten to use the
State monad when state is required. This rewrite yields a small speed
boost and considerably cleaner code.
* Text/Pandoc/Definition.hs:
+ blocks: removed Key and Note
+ inlines: removed NoteRef, added Note
+ modified Target: there is no longer a 'Ref' target; all targets
are explicit URL, title pairs
* Text/Pandoc/Shared.hs:
+ Added 'Reference', 'isNoteBlock', 'isKeyBlock', 'isLineClump',
used in some of the readers.
+ Removed 'generateReference', 'keyTable', 'replaceReferenceLinks',
'replaceRefLinksBlockList', along with some auxiliary functions
used only by them. These are no longer needed, since
reference links are resolved in the Markdown and RST readers.
+ Moved 'inTags', 'selfClosingTag', 'inTagsSimple', and 'inTagsIndented'
to the Docbook writer, since that is now the only module that uses
them.
+ Changed name of 'escapeSGMLString' to 'escapeStringForXML'
+ Added KeyTable and NoteTable types
+ Removed fields from ParserState; 'stateKeyBlocks', 'stateKeysUsed',
'stateNoteBlocks', 'stateNoteIdentifiers', 'stateInlineLinks'.
Added 'stateKeys' and 'stateNotes'.
+ Added clause for Note to 'prettyBlock'.
+ Added 'writerNotes', 'writerReferenceLinks' fields to WriterOptions.
* Text/Pandoc/Entities.hs: Renamed 'escapeSGMLChar' and
'escapeSGMLString' to 'escapeCharForXML' and 'escapeStringForXML'
* Text/ParserCombinators/Pandoc.hs: Added lineClump parser: parses a raw
line block up to and including following blank lines.
* Main.hs: Replaced --inline-links with --reference-links.
* README:
+ Documented --reference-links and removed description of --inline-links.
+ Added note that footnotes may occur anywhere in the document, but must
be at the outer level, not embedded in block elements.
* man/man1/pandoc.1, man/man1/html2markdown.1: Removed --inline-links
option, added --reference-links option
* Markdown and RST readers:
+ Rewrote to fit new Pandoc definition. Since there are no longer
Note or Key blocks, all note and key blocks are parsed on a first pass
through the document. Once tables of notes and keys have been constructed,
the remaining parts of the document are reassembled and parsed.
+ Refactored link parsers.
* LaTeX and HTML readers: Rewrote to fit new Pandoc definition. Since
there are no longer Note or Key blocks, notes and references can be
parsed in a single pass through the document.
* RST, Markdown, and HTML writers: Rewrote using state monad and new Pandoc
definition. State is used to hold lists of references and footnotes to
be printed at the end of the document.
* RTF and LaTeX writers: Rewrote using new Pandoc definition. (Because
of the different treatment of footnotes, the "notes" parameter is no
longer needed in the block and inline conversion functions.)
* Docbook writer:
+ Moved the functions 'attributeList', 'inTags', 'selfClosingTag',
'inTagsSimple', 'inTagsIndented' from Text/Pandoc/Shared, since
they are now used only by the Docbook writer.
+ Rewrote using new Pandoc definition. (Because of the different
treatment of footnotes, the "notes" parameter is no longer needed
in the block and inline conversion functions.)
* Updated test suite
* Throughout: old haskell98 module names replaced by hierarchical module
names, e.g. List by Data.List.
* debian/control: Include libghc6-xhtml-dev instead of libghc6-html-dev
in "Build-Depends."
* cabalize:
+ Remove haskell98 from BASE_DEPENDS (since now the new hierarchical
module names are being used throughout)
+ Added mtl to BASE_DEPENDS (needed for state monad)
+ Removed html from GHC66_DEPENDS (not needed since xhtml is now used)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@580 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-04-10 01:56:50 +00:00
|
|
|
-- | A footnote reference: a note marker whose contents are looked up in
-- the state's note table and returned as a 'Note' inline.  Guarded by
-- 'failIfStrict'; fails with "note not found" when the marker has no
-- entry in the table.
note = try $ do
  failIfStrict
  ref <- noteMarker
  st <- getState
  maybe (fail "note not found") (return . Note) (lookup ref (stateNotes st))
|
2006-12-19 23:13:03 +00:00
|
|
|
|
Extensive changes stemming from a rethinking of the Pandoc data
structure. Key and Note blocks have been removed. Link and image URLs
are now stored directly in Link and Image inlines, and note blocks
are stored in Note inlines. This requires changes in both parsers
and writers. Markdown and RST parsers need to extract data from key
and note blocks and insert them into the relevant inline elements.
Other parsers can be simplified, since there is no longer any need to
construct separate key and note blocks. Markdown, RST, and HTML writers
need to construct lists of notes; Markdown and RST writers need to
construct lists of link references (when the --reference-links option
is specified); and the RST writer needs to construct a list of image
substitution references. All writers have been rewritten to use the
State monad when state is required. This rewrite yields a small speed
boost and considerably cleaner code.
* Text/Pandoc/Definition.hs:
+ blocks: removed Key and Note
+ inlines: removed NoteRef, added Note
+ modified Target: there is no longer a 'Ref' target; all targets
are explicit URL, title pairs
* Text/Pandoc/Shared.hs:
+ Added 'Reference', 'isNoteBlock', 'isKeyBlock', 'isLineClump',
used in some of the readers.
+ Removed 'generateReference', 'keyTable', 'replaceReferenceLinks',
'replaceRefLinksBlockList', along with some auxiliary functions
used only by them. These are no longer needed, since
reference links are resolved in the Markdown and RST readers.
+ Moved 'inTags', 'selfClosingTag', 'inTagsSimple', and 'inTagsIndented'
to the Docbook writer, since that is now the only module that uses
them.
+ Changed name of 'escapeSGMLString' to 'escapeStringForXML'
+ Added KeyTable and NoteTable types
+ Removed fields from ParserState; 'stateKeyBlocks', 'stateKeysUsed',
'stateNoteBlocks', 'stateNoteIdentifiers', 'stateInlineLinks'.
Added 'stateKeys' and 'stateNotes'.
+ Added clause for Note to 'prettyBlock'.
+ Added 'writerNotes', 'writerReferenceLinks' fields to WriterOptions.
* Text/Pandoc/Entities.hs: Renamed 'escapeSGMLChar' and
'escapeSGMLString' to 'escapeCharForXML' and 'escapeStringForXML'
* Text/ParserCombinators/Pandoc.hs: Added lineClump parser: parses a raw
line block up to and including following blank lines.
* Main.hs: Replaced --inline-links with --reference-links.
* README:
+ Documented --reference-links and removed description of --inline-links.
+ Added note that footnotes may occur anywhere in the document, but must
be at the outer level, not embedded in block elements.
* man/man1/pandoc.1, man/man1/html2markdown.1: Removed --inline-links
option, added --reference-links option
* Markdown and RST readers:
+ Rewrote to fit new Pandoc definition. Since there are no longer
Note or Key blocks, all note and key blocks are parsed on a first pass
through the document. Once tables of notes and keys have been constructed,
the remaining parts of the document are reassembled and parsed.
+ Refactored link parsers.
* LaTeX and HTML readers: Rewrote to fit new Pandoc definition. Since
there are no longer Note or Key blocks, notes and references can be
parsed in a single pass through the document.
* RST, Markdown, and HTML writers: Rewrote using state monad and new
Pandoc definition. State is used to hold lists of references and
footnotes to be printed at the end of the document.
* RTF and LaTeX writers: Rewrote using new Pandoc definition. (Because
of the different treatment of footnotes, the "notes" parameter is no
longer needed in the block and inline conversion functions.)
* Docbook writer:
+ Moved the functions 'attributeList', 'inTags', 'selfClosingTag',
'inTagsSimple', 'inTagsIndented' from Text/Pandoc/Shared, since
they are now used only by the Docbook writer.
+ Rewrote using new Pandoc definition. (Because of the different
treatment of footnotes, the "notes" parameter is no longer needed
in the block and inline conversion functions.)
* Updated test suite
* Throughout: old haskell98 module names replaced by hierarchical module
names, e.g. List by Data.List.
* debian/control: Include libghc6-xhtml-dev instead of libghc6-html-dev
in "Build-Depends."
* cabalize:
+ Remove haskell98 from BASE_DEPENDS (since now the new hierarchical
module names are being used throughout)
+ Added mtl to BASE_DEPENDS (needed for state monad)
+ Removed html from GHC66_DEPENDS (not needed since xhtml is now used)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@580 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-04-10 01:56:50 +00:00
|
|
|
-- | Parse an inline note: a @^@ character followed by inline content
-- delimited by balanced square brackets.  The parsed inlines are wrapped
-- in a single 'Para' inside a 'Note'.  'failIfStrict' runs first
-- (presumably rejecting the construct in strict mode -- confirm against
-- its definition).  The whole parser is wrapped in 'try', so a failure
-- part-way through consumes no input.
inlineNote = try $
  failIfStrict >>
  char '^' >>
  inlinesInBalanced "[" "]" >>= \noteInlines ->
  return (Note [Para noteInlines])
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2007-08-15 06:00:58 +00:00
|
|
|
-- | Run 'failIfStrict' and, if it succeeds, parse with 'rawLaTeXInline'.
rawLaTeXInline' = do
  failIfStrict
  rawLaTeXInline
|
2006-12-30 22:51:49 +00:00
|
|
|
|
|
|
|
-- | Parse a piece of raw HTML and wrap the result in 'HtmlInline'.
-- The list of alternative parsers depends on the 'stateStrict' flag of
-- the current parser state: when it is set, any HTML tag or end tag is
-- accepted; otherwise only 'anyHtmlInlineTag' is tried after
-- 'htmlBlockElement'.
rawHtmlInline' = do
  parserState <- getState
  let alternatives
        | stateStrict parserState =
            [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
        | otherwise =
            [htmlBlockElement, anyHtmlInlineTag]
  rawHtml <- choice alternatives
  return (HtmlInline rawHtml)
|
2006-12-30 22:51:49 +00:00
|
|
|
|