RST reader: Added footnote support.

Resolves issue #258.

Note that there are some differences in how docutils and
pandoc treat footnotes.  Currently pandoc ignores the numeral
or symbol used in the note; footnotes are put in an auto-numbered
ordered list.
This commit is contained in:
John MacFarlane 2010-12-08 08:17:44 -08:00
parent 91978d2201
commit 9ead748cc9
4 changed files with 88 additions and 8 deletions

6
README
View file

@ -66,10 +66,8 @@ To convert `hello.html` from html to markdown:
Supported output formats are listed below under the `-t/--to` option.
Supported input formats are listed below under the `-f/--from` option. Note
that the `rst` reader only parses a subset of reStructuredText syntax. For
example, it doesn't handle tables, option lists, or footnotes. But for simple
documents it should be adequate. The `textile`, `latex`, and `html` readers
are also limited in what they can do.
that the `rst`, `textile`, `latex`, and `html` readers are not complete;
there are some constructs that they do not parse.
If the input or output format is not specified explicitly, `pandoc`
will attempt to guess it from the extensions of

View file

@ -35,7 +35,7 @@ import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
import Control.Monad ( when, unless )
import Data.List ( findIndex, intercalate, transpose, sort )
import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy )
import qualified Data.Map as M
import Text.Printf ( printf )
@ -91,11 +91,15 @@ titleTransform blocks = (blocks, [])
parseRST :: GenParser Char ParserState Pandoc
parseRST = do
startPos <- getPosition
-- go through once just to get list of reference keys
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
docMinusKeys <- manyTill (referenceKey <|> noteBlock <|> lineClump) eof >>=
return . concat
setInput docMinusKeys
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
updateState $ \s -> s { stateNotes = reverse reversedNotes }
-- now parse it for real...
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
@ -508,6 +512,32 @@ unknownDirective = try $ do
many $ blanklines <|> (oneOf " \t" >> manyTill anyChar newline)
return Null
---
--- note block
---
-- | Recognise a footnote definition of the form @.. [marker] text@ during
-- the preliminary pass over the document.  The definition consists of the
-- remainder of the marker line, any blank lines that follow, and an
-- optional block accepted by 'indentedBlock'.
--
-- The pair @(marker, raw text)@ is pushed onto the front of 'stateNotes'.
-- The parser's result is not the note itself but one newline per source
-- line consumed, so the text that replaces the definition occupies the
-- same number of lines.
noteBlock :: GenParser Char ParserState [Char]
noteBlock = try $ do
  begin <- getPosition
  string ".."
  skipMany1 spaceChar
  marker <- noteMarker
  skipMany1 spaceChar
  firstLine <- anyLine
  gap <- option "" blanklines
  continuation <- option "" indentedBlock
  end <- getPosition
  let body = concat [firstLine, "\n", gap, continuation, "\n"]
      consumedLines = sourceLine end - sourceLine begin
  -- newest definition goes first; the list is therefore built in reverse
  -- document order
  updateState $ \s -> s { stateNotes = (marker, body) : stateNotes s }
  -- hand back only newlines so later source positions are not shifted
  return $ replicate consumedLines '\n'
-- | Parse a bracketed footnote label and return the text between the
-- brackets: either a run of one or more digits, or a single @#@ or @*@.
noteMarker :: GenParser Char ParserState [Char]
noteMarker = between (char '[') (char ']') label
  where
    -- 'count 1' wraps the single symbol in a list so both alternatives
    -- yield a string
    label = many1 digit <|> count 1 (oneOf "#*")
--
-- reference key
--
@ -692,6 +722,7 @@ inline = choice [ smartPunctuation inline
, superscript
, subscript
, escapedChar
, note
, symbol ] <?> "inline"
hyphens :: GenParser Char ParserState Inline
@ -820,3 +851,20 @@ image = try $ do
Nothing -> fail "no corresponding key"
Just target -> return target
return $ Image (normalizeSpaces ref) (src, tit)
-- | Parse an inline footnote reference (a 'noteMarker' immediately followed
-- by an underscore, e.g. @[1]_@, @[#]_@ or @[*]_@) and produce a 'Note'
-- whose contents come from parsing the raw text stored under that marker
-- in 'stateNotes'.
--
-- Fails with "note not found" when no stored note has the marker, which
-- lets the caller fall back to other inline parsers.
--
-- Notes labelled @#@ or @*@ are auto-numbered: each reference consumes the
-- first remaining definition with that label.  Explicitly labelled notes
-- stay available for further references.
note :: GenParser Char ParserState Inline
note = try $ do
  ref <- noteMarker
  char '_'
  notes <- fmap stateNotes getState
  case lookup ref notes of
    Nothing  -> fail "note not found"
    Just raw -> do
      let thisNote     = (ref, raw)
          autoNumbered = ref == "*" || ref == "#"
      -- Take this note out of the table *before* parsing its body.
      -- Otherwise a body that mentions its own marker would look up the
      -- very same raw text again and recurse without end.
      updateState $ \st ->
        st { stateNotes = deleteFirstsBy (==) notes [thisNote] }
      contents <- parseFromString parseBlocks raw
      -- An auto-numbered note stays deleted so the next auto-numbered
      -- reference doesn't get the same contents.  An explicitly labelled
      -- note is put back.  We modify the state as it is *now* rather than
      -- writing back the earlier snapshot, so that any notes consumed
      -- while parsing the body are not resurrected.
      -- NOTE(review): relies on 'parseFromString' sharing this parser's
      -- user state -- confirm against Text.Pandoc.Parsing.
      unless autoNumbered $
        updateState $ \st -> st { stateNotes = thisNote : stateNotes st }
      return $ Note contents

View file

@ -308,4 +308,10 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ":
, Para [Str "r1",Space,Str "bis"] ], [ BulletList
[ [ Plain [Str "b"] ]
, [ Plain [Str "b",Space,Str "2"] ]
, [ Plain [Str "b",Space,Str "2"] ] ] ], [ Plain [Str "c",Space,Str "c",Space,Str "2",Space,Str "c",Space,Str "2"] ] ] ] ]
, [ Plain [Str "b",Space,Str "2"] ] ] ], [ Plain [Str "c",Space,Str "c",Space,Str "2",Space,Str "c",Space,Str "2"] ] ] ]
, Header 1 [Str "Footnotes"]
, Para [Note [Para [Str "Note",Space,Str "with",Space,Str "one",Space,Str "line",Str "."]]]
, Para [Note [Para [Str "Note",Space,Str "with",Space,Str "continuation",Space,Str "line",Str "."]]]
, Para [Note [Para [Str "Note",Space,Str "with"],Para [Str "continuation",Space,Str "block",Str "."]]]
, Para [Note [Para [Str "Note",Space,Str "with",Space,Str "continuation",Space,Str "line"],Para [Str "and",Space,Str "a",Space,Str "second",Space,Str "para",Str "."]]]
, Para [Str "Not",Space,Str "in",Space,Str "note",Str "."] ]

View file

@ -508,3 +508,31 @@ Multiple blocks in a cell
| | - b 2 | c 2 |
| r1 bis | - b 2 | c 2 |
+------------------+-----------+------------+
Footnotes
=========
[1]_
[#]_
[#]_
[*]_
.. [1] Note with one line.
.. [#] Note with
continuation line.
.. [#] Note with
continuation block.
.. [*] Note with
continuation line
and a second para.
Not in note.