Merge pull request #1805 from bergey/rst

RST Reader - Improved Role Support
This commit is contained in:
John MacFarlane 2014-12-15 09:06:45 -08:00
commit a864e9a348
6 changed files with 135 additions and 77 deletions

View file

@ -201,19 +201,18 @@ parseFormatSpec = parse formatSpec ""
'-' -> Set.delete ext
_ -> Set.insert ext
-- auxiliary function for readers:
markdown :: ReaderOptions -> String -> IO Pandoc
markdown o s = do
let (doc, warnings) = readMarkdownWithWarnings o s
mapM_ warn warnings
return doc
data Reader = StringReader (ReaderOptions -> String -> IO Pandoc)
| ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Pandoc, MediaBag))
mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader
mkStringReader r = StringReader (\o s -> return $ r o s)
mkStringReaderWithWarnings :: (ReaderOptions -> String -> (Pandoc, [String])) -> Reader
mkStringReaderWithWarnings r = StringReader $ \o s -> do
let (doc, warnings) = r o s
mapM_ warn warnings
return doc
mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader
mkBSReader r = ByteStringReader (\o s -> return $ r o s)
@ -221,12 +220,12 @@ mkBSReader r = ByteStringReader (\o s -> return $ r o s)
readers :: [(String, Reader)]
readers = [ ("native" , StringReader $ \_ s -> return $ readNative s)
,("json" , mkStringReader readJSON )
,("markdown" , StringReader markdown)
,("markdown_strict" , StringReader markdown)
,("markdown_phpextra" , StringReader markdown)
,("markdown_github" , StringReader markdown)
,("markdown_mmd", StringReader markdown)
,("rst" , mkStringReader readRST )
,("markdown" , mkStringReaderWithWarnings readMarkdownWithWarnings)
,("markdown_strict" , mkStringReaderWithWarnings readMarkdownWithWarnings)
,("markdown_phpextra" , mkStringReaderWithWarnings readMarkdownWithWarnings)
,("markdown_github" , mkStringReaderWithWarnings readMarkdownWithWarnings)
,("markdown_mmd", mkStringReaderWithWarnings readMarkdownWithWarnings)
,("rst" , mkStringReaderWithWarnings readRSTWithWarnings )
,("mediawiki" , mkStringReader readMediaWiki)
,("docbook" , mkStringReader readDocBook)
,("opml" , mkStringReader readOPML)

View file

@ -65,6 +65,7 @@ module Text.Pandoc.Parsing ( anyLine,
widthsFromIndices,
gridTableWith,
readWith,
readWithWarnings,
readWithM,
testStringWith,
guardEnabled,
@ -162,6 +163,7 @@ module Text.Pandoc.Parsing ( anyLine,
setSourceColumn,
setSourceLine,
newPos,
addWarning
)
where
@ -880,6 +882,15 @@ readWith :: Parser [Char] st a
-> a
readWith p t inp = runIdentity $ readWithM p t inp
readWithWarnings :: Parser [Char] ParserState a
-> ParserState
-> String
-> (a, [String])
readWithWarnings p = readWith $ do
doc <- p
warnings <- stateWarnings <$> getState
return (doc, warnings)
-- | Parse a string with @parser@ (for testing).
testStringWith :: (Show a, Stream [Char] Identity Char)
=> ParserT [Char] ParserState Identity a
@ -910,10 +921,9 @@ data ParserState = ParserState
stateHasChapters :: Bool, -- ^ True if \chapter encountered
stateMacros :: [Macro], -- ^ List of macros defined so far
stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles
stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles
-- Triple represents: 1) Base role, 2) Optional format (only for :raw:
-- roles), 3) Source language annotation for code (could be used to
-- annotate role classes too).
-- roles), 3) Additional classes (rest of Attr is unused)).
stateCaption :: Maybe Inlines, -- ^ Caption in current environment
stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed
stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context
@ -1245,3 +1255,10 @@ applyMacros' target = do
then do macros <- extractMacros <$> getState
return $ applyMacros macros target
else return target
-- | Append a warning to the log.
addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState ()
addWarning mbpos msg =
updateState $ \st -> st{
stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) :
stateWarnings st }

View file

@ -79,11 +79,7 @@ readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> (Pandoc, [String])
readMarkdownWithWarnings opts s =
(readWith parseMarkdownWithWarnings) def{ stateOptions = opts } (s ++ "\n\n")
where parseMarkdownWithWarnings = do
doc <- parseMarkdown
warnings <- stateWarnings <$> getState
return (doc, warnings)
(readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
trimInlinesF :: F Inlines -> F Inlines
trimInlinesF = liftM trimInlines
@ -343,12 +339,6 @@ parseMarkdown = do
let Pandoc _ bs = B.doc $ runF blocks st
return $ Pandoc meta bs
addWarning :: Maybe SourcePos -> String -> MarkdownParser ()
addWarning mbpos msg =
updateState $ \st -> st{
stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) :
stateWarnings st }
referenceKey :: MarkdownParser (F Blocks)
referenceKey = try $ do
pos <- getPosition

View file

@ -29,20 +29,21 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion from reStructuredText to 'Pandoc' document.
-}
module Text.Pandoc.Readers.RST (
readRST
readRST,
readRSTWithWarnings
) where
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, fromList)
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Options
import Control.Monad ( when, liftM, guard, mzero, mplus )
import Control.Monad ( when, liftM, guard, mzero )
import Data.List ( findIndex, intersperse, intercalate,
transpose, sort, deleteFirstsBy, isSuffixOf )
transpose, sort, deleteFirstsBy, isSuffixOf , nub, union)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
import Text.Printf ( printf )
import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>))
import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>), pure)
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import qualified Text.Pandoc.Builder as B
import Data.Monoid (mconcat, mempty)
@ -55,6 +56,9 @@ readRST :: ReaderOptions -- ^ Reader options
-> Pandoc
readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n")
readRSTWithWarnings :: ReaderOptions -> String -> (Pandoc, [String])
readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n")
type RSTParser = Parser [Char] ParserState
--
@ -608,38 +612,62 @@ directive' = do
"" -> block
_ -> parseFromString parseBlocks body'
return $ B.divWith attrs children
_ -> return mempty
other -> do
pos <- getPosition
addWarning (Just pos) $ "ignoring unknown directive: " ++ other
return mempty
-- TODO:
-- - Silently ignores illegal fields
-- - Silently drops classes
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
addNewRole :: String -> [(String, String)] -> RSTParser Blocks
addNewRole roleString fields = do
(role, parentRole) <- parseFromString inheritedRole roleString
customRoles <- stateRstCustomRoles <$> getState
baseRole <- case M.lookup parentRole customRoles of
Just (base, _, _) -> return base
Nothing -> return parentRole
let fmt = if baseRole == "raw" then lookup "format" fields else Nothing
annotate = maybe id addLanguage $
if baseRole == "code"
let (baseRole, baseFmt, baseAttr) =
maybe (parentRole, Nothing, nullAttr) id $
M.lookup parentRole customRoles
fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
annotate :: [String] -> [String]
annotate = maybe id (:) $
if parentRole == "code"
then lookup "language" fields
else Nothing
attr = let (ident, classes, keyValues) = baseAttr
-- nub in case role name & language class are the same
in (ident, nub . (role :) . annotate $ classes, keyValues)
-- warn about syntax we ignore
flip mapM_ fields $ \(key, _) -> case key of
"language" -> when (parentRole /= "code") $ addWarning Nothing $
"ignoring :language: field because the parent of role :" ++
role ++ ": is :" ++ parentRole ++ ": not :code:"
"format" -> when (parentRole /= "raw") $ addWarning Nothing $
"ignoring :format: field because the parent of role :" ++
role ++ ": is :" ++ parentRole ++ ": not :raw:"
_ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++
": in definition of role :" ++ role ++ ": in"
when (parentRole == "raw" && countKeys "format" > 1) $
addWarning Nothing $
"ignoring :format: fields after the first in the definition of role :"
++ role ++": in"
when (parentRole == "code" && countKeys "language" > 1) $
addWarning Nothing $
"ignoring :language: fields after the first in the definition of role :"
++ role ++": in"
updateState $ \s -> s {
stateRstCustomRoles =
M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles
M.insert role (baseRole, fmt, attr) customRoles
}
return $ B.singleton Null
where
addLanguage lang (ident, classes, keyValues) =
(ident, "sourceCode" : lang : classes, keyValues)
countKeys k = length . filter (== k) . map fst $ fields
inheritedRole =
(,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')')
(,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span")
-- Can contain character codes as decimal numbers or
-- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
@ -999,21 +1027,23 @@ renderRole contents fmt role attr = case role of
"RFC" -> return $ rfcLink contents
"pep-reference" -> return $ pepLink contents
"PEP" -> return $ pepLink contents
"literal" -> return $ B.str contents
"literal" -> return $ B.codeWith attr contents
"math" -> return $ B.math contents
"title-reference" -> titleRef contents
"title" -> titleRef contents
"t" -> titleRef contents
"code" -> return $ B.codeWith attr contents
"code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
"span" -> return $ B.spanWith attr $ B.str contents
"raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
custom -> do
customRole <- stateRstCustomRoles <$> getState
case M.lookup custom customRole of
Just (_, newFmt, inherit) -> let
fmtStr = fmt `mplus` newFmt
(newRole, newAttr) = inherit attr
in renderRole contents fmtStr newRole newAttr
Nothing -> return $ B.str contents -- Undefined role
customRoles <- stateRstCustomRoles <$> getState
case M.lookup custom customRoles of
Just (newRole, newFmt, newAttr) ->
renderRole contents newFmt newRole newAttr
Nothing -> do
pos <- getPosition
addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in"
return $ B.str contents -- Undefined role
where
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
@ -1022,11 +1052,14 @@ renderRole contents fmt role attr = case role of
where padNo = replicate (4 - length pepNo) '0' ++ pepNo
pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
roleNameEndingIn :: RSTParser Char -> RSTParser String
roleNameEndingIn end = many1Till (letter <|> char '-') end
addClass :: String -> Attr -> Attr
addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues)
roleName :: RSTParser String
roleName = many1 (letter <|> char '-')
roleMarker :: RSTParser String
roleMarker = char ':' *> roleNameEndingIn (char ':')
roleMarker = char ':' *> roleName <* char ':'
roleBefore :: RSTParser (String,String)
roleBefore = try $ do

View file

@ -67,26 +67,45 @@ tests = [ "line block with blank line" =:
link "http://foo.bar.baz" "" "http://foo.bar.baz" <> ". " <>
link "http://foo.bar/baz_(bam)" "" "http://foo.bar/baz_(bam)"
<> " (" <> link "http://foo.bar" "" "http://foo.bar" <> ")")
, "indented literal block" =: unlines
[ "::"
, ""
, " block quotes"
, ""
, " can go on for many lines"
, "but must stop here"]
=?> (doc $
codeBlock "block quotes\n\ncan go on for many lines" <>
para "but must stop here")
, "line block with 3 lines" =: "| a\n| b\n| c"
=?> para ("a" <> linebreak <> "b" <> linebreak <> "c")
, testGroup "literal / line / code blocks"
[ "indented literal block" =: unlines
[ "::"
, ""
, " block quotes"
, ""
, " can go on for many lines"
, "but must stop here"]
=?> (doc $
codeBlock "block quotes\n\ncan go on for many lines" <>
para "but must stop here")
, "line block with 3 lines" =: "| a\n| b\n| c"
=?> para ("a" <> linebreak <> "b" <> linebreak <> "c")
, "quoted literal block using >" =: "::\n\n> quoted\n> block\n\nOrdinary paragraph"
=?> codeBlock "> quoted\n> block" <> para "Ordinary paragraph"
, "quoted literal block using | (not a line block)" =: "::\n\n| quoted\n| block\n\nOrdinary paragraph"
=?> codeBlock "| quoted\n| block" <> para "Ordinary paragraph"
, "class directive with single paragraph" =: ".. class:: special\n\nThis is a \"special\" paragraph."
=?> divWith ("", ["special"], []) (para "This is a \"special\" paragraph.")
, "class directive with two paragraphs" =: ".. class:: exceptional remarkable\n\n First paragraph.\n\n Second paragraph."
=?> divWith ("", ["exceptional", "remarkable"], []) (para "First paragraph." <> para "Second paragraph.")
, "class directive around literal block" =: ".. class:: classy\n\n::\n\n a\n b"
=?> divWith ("", ["classy"], []) (codeBlock "a\nb")
, "class directive with single paragraph" =: ".. class:: special\n\nThis is a \"special\" paragraph."
=?> divWith ("", ["special"], []) (para "This is a \"special\" paragraph.")
, "class directive with two paragraphs" =: ".. class:: exceptional remarkable\n\n First paragraph.\n\n Second paragraph."
=?> divWith ("", ["exceptional", "remarkable"], []) (para "First paragraph." <> para "Second paragraph.")
, "class directive around literal block" =: ".. class:: classy\n\n::\n\n a\n b"
=?> divWith ("", ["classy"], []) (codeBlock "a\nb")]
, testGroup "interpreted text roles"
[ "literal role prefix" =: ":literal:`a`" =?> para (code "a")
, "literal role postfix" =: "`a`:literal:" =?> para (code "a")
, "literal text" =: "``text``" =?> para (code "text")
, "code role" =: ":code:`a`" =?> para (codeWith ("", ["sourceCode"], []) "a")
, "inherited code role" =: ".. role:: codeLike(code)\n\n:codeLike:`a`"
=?> para (codeWith ("", ["codeLike", "sourceCode"], []) "a")
, "custom code role with language field"
=: ".. role:: lhs(code)\n :language: haskell\n\n:lhs:`a`"
=?> para (codeWith ("", ["lhs", "haskell","sourceCode"], []) "a")
, "custom role with unspecified parent role"
=: ".. role:: classy\n\n:classy:`text`"
=?> para (spanWith ("", ["classy"], []) "text")
, "role with recursive inheritance"
=: ".. role:: haskell(code)\n.. role:: lhs(haskell)\n\n:lhs:`text`"
=?> para (codeWith ("", ["lhs", "haskell", "sourceCode"], []) "text")
, "unknown role" =: ":unknown:`text`" =?> para (str "text")
]
]

View file

@ -322,12 +322,12 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
,Null
,Para [Str "And",Space,Str "now",Space,Str "with",Space,RawInline (Format "html") "<b>inline</b> <span id=\"test\">HTML</span>",Str "."]
,Null
,Para [Str "And",Space,Str "some",Space,Str "inline",Space,Str "haskell",Space,Code ("",["sourceCode","haskell"],[]) "fmap id [1,2..10]",Str "."]
,Para [Str "And",Space,Str "some",Space,Str "inline",Space,Str "haskell",Space,Code ("",["haskell","sourceCode"],[]) "fmap id [1,2..10]",Str "."]
,Null
,Null
,Para [Str "Indirect",Space,Str "python",Space,Str "role",Space,Code ("",["sourceCode","python"],[]) "[x*x for x in [1,2,3,4,5]]",Str "."]
,Para [Str "Indirect",Space,Str "python",Space,Str "role",Space,Code ("",["python","indirect","sourceCode"],[]) "[x*x for x in [1,2,3,4,5]]",Str "."]
,Null
,Null
,Para [Str "Different",Space,Str "indirect",Space,Str "C",Space,Code ("",["sourceCode","c"],[]) "int x = 15;",Str "."]
,Para [Str "Different",Space,Str "indirect",Space,Str "C",Space,Code ("",["c","different-indirect","sourceCode"],[]) "int x = 15;",Str "."]
,Header 2 ("literal-symbols",[],[]) [Str "Literal",Space,Str "symbols"]
,Para [Str "2*2",Space,Str "=",Space,Str "4*1"]]