Enhanced Pandoc's support for rST roles.

rST parser now supports:
    - All built-in rST roles
    - New role definition
    - Role inheritance

Issues/TODO:
    - Silently ignores illegal fields on roles
    - Silently drops class annotations for roles
    - Only supports :format: fields with a single format for :raw: roles,
      requires a change to Text.Pandoc.Definition.Format to support multiple
      formats.
    - Allows direct use of :raw: role, rST only allows indirect (i.e.,
      inherited use of :raw:).
This commit is contained in:
Merijn Verstraaten 2014-02-15 17:51:33 +01:00
parent 286781f801
commit fe246ce01c
4 changed files with 117 additions and 10 deletions

View file

@ -853,6 +853,7 @@ data ParserState = ParserState
stateHasChapters :: Bool, -- ^ True if \chapter encountered
stateMacros :: [Macro], -- ^ List of macros defined so far
stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles
stateWarnings :: [String] -- ^ Warnings generated by the parser
}
@ -915,6 +916,7 @@ defaultParserState =
stateHasChapters = False,
stateMacros = [],
stateRstDefaultRole = "title-reference",
stateRstCustomRoles = M.empty,
stateWarnings = []}
getOption :: (ReaderOptions -> a) -> Parser s ParserState a

View file

@ -36,12 +36,13 @@ import Text.Pandoc.Builder (setMeta, fromList)
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Options
import Control.Monad ( when, liftM, guard, mzero )
import Control.Monad ( when, liftM, guard, mzero, mplus )
import Data.List ( findIndex, intersperse, intercalate,
transpose, sort, deleteFirstsBy, isSuffixOf )
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
import Text.Printf ( printf )
import Control.Applicative ((<$>), (<$), (<*), (*>))
import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>))
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import qualified Text.Pandoc.Builder as B
import Data.Monoid (mconcat, mempty)
@ -530,7 +531,7 @@ directive' = do
let body' = body ++ "\n\n"
case label of
"raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
"role" -> return mempty
"role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
"container" -> parseFromString parseBlocks body'
"replace" -> B.para <$> -- consumed by substKey
parseFromString (trimInlines . mconcat <$> many inline)
@ -591,7 +592,38 @@ directive' = do
Nothing -> B.image src "" alt
_ -> return mempty
-- Can contain haracter codes as decimal numbers or
-- TODO:
-- - Silently ignores illegal fields
-- - Silently drops classes
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
addNewRole :: String -> [(String, String)] -> RSTParser Blocks
addNewRole roleString fields = do
(role, parentRole) <- parseFromString inheritedRole roleString
customRoles <- stateRstCustomRoles <$> getState
baseRole <- case M.lookup parentRole customRoles of
Just (base, _, _) -> return base
Nothing -> return parentRole
let fmt = if baseRole == "raw" then lookup "format" fields else Nothing
annotate = maybe id addLanguage $
if baseRole == "code"
then lookup "language" fields
else Nothing
updateState $ \s -> s {
stateRstCustomRoles =
M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles
}
return $ B.singleton Null
where
addLanguage lang (ident, classes, keyValues) =
(ident, "sourceCode" : lang : classes, keyValues)
inheritedRole =
(,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')')
-- Can contain character codes as decimal numbers or
-- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
-- or as XML-style hexadecimal character entities, e.g. &#x1a2b;
-- or text, which is used as-is. Comments start with ..
@ -930,17 +962,56 @@ strong = B.strong . trimInlines . mconcat <$>
-- Note, this doesn't precisely implement the complex rule in
-- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
-- but it should be good enough for most purposes
--
-- TODO:
-- - Classes are silently discarded in addNewRole
-- - Lacks sensible implementation for title-reference (which is the default)
-- - Allows direct use of the :raw: role, rST only allows inherited use.
interpretedRole :: RSTParser Inlines
interpretedRole = try $ do
(role, contents) <- roleBefore <|> roleAfter
case role of
"sup" -> return $ B.superscript $ B.str contents
"sub" -> return $ B.subscript $ B.str contents
"math" -> return $ B.math contents
_ -> return $ B.str contents --unknown
renderRole contents Nothing role nullAttr
renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines
renderRole contents fmt role attr = case role of
"sup" -> return $ B.superscript $ B.str contents
"superscript" -> return $ B.superscript $ B.str contents
"sub" -> return $ B.subscript $ B.str contents
"subscript" -> return $ B.subscript $ B.str contents
"emphasis" -> return $ B.emph $ B.str contents
"strong" -> return $ B.strong $ B.str contents
"rfc-reference" -> return $ rfcLink contents
"RFC" -> return $ rfcLink contents
"pep-reference" -> return $ pepLink contents
"PEP" -> return $ pepLink contents
"literal" -> return $ B.str contents
"math" -> return $ B.math contents
"title-reference" -> titleRef contents
"title" -> titleRef contents
"t" -> titleRef contents
"code" -> return $ B.codeWith attr contents
"raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
custom -> do
customRole <- stateRstCustomRoles <$> getState
case M.lookup custom customRole of
Just (_, newFmt, inherit) -> let
fmtStr = fmt `mplus` newFmt
(newRole, newAttr) = inherit attr
in renderRole contents fmtStr newRole newAttr
Nothing -> return $ B.str contents -- Undefined role
where
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
where padNo = replicate (4 - length pepNo) '0' ++ pepNo
pepUrl = "http://http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
roleNameEndingIn :: RSTParser Char -> RSTParser String
roleNameEndingIn end = many1Till (letter <|> char '-') end
roleMarker :: RSTParser String
roleMarker = char ':' *> many1Till (letter <|> char '-') (char ':')
roleMarker = char ':' *> roleNameEndingIn (char ':')
roleBefore :: RSTParser (String,String)
roleBefore = try $ do

View file

@ -319,5 +319,15 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."]
,Para [Str "Reset",Space,Str "default-role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default."]
,Para [Str "And",Space,Str "now",Space,Str "some-invalid-string-3231231",Space,Str "is",Space,Str "nonsense."]
,Null
,Para [Str "And",Space,Str "now",Space,Str "with",Space,RawInline (Format "html") "<b>inline</b> <span id=\"test\">HTML</span>",Str "."]
,Null
,Para [Str "And",Space,Str "some",Space,Str "inline",Space,Str "haskell",Space,Code ("",["sourceCode","haskell"],[]) "fmap id [1,2..10]",Str "."]
,Null
,Null
,Para [Str "Indirect",Space,Str "python",Space,Str "role",Space,Code ("",["sourceCode","python"],[]) "[x*x for x in [1,2,3,4,5]]",Str "."]
,Null
,Null
,Para [Str "Different",Space,Str "indirect",Space,Str "C",Space,Code ("",["sourceCode","c"],[]) "int x = 15;",Str "."]
,Header 2 ("literal-symbols",[],[]) [Str "Literal",Space,Str "symbols"]
,Para [Str "2*2",Space,Str "=",Space,Str "4*1"]]

View file

@ -599,6 +599,30 @@ Reset default-role to the default default.
And now `some-invalid-string-3231231` is nonsense.
.. role:: html(raw)
:format: html
And now with :html:`<b>inline</b> <span id="test">HTML</span>`.
.. role:: haskell(code)
:language: haskell
And some inline haskell :haskell:`fmap id [1,2..10]`.
.. role:: indirect(code)
.. role:: python(indirect)
:language: python
Indirect python role :python:`[x*x for x in [1,2,3,4,5]]`.
.. role:: different-indirect(code)
:language: c
.. role:: c(different-indirect)
Different indirect C :c:`int x = 15;`.
Literal symbols
---------------