RST reader: remove support for nested inlines.

RST does not allow nested emphasis, links, or other inline
constructs.

Closes #4581, in which links with a URL as their link text
were parsed twice.  This supersedes the earlier fix for #4581
in 6419819b46.

Fixes #4561, a bug in parsing URLs inside emphasis.

Closes #4792.
This commit is contained in:
danse 2018-04-23 08:54:06 +02:00 committed by John MacFarlane
parent 50e8c3b107
commit be2d7921cb
3 changed files with 26 additions and 23 deletions

View file

@ -45,7 +45,6 @@ import Data.Maybe (fromMaybe, isJust)
import Data.Sequence (ViewR (..), viewr) import Data.Sequence (ViewR (..), viewr)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import Text.Pandoc.Walk (walk)
import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines) import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines)
import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad, fetchItem, readFileFromDirs) import Text.Pandoc.Class (PandocMonad, fetchItem, readFileFromDirs)
@ -1314,19 +1313,24 @@ table = gridTable False <|> simpleTable False <|>
inline :: PandocMonad m => RSTParser m Inlines inline :: PandocMonad m => RSTParser m Inlines
inline = choice [ note -- can start with whitespace, so try before ws inline = choice [ note -- can start with whitespace, so try before ws
, whitespace
, link , link
, str
, endline , endline
, strong , strong
, emph , emph
, code , code
, subst , subst
, interpretedRole , interpretedRole
, smart , inlineContent ] <?> "inline"
, hyphens
, escapedChar -- strings, spaces and other characters that can appear either by
, symbol ] <?> "inline" -- themselves or within inline markup
inlineContent :: PandocMonad m => RSTParser m Inlines
inlineContent = choice [ whitespace
, str
, smart
, hyphens
, escapedChar
, symbol ] <?> "inline content"
parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline) parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline)
@ -1369,11 +1373,11 @@ atStart p = do
emph :: PandocMonad m => RSTParser m Inlines emph :: PandocMonad m => RSTParser m Inlines
emph = B.emph . trimInlines . mconcat <$> emph = B.emph . trimInlines . mconcat <$>
enclosed (atStart $ char '*') (char '*') inline enclosed (atStart $ char '*') (char '*') inlineContent
strong :: PandocMonad m => RSTParser m Inlines strong :: PandocMonad m => RSTParser m Inlines
strong = B.strong . trimInlines . mconcat <$> strong = B.strong . trimInlines . mconcat <$>
enclosed (atStart $ string "**") (try $ string "**") inline enclosed (atStart $ string "**") (try $ string "**") inlineContent
-- Note, this doesn't precisely implement the complex rule in -- Note, this doesn't precisely implement the complex rule in
-- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
@ -1480,8 +1484,8 @@ explicitLink :: PandocMonad m => RSTParser m Inlines
explicitLink = try $ do explicitLink = try $ do
char '`' char '`'
notFollowedBy (char '`') -- `` marks start of inline code notFollowedBy (char '`') -- `` marks start of inline code
label' <- removeLinks . trimInlines . mconcat <$> label' <- trimInlines . mconcat <$>
manyTill (notFollowedBy (char '`') >> inline) (char '<') manyTill (notFollowedBy (char '`') >> inlineContent) (char '<')
src <- trim <$> manyTill (noneOf ">\n") (char '>') src <- trim <$> manyTill (noneOf ">\n") (char '>')
skipSpaces skipSpaces
string "`_" string "`_"
@ -1495,12 +1499,6 @@ explicitLink = try $ do
_ -> return ((src, ""), nullAttr) _ -> return ((src, ""), nullAttr)
return $ B.linkWith attr (escapeURI src') tit label'' return $ B.linkWith attr (escapeURI src') tit label''
removeLinks :: B.Inlines -> B.Inlines
removeLinks = B.fromList . walk (concatMap go) . B.toList
where go :: Inline -> [Inline]
go (Link _ lab _) = lab
go x = [x]
citationName :: PandocMonad m => RSTParser m String citationName :: PandocMonad m => RSTParser m String
citationName = do citationName = do
raw <- citationMarker raw <- citationMarker

View file

@ -188,4 +188,15 @@ tests = [ "line block with blank line" =:
] =?> ] =?>
para ("foo" <> note (para "bar")) para ("foo" <> note (para "bar"))
] ]
, testGroup "inlines"
[ "links can contain an URI without being parsed twice (#4581)" =:
"`http://loc <http://loc>`__" =?>
para (link "http://loc" "" "http://loc")
, "inline markup cannot be nested" =:
"**a*b*c**" =?>
para (strong "a*b*c")
, "bare URI parsing disabled inside emphasis (#4561)" =:
"*http://location*" =?>
para (emph (text "http://location"))
]
] ]

View file

@ -1,6 +0,0 @@
```
% pandoc -f rst -t native
`http://loc <http://loc>`__
^D
[Para [Link ("",[],[]) [Str "http://loc"] ("http://loc","")]]
```