RST reader: more accurate parsing of references.

Previously we erroneously included the enclosing
backticks in a reference ID (closes #4156).

This change also disables interpretation of
inline syntax inside references, as in docutils.
So, for example, there is no emphasis in

    `my *link*`_
This commit is contained in:
John MacFarlane 2017-12-14 12:47:15 -08:00
parent 3361f85f8e
commit b94f1e2045
3 changed files with 52 additions and 36 deletions

View file

@ -1030,7 +1030,7 @@ noteBlock' marker = try $ do
-- Parses a citation marker: a simple reference name enclosed in
-- square brackets. Returns the name without the brackets.
citationMarker :: Monad m => RSTParser m [Char]
citationMarker = do
char '['
res <- simpleReferenceName'
res <- simpleReferenceName
char ']'
return res
@ -1039,7 +1039,7 @@ noteMarker = do
char '['
res <- many1 digit
<|>
try (char '#' >> liftM ('#':) simpleReferenceName')
try (char '#' >> liftM ('#':) simpleReferenceName)
<|> count 1 (oneOf "#*")
char ']'
return res
@ -1048,34 +1048,24 @@ noteMarker = do
-- reference key
--
-- Parses a reference name enclosed in single backticks. The opening
-- backtick must not be followed directly by a second backtick.
quotedReferenceName :: PandocMonad m => RSTParser m Inlines
quotedReferenceName :: PandocMonad m => RSTParser m String
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`') -- `` means inline code!
trimInlines . mconcat <$> many1Till inline (char '`')
unquotedReferenceName :: PandocMonad m => RSTParser m Inlines
unquotedReferenceName = try $ do -- `` means inline code!
trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':')
-- collect the raw characters up to (and consuming) the closing backtick
manyTill anyChar (char '`')
-- Simple reference names are single words consisting of alphanumerics
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
simpleReferenceName' :: Monad m => ParserT [Char] st m String
simpleReferenceName' = do
simpleReferenceName :: Monad m => ParserT [Char] st m String
simpleReferenceName = do
-- The name must start with an alphanumeric. After that, a punctuation
-- character is accepted only when an alphanumeric follows it directly
-- (checked with lookAhead, so the following character is not consumed
-- by that check); this keeps punctuation isolated and internal.
x <- alphaNum
xs <- many $ alphaNum
<|>
try (oneOf "-_:+." <* lookAhead alphaNum)
<|> try (oneOf "-_:+." <* lookAhead alphaNum)
return (x:xs)
simpleReferenceName :: Monad m => ParserT [Char] st m Inlines
simpleReferenceName = B.str <$> simpleReferenceName'
-- Parses a reference name. A backtick-quoted name is tried first;
-- if that fails, the remaining reference-name forms are tried.
referenceName :: PandocMonad m => RSTParser m Inlines
referenceName = quotedReferenceName <|>
try (simpleReferenceName <* lookAhead (char ':')) <|>
unquotedReferenceName
referenceName :: PandocMonad m => RSTParser m String
referenceName = quotedReferenceName <|> simpleReferenceName
referenceKey :: PandocMonad m => RSTParser m [Char]
referenceKey = do
@ -1123,16 +1113,17 @@ anonymousKey = try $ do
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
-- Removes one leading and one trailing backtick from a string, when
-- present. The trailing one is handled by reversing the string,
-- stripping a leading backtick, and reversing back.
stripTicks :: String -> String
stripTicks = reverse . stripTick . reverse . stripTick
where stripTick ('`':xs) = xs
stripTick xs = xs
referenceNames :: PandocMonad m => RSTParser m [String]
referenceNames = do
let rn = try $ do
string ".. _"
(_, ref) <- withRaw referenceName
ref <- quotedReferenceName
<|> many ( noneOf ":\n"
<|> try (char '\n' <*
string " " <*
notFollowedBy blankline)
<|> try (char ':' <* lookAhead alphaNum)
)
char ':'
return ref
first <- rn
@ -1147,16 +1138,15 @@ regularKey = try $ do
refs <- referenceNames
src <- targetURI
guard $ not (null src)
let keys = map (toKey . stripTicks) refs
let keys = map toKey refs
forM_ keys $ \key ->
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
anchorDef :: PandocMonad m => RSTParser m [Char]
anchorDef = try $ do
(refs, raw) <- withRaw (try (referenceNames <* blanklines))
let keys = map stripTicks refs
forM_ keys $ \rawkey ->
(refs, raw) <- withRaw $ try (referenceNames <* blanklines)
forM_ refs $ \rawkey ->
updateState $ \s -> s { stateKeys =
M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s }
-- keep this for 2nd round of parsing, where we'll add the divs (anchor)
@ -1479,22 +1469,20 @@ explicitLink = try $ do
_ -> return ((src, ""), nullAttr)
return $ B.linkWith attr (escapeURI src') tit label''
-- Parses a citation marker and returns its name wrapped again in
-- square brackets (citationMarker itself returns the bare name).
citationName :: PandocMonad m => RSTParser m Inlines
citationName :: PandocMonad m => RSTParser m String
citationName = do
raw <- citationMarker
return $ B.str $ "[" ++ raw ++ "]"
return $ "[" ++ raw ++ "]"
referenceLink :: PandocMonad m => RSTParser m Inlines
referenceLink = try $ do
(label',ref) <- withRaw (quotedReferenceName
<|> simpleReferenceName
<|> citationName) <*
char '_'
ref <- (referenceName <|> citationName) <* char '_'
let label' = B.text ref
let isAnonKey (Key ('_':_)) = True
isAnonKey _ = False
state <- getState
let keyTable = stateKeys state
key <- option (toKey $ stripTicks ref) $
key <- option (toKey ref) $
do char '_'
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
case anonKeys of

10
test/command/4156.md Normal file
View file

@ -0,0 +1,10 @@
```
% pandoc -f rst
.. _`SOMEID`:
foo
^D
<div id="SOMEID">
<p>foo</p>
</div>
```

18
test/command/rst-links.md Normal file
View file

@ -0,0 +1,18 @@
```
% pandoc -f rst
`*ab*`_
.. _`*ab*`: foo
^D
<p><a href="foo">*ab*</a></p>
```
```
% pandoc -f rst
`A B
c`_
.. _A B C: foo
^D
<p><a href="foo">A B c</a></p>
```