RST reader: more accurate parsing of references.

Previously we erroneously included the enclosing
backticks in a reference ID (closes #4156).

This change also disables interpretation of
inline markup inside references, as in docutils.
So, for example, there is no emphasis in

    `my *link*`_
This commit is contained in:
John MacFarlane 2017-12-14 12:47:15 -08:00
parent 3361f85f8e
commit b94f1e2045
3 changed files with 52 additions and 36 deletions

View file

@ -1030,7 +1030,7 @@ noteBlock' marker = try $ do
citationMarker :: Monad m => RSTParser m [Char] citationMarker :: Monad m => RSTParser m [Char]
citationMarker = do citationMarker = do
char '[' char '['
res <- simpleReferenceName' res <- simpleReferenceName
char ']' char ']'
return res return res
@ -1039,7 +1039,7 @@ noteMarker = do
char '[' char '['
res <- many1 digit res <- many1 digit
<|> <|>
try (char '#' >> liftM ('#':) simpleReferenceName') try (char '#' >> liftM ('#':) simpleReferenceName)
<|> count 1 (oneOf "#*") <|> count 1 (oneOf "#*")
char ']' char ']'
return res return res
@ -1048,34 +1048,24 @@ noteMarker = do
-- reference key -- reference key
-- --
quotedReferenceName :: PandocMonad m => RSTParser m Inlines quotedReferenceName :: PandocMonad m => RSTParser m String
quotedReferenceName = try $ do quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`') -- `` means inline code! char '`' >> notFollowedBy (char '`') -- `` means inline code!
trimInlines . mconcat <$> many1Till inline (char '`') manyTill anyChar (char '`')
unquotedReferenceName :: PandocMonad m => RSTParser m Inlines
unquotedReferenceName = try $ do -- `` means inline code!
trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':')
-- Simple reference names are single words consisting of alphanumerics -- Simple reference names are single words consisting of alphanumerics
-- plus isolated (no two adjacent) internal hyphens, underscores, -- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters -- periods, colons and plus signs; no whitespace or other characters
-- are allowed. -- are allowed.
simpleReferenceName' :: Monad m => ParserT [Char] st m String simpleReferenceName :: Monad m => ParserT [Char] st m String
simpleReferenceName' = do simpleReferenceName = do
x <- alphaNum x <- alphaNum
xs <- many $ alphaNum xs <- many $ alphaNum
<|> <|> try (oneOf "-_:+." <* lookAhead alphaNum)
try (oneOf "-_:+." <* lookAhead alphaNum)
return (x:xs) return (x:xs)
simpleReferenceName :: Monad m => ParserT [Char] st m Inlines referenceName :: PandocMonad m => RSTParser m String
simpleReferenceName = B.str <$> simpleReferenceName' referenceName = quotedReferenceName <|> simpleReferenceName
referenceName :: PandocMonad m => RSTParser m Inlines
referenceName = quotedReferenceName <|>
try (simpleReferenceName <* lookAhead (char ':')) <|>
unquotedReferenceName
referenceKey :: PandocMonad m => RSTParser m [Char] referenceKey :: PandocMonad m => RSTParser m [Char]
referenceKey = do referenceKey = do
@ -1123,16 +1113,17 @@ anonymousKey = try $ do
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s } stateKeys s }
stripTicks :: String -> String
stripTicks = reverse . stripTick . reverse . stripTick
where stripTick ('`':xs) = xs
stripTick xs = xs
referenceNames :: PandocMonad m => RSTParser m [String] referenceNames :: PandocMonad m => RSTParser m [String]
referenceNames = do referenceNames = do
let rn = try $ do let rn = try $ do
string ".. _" string ".. _"
(_, ref) <- withRaw referenceName ref <- quotedReferenceName
<|> many ( noneOf ":\n"
<|> try (char '\n' <*
string " " <*
notFollowedBy blankline)
<|> try (char ':' <* lookAhead alphaNum)
)
char ':' char ':'
return ref return ref
first <- rn first <- rn
@ -1147,16 +1138,15 @@ regularKey = try $ do
refs <- referenceNames refs <- referenceNames
src <- targetURI src <- targetURI
guard $ not (null src) guard $ not (null src)
let keys = map (toKey . stripTicks) refs let keys = map toKey refs
forM_ keys $ \key -> forM_ keys $ \key ->
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s } stateKeys s }
anchorDef :: PandocMonad m => RSTParser m [Char] anchorDef :: PandocMonad m => RSTParser m [Char]
anchorDef = try $ do anchorDef = try $ do
(refs, raw) <- withRaw (try (referenceNames <* blanklines)) (refs, raw) <- withRaw $ try (referenceNames <* blanklines)
let keys = map stripTicks refs forM_ refs $ \rawkey ->
forM_ keys $ \rawkey ->
updateState $ \s -> s { stateKeys = updateState $ \s -> s { stateKeys =
M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s } M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s }
-- keep this for 2nd round of parsing, where we'll add the divs (anchor) -- keep this for 2nd round of parsing, where we'll add the divs (anchor)
@ -1479,22 +1469,20 @@ explicitLink = try $ do
_ -> return ((src, ""), nullAttr) _ -> return ((src, ""), nullAttr)
return $ B.linkWith attr (escapeURI src') tit label'' return $ B.linkWith attr (escapeURI src') tit label''
citationName :: PandocMonad m => RSTParser m Inlines citationName :: PandocMonad m => RSTParser m String
citationName = do citationName = do
raw <- citationMarker raw <- citationMarker
return $ B.str $ "[" ++ raw ++ "]" return $ "[" ++ raw ++ "]"
referenceLink :: PandocMonad m => RSTParser m Inlines referenceLink :: PandocMonad m => RSTParser m Inlines
referenceLink = try $ do referenceLink = try $ do
(label',ref) <- withRaw (quotedReferenceName ref <- (referenceName <|> citationName) <* char '_'
<|> simpleReferenceName let label' = B.text ref
<|> citationName) <*
char '_'
let isAnonKey (Key ('_':_)) = True let isAnonKey (Key ('_':_)) = True
isAnonKey _ = False isAnonKey _ = False
state <- getState state <- getState
let keyTable = stateKeys state let keyTable = stateKeys state
key <- option (toKey $ stripTicks ref) $ key <- option (toKey ref) $
do char '_' do char '_'
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
case anonKeys of case anonKeys of

10
test/command/4156.md Normal file
View file

@ -0,0 +1,10 @@
```
% pandoc -f rst
.. _`SOMEID`:
foo
^D
<div id="SOMEID">
<p>foo</p>
</div>
```

18
test/command/rst-links.md Normal file
View file

@ -0,0 +1,18 @@
```
% pandoc -f rst
`*ab*`_
.. _`*ab*`: foo
^D
<p><a href="foo">*ab*</a></p>
```
```
% pandoc -f rst
`A B
c`_
.. _A B C: foo
^D
<p><a href="foo">A B c</a></p>
```