RST reader: more accurate parsing of references.
Previously we erroneously included the enclosing backticks in a reference ID (closes #4156). This change also disables interpretation of inline markup inside reference names, as docutils does, so there is no emphasis in `my *link*`_.
This commit is contained in:
parent
3361f85f8e
commit
b94f1e2045
3 changed files with 52 additions and 36 deletions
|
@ -1030,7 +1030,7 @@ noteBlock' marker = try $ do
|
||||||
citationMarker :: Monad m => RSTParser m [Char]
|
citationMarker :: Monad m => RSTParser m [Char]
|
||||||
citationMarker = do
|
citationMarker = do
|
||||||
char '['
|
char '['
|
||||||
res <- simpleReferenceName'
|
res <- simpleReferenceName
|
||||||
char ']'
|
char ']'
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
@ -1039,7 +1039,7 @@ noteMarker = do
|
||||||
char '['
|
char '['
|
||||||
res <- many1 digit
|
res <- many1 digit
|
||||||
<|>
|
<|>
|
||||||
try (char '#' >> liftM ('#':) simpleReferenceName')
|
try (char '#' >> liftM ('#':) simpleReferenceName)
|
||||||
<|> count 1 (oneOf "#*")
|
<|> count 1 (oneOf "#*")
|
||||||
char ']'
|
char ']'
|
||||||
return res
|
return res
|
||||||
|
@ -1048,34 +1048,24 @@ noteMarker = do
|
||||||
-- reference key
|
-- reference key
|
||||||
--
|
--
|
||||||
|
|
||||||
quotedReferenceName :: PandocMonad m => RSTParser m Inlines
|
quotedReferenceName :: PandocMonad m => RSTParser m String
|
||||||
quotedReferenceName = try $ do
|
quotedReferenceName = try $ do
|
||||||
char '`' >> notFollowedBy (char '`') -- `` means inline code!
|
char '`' >> notFollowedBy (char '`') -- `` means inline code!
|
||||||
trimInlines . mconcat <$> many1Till inline (char '`')
|
manyTill anyChar (char '`')
|
||||||
|
|
||||||
unquotedReferenceName :: PandocMonad m => RSTParser m Inlines
|
|
||||||
unquotedReferenceName = try $ do -- `` means inline code!
|
|
||||||
trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':')
|
|
||||||
|
|
||||||
-- Simple reference names are single words consisting of alphanumerics
|
-- Simple reference names are single words consisting of alphanumerics
|
||||||
-- plus isolated (no two adjacent) internal hyphens, underscores,
|
-- plus isolated (no two adjacent) internal hyphens, underscores,
|
||||||
-- periods, colons and plus signs; no whitespace or other characters
|
-- periods, colons and plus signs; no whitespace or other characters
|
||||||
-- are allowed.
|
-- are allowed.
|
||||||
simpleReferenceName' :: Monad m => ParserT [Char] st m String
|
simpleReferenceName :: Monad m => ParserT [Char] st m String
|
||||||
simpleReferenceName' = do
|
simpleReferenceName = do
|
||||||
x <- alphaNum
|
x <- alphaNum
|
||||||
xs <- many $ alphaNum
|
xs <- many $ alphaNum
|
||||||
<|>
|
<|> try (oneOf "-_:+." <* lookAhead alphaNum)
|
||||||
try (oneOf "-_:+." <* lookAhead alphaNum)
|
|
||||||
return (x:xs)
|
return (x:xs)
|
||||||
|
|
||||||
simpleReferenceName :: Monad m => ParserT [Char] st m Inlines
|
referenceName :: PandocMonad m => RSTParser m String
|
||||||
simpleReferenceName = B.str <$> simpleReferenceName'
|
referenceName = quotedReferenceName <|> simpleReferenceName
|
||||||
|
|
||||||
referenceName :: PandocMonad m => RSTParser m Inlines
|
|
||||||
referenceName = quotedReferenceName <|>
|
|
||||||
try (simpleReferenceName <* lookAhead (char ':')) <|>
|
|
||||||
unquotedReferenceName
|
|
||||||
|
|
||||||
referenceKey :: PandocMonad m => RSTParser m [Char]
|
referenceKey :: PandocMonad m => RSTParser m [Char]
|
||||||
referenceKey = do
|
referenceKey = do
|
||||||
|
@ -1123,16 +1113,17 @@ anonymousKey = try $ do
|
||||||
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
|
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
|
||||||
stateKeys s }
|
stateKeys s }
|
||||||
|
|
||||||
stripTicks :: String -> String
|
|
||||||
stripTicks = reverse . stripTick . reverse . stripTick
|
|
||||||
where stripTick ('`':xs) = xs
|
|
||||||
stripTick xs = xs
|
|
||||||
|
|
||||||
referenceNames :: PandocMonad m => RSTParser m [String]
|
referenceNames :: PandocMonad m => RSTParser m [String]
|
||||||
referenceNames = do
|
referenceNames = do
|
||||||
let rn = try $ do
|
let rn = try $ do
|
||||||
string ".. _"
|
string ".. _"
|
||||||
(_, ref) <- withRaw referenceName
|
ref <- quotedReferenceName
|
||||||
|
<|> many ( noneOf ":\n"
|
||||||
|
<|> try (char '\n' <*
|
||||||
|
string " " <*
|
||||||
|
notFollowedBy blankline)
|
||||||
|
<|> try (char ':' <* lookAhead alphaNum)
|
||||||
|
)
|
||||||
char ':'
|
char ':'
|
||||||
return ref
|
return ref
|
||||||
first <- rn
|
first <- rn
|
||||||
|
@ -1147,16 +1138,15 @@ regularKey = try $ do
|
||||||
refs <- referenceNames
|
refs <- referenceNames
|
||||||
src <- targetURI
|
src <- targetURI
|
||||||
guard $ not (null src)
|
guard $ not (null src)
|
||||||
let keys = map (toKey . stripTicks) refs
|
let keys = map toKey refs
|
||||||
forM_ keys $ \key ->
|
forM_ keys $ \key ->
|
||||||
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
|
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
|
||||||
stateKeys s }
|
stateKeys s }
|
||||||
|
|
||||||
anchorDef :: PandocMonad m => RSTParser m [Char]
|
anchorDef :: PandocMonad m => RSTParser m [Char]
|
||||||
anchorDef = try $ do
|
anchorDef = try $ do
|
||||||
(refs, raw) <- withRaw (try (referenceNames <* blanklines))
|
(refs, raw) <- withRaw $ try (referenceNames <* blanklines)
|
||||||
let keys = map stripTicks refs
|
forM_ refs $ \rawkey ->
|
||||||
forM_ keys $ \rawkey ->
|
|
||||||
updateState $ \s -> s { stateKeys =
|
updateState $ \s -> s { stateKeys =
|
||||||
M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s }
|
M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s }
|
||||||
-- keep this for 2nd round of parsing, where we'll add the divs (anchor)
|
-- keep this for 2nd round of parsing, where we'll add the divs (anchor)
|
||||||
|
@ -1479,22 +1469,20 @@ explicitLink = try $ do
|
||||||
_ -> return ((src, ""), nullAttr)
|
_ -> return ((src, ""), nullAttr)
|
||||||
return $ B.linkWith attr (escapeURI src') tit label''
|
return $ B.linkWith attr (escapeURI src') tit label''
|
||||||
|
|
||||||
citationName :: PandocMonad m => RSTParser m Inlines
|
citationName :: PandocMonad m => RSTParser m String
|
||||||
citationName = do
|
citationName = do
|
||||||
raw <- citationMarker
|
raw <- citationMarker
|
||||||
return $ B.str $ "[" ++ raw ++ "]"
|
return $ "[" ++ raw ++ "]"
|
||||||
|
|
||||||
referenceLink :: PandocMonad m => RSTParser m Inlines
|
referenceLink :: PandocMonad m => RSTParser m Inlines
|
||||||
referenceLink = try $ do
|
referenceLink = try $ do
|
||||||
(label',ref) <- withRaw (quotedReferenceName
|
ref <- (referenceName <|> citationName) <* char '_'
|
||||||
<|> simpleReferenceName
|
let label' = B.text ref
|
||||||
<|> citationName) <*
|
|
||||||
char '_'
|
|
||||||
let isAnonKey (Key ('_':_)) = True
|
let isAnonKey (Key ('_':_)) = True
|
||||||
isAnonKey _ = False
|
isAnonKey _ = False
|
||||||
state <- getState
|
state <- getState
|
||||||
let keyTable = stateKeys state
|
let keyTable = stateKeys state
|
||||||
key <- option (toKey $ stripTicks ref) $
|
key <- option (toKey ref) $
|
||||||
do char '_'
|
do char '_'
|
||||||
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
|
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
|
||||||
case anonKeys of
|
case anonKeys of
|
||||||
|
|
10
test/command/4156.md
Normal file
10
test/command/4156.md
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
```
|
||||||
|
% pandoc -f rst
|
||||||
|
.. _`SOMEID`:
|
||||||
|
|
||||||
|
foo
|
||||||
|
^D
|
||||||
|
<div id="SOMEID">
|
||||||
|
<p>foo</p>
|
||||||
|
</div>
|
||||||
|
```
|
18
test/command/rst-links.md
Normal file
18
test/command/rst-links.md
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
```
|
||||||
|
% pandoc -f rst
|
||||||
|
`*ab*`_
|
||||||
|
|
||||||
|
.. _`*ab*`: foo
|
||||||
|
^D
|
||||||
|
<p><a href="foo">*ab*</a></p>
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc -f rst
|
||||||
|
`A B
|
||||||
|
c`_
|
||||||
|
|
||||||
|
.. _A B C: foo
|
||||||
|
^D
|
||||||
|
<p><a href="foo">A B c</a></p>
|
||||||
|
```
|
Loading…
Reference in a new issue