Implement curly-brace syntax for Markdown citation keys.

The change provides a way to use citation keys that contain
special characters not usable with the standard citation
key syntax.  Example: `@{foo_bar{x}'}` for the key `foo_bar{x}`.
Closes #6026.

The change requires adding a new parameter to the `citeKey`
parser from Text.Pandoc.Parsing [API change].

Markdown reader: recognize @{..} syntax for citatinos.

Markdown writer:  use @{..} syntax for citations when needed.

Update manual with curly-brace syntax for citations.

Closes #6026.
This commit is contained in:
John MacFarlane 2021-05-13 21:11:52 -07:00
parent c46482bfc3
commit 3f09f53459
6 changed files with 56 additions and 19 deletions

View file

@ -4949,12 +4949,15 @@ Inline and regular footnotes may be mixed freely.
#### Extension: `citations` ####
Markdown citations go inside square brackets and are separated
by semicolons. Each citation must have a key, composed of '@' +
the citation identifier from the database, and may optionally
have a prefix, a locator, and a suffix. The citation key must
begin with a letter, digit, or `_`, and may contain
by semicolons. Each citation must have a key and may optionally
have a prefix, a locator, and a suffix. The citation key
consists of `@` plus the citation identifier, possibly
enclosed in curly braces. If the identifier starts
with a letter, digit, or `_`, followed by zero or more
alphanumerics, `_`, and internal punctuation characters
(`:.#$%&-+?<>~/`). Here are some examples:
(`:.#$%&-+?<>~/`), then the curly braces may be omitted.
Identifiers may not contain whitespace characters or unbalanced
curly braces. Here are some examples:
Blah blah [see @doe99, pp. 33-35; also @smith04, chap. 1].
@ -4962,6 +4965,8 @@ alphanumerics, `_`, and internal punctuation characters
Blah blah [@smith04; @doe99].
Blah blah [@{https://example.com/bib?name=foobar&date=2000}, p. 33].
`pandoc` detects locator terms in the [CSL locale files].
Either abbreviated or unabbreviated forms are accepted. In the `en-US`
locale, locator terms can be written in either singular or plural forms,

View file

@ -1605,19 +1605,27 @@ nested p = do
return res
citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
=> ParserT s st m (Bool, Text)
citeKey = try $ do
=> Bool -- ^ If True, allow expanded @{..} syntax.
-> ParserT s st m (Bool, Text)
citeKey allowBraced = try $ do
guard =<< notAfterString
suppress_author <- option False (True <$ char '-')
char '@'
key <- simpleCiteIdentifier
<|> if allowBraced
then charsInBalanced '{' '}' (satisfy (not . isSpace))
else mzero
return (suppress_author, key)
simpleCiteIdentifier :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Text
simpleCiteIdentifier = do
firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite
let regchar = satisfy (\c -> isAlphaNum c || c == '_')
let internal p = try $ p <* lookAhead regchar
rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|>
try (oneOf ":/" <* lookAhead (char '/'))
let key = firstChar:rest
return (suppress_author, T.pack key)
return $ T.pack $ firstChar:rest
token :: (Stream s m t)
=> (t -> Text)

View file

@ -2094,7 +2094,7 @@ cite = do
textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
textualCite = try $ do
(suppressAuthor, key) <- citeKey
(suppressAuthor, key) <- citeKey True
-- If this is a reference to an earlier example list item,
-- then don't parse it as a citation. If the example list
-- item comes later, we'll parse it here and figure out in
@ -2174,7 +2174,7 @@ prefix = trimInlinesF . mconcat <$>
manyTill inline (char ']'
<|> lookAhead
(try $ do optional (try (char ';' >> spnl))
citeKey
citeKey True
return ']'))
citeList :: PandocMonad m => MarkdownParser m (F [Citation])
@ -2183,7 +2183,7 @@ citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
citation :: PandocMonad m => MarkdownParser m (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
(suppress_author, key) <- citeKey True
suff <- suffix
noteNum <- stateNoteNumber <$> getState
return $ do

View file

@ -263,7 +263,7 @@ berkeleyCitationList = try $ do
where
citationListPart :: PandocMonad m => OrgParser m (F Inlines)
citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
notFollowedBy' citeKey
notFollowedBy' $ citeKey False
notFollowedBy (oneOf ";]")
inline
@ -278,7 +278,7 @@ berkeleyBareTag' = try $ void (string "cite")
berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
berkeleyTextualCite = try $ do
(suppressAuthor, key) <- citeKey
(suppressAuthor, key) <- citeKey False
returnF . return $ Citation
{ citationId = key
, citationPrefix = mempty
@ -351,7 +351,7 @@ citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
citation :: PandocMonad m => OrgParser m (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
(suppress_author, key) <- citeKey False
suff <- suffix
return $ do
x <- pref
@ -368,7 +368,7 @@ citation = try $ do
}
where
prefix = trimInlinesF . mconcat <$>
manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False)))
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
skipSpaces

View file

@ -491,11 +491,16 @@ inlineToMarkdown opts (Cite (c:cs) lst)
rest <- mapM convertOne cs
let inbr = suffs <+> joincits rest
br = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
return $ literal ("@" <> citationId c) <+> br
return $ literal ("@" <> maybeInBraces (citationId c)) <+> br
else do
cits <- mapM convertOne (c:cs)
return $ literal "[" <> joincits cits <> literal "]"
where
maybeInBraces key =
case readWith (citeKey False >> spaces >> eof)
defaultParserState ("@" <> key) of
Left _ -> "{" <> key <> "}"
Right _ -> key
joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty)
convertOne Citation { citationId = k
, citationPrefix = pinlines
@ -504,7 +509,7 @@ inlineToMarkdown opts (Cite (c:cs) lst)
= do
pdoc <- inlineListToMarkdown opts pinlines
sdoc <- inlineListToMarkdown opts sinlines
let k' = literal (modekey m <> "@" <> k)
let k' = literal (modekey m <> "@" <> maybeInBraces k)
r = case sinlines of
Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc
_ -> k' <+> sdoc

19
test/command/6026.md Normal file
View file

@ -0,0 +1,19 @@
```
% pandoc -t native
@{https://openreview.net/forum?id=HkwoSDPgg}
@https://openreview.net/forum?id=HkwoSDPgg
^D
[Para [Cite [Citation {citationId = "https://openreview.net/forum?id=HkwoSDPgg", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 1, citationHash = 0}] [Str "@https://openreview.net/forum?id=HkwoSDPgg"]]
,Para [Cite [Citation {citationId = "https://openreview.net/forum?id", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 2, citationHash = 0}] [Str "@https://openreview.net/forum?id"],Str "=HkwoSDPgg"]]
```
```
% pandoc -t markdown
@{https://openreview.net/forum?id=HkwoSDPgg}
@https://openreview.net/forum?id=HkwoSDPgg
^D
@{https://openreview.net/forum?id=HkwoSDPgg}
@https://openreview.net/forum?id=HkwoSDPgg
```