From 3f09f53459b877f53072efbf57dec21fa37280b5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 13 May 2021 21:11:52 -0700 Subject: [PATCH] Implement curly-brace syntax for Markdown citation keys. The change provides a way to use citation keys that contain special characters not usable with the standard citation key syntax. Example: `@{foo_bar{x}'}` for the key `foo_bar{x}`. Closes #6026. The change requires adding a new parameter to the `citeKey` parser from Text.Pandoc.Parsing [API change]. Markdown reader: recognize @{..} syntax for citatinos. Markdown writer: use @{..} syntax for citations when needed. Update manual with curly-brace syntax for citations. Closes #6026. --- MANUAL.txt | 15 ++++++++++----- src/Text/Pandoc/Parsing.hs | 18 +++++++++++++----- src/Text/Pandoc/Readers/Markdown.hs | 6 +++--- src/Text/Pandoc/Readers/Org/Inlines.hs | 8 ++++---- src/Text/Pandoc/Writers/Markdown/Inline.hs | 9 +++++++-- test/command/6026.md | 19 +++++++++++++++++++ 6 files changed, 56 insertions(+), 19 deletions(-) create mode 100644 test/command/6026.md diff --git a/MANUAL.txt b/MANUAL.txt index 75fe599e2..6f06d1e8a 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -4949,12 +4949,15 @@ Inline and regular footnotes may be mixed freely. #### Extension: `citations` #### Markdown citations go inside square brackets and are separated -by semicolons. Each citation must have a key, composed of '@' + -the citation identifier from the database, and may optionally -have a prefix, a locator, and a suffix. The citation key must -begin with a letter, digit, or `_`, and may contain +by semicolons. Each citation must have a key and may optionally +have a prefix, a locator, and a suffix. The citation key +consists of `@` plus the citation identifier, possibly +enclosed in curly braces. If the identifier starts +with a letter, digit, or `_`, followed by zero or more alphanumerics, `_`, and internal punctuation characters -(`:.#$%&-+?<>~/`). Here are some examples: +(`:.#$%&-+?<>~/`), then the curly braces may be omitted. +Identifiers may not contain whitespace characters or unbalanced +curly braces. Here are some examples: Blah blah [see @doe99, pp. 33-35; also @smith04, chap. 1]. @@ -4962,6 +4965,8 @@ alphanumerics, `_`, and internal punctuation characters Blah blah [@smith04; @doe99]. + Blah blah [@{https://example.com/bib?name=foobar&date=2000}, p. 33]. + `pandoc` detects locator terms in the [CSL locale files]. Either abbreviated or unabbreviated forms are accepted. In the `en-US` locale, locator terms can be written in either singular or plural forms, diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index cbe9993c6..0bb794ba1 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1605,19 +1605,27 @@ nested p = do return res citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st) - => ParserT s st m (Bool, Text) -citeKey = try $ do + => Bool -- ^ If True, allow expanded @{..} syntax. + -> ParserT s st m (Bool, Text) +citeKey allowBraced = try $ do guard =<< notAfterString suppress_author <- option False (True <$ char '-') char '@' + key <- simpleCiteIdentifier + <|> if allowBraced + then charsInBalanced '{' '}' (satisfy (not . isSpace)) + else mzero + return (suppress_author, key) + +simpleCiteIdentifier :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Text +simpleCiteIdentifier = do firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite let regchar = satisfy (\c -> isAlphaNum c || c == '_') let internal p = try $ p <* lookAhead regchar rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|> try (oneOf ":/" <* lookAhead (char '/')) - let key = firstChar:rest - return (suppress_author, T.pack key) - + return $ T.pack $ firstChar:rest token :: (Stream s m t) => (t -> Text) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2d20ff018..34f16ab4e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -2094,7 +2094,7 @@ cite = do textualCite :: PandocMonad m => MarkdownParser m (F Inlines) textualCite = try $ do - (suppressAuthor, key) <- citeKey + (suppressAuthor, key) <- citeKey True -- If this is a reference to an earlier example list item, -- then don't parse it as a citation. If the example list -- item comes later, we'll parse it here and figure out in @@ -2174,7 +2174,7 @@ prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> lookAhead (try $ do optional (try (char ';' >> spnl)) - citeKey + citeKey True return ']')) citeList :: PandocMonad m => MarkdownParser m (F [Citation]) @@ -2183,7 +2183,7 @@ citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) citation :: PandocMonad m => MarkdownParser m (F Citation) citation = try $ do pref <- prefix - (suppress_author, key) <- citeKey + (suppress_author, key) <- citeKey True suff <- suffix noteNum <- stateNoteNumber <$> getState return $ do diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 054f2611a..6862dd71e 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -263,7 +263,7 @@ berkeleyCitationList = try $ do where citationListPart :: PandocMonad m => OrgParser m (F Inlines) citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do - notFollowedBy' citeKey + notFollowedBy' $ citeKey False notFollowedBy (oneOf ";]") inline @@ -278,7 +278,7 @@ berkeleyBareTag' = try $ void (string "cite") berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation]) berkeleyTextualCite = try $ do - (suppressAuthor, key) <- citeKey + (suppressAuthor, key) <- citeKey False returnF . return $ Citation { citationId = key , citationPrefix = mempty @@ -351,7 +351,7 @@ citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) citation :: PandocMonad m => OrgParser m (F Citation) citation = try $ do pref <- prefix - (suppress_author, key) <- citeKey + (suppress_author, key) <- citeKey False suff <- suffix return $ do x <- pref @@ -368,7 +368,7 @@ citation = try $ do } where prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) + manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False))) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) skipSpaces diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs index e35e1a0b9..2062050e4 100644 --- a/src/Text/Pandoc/Writers/Markdown/Inline.hs +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -491,11 +491,16 @@ inlineToMarkdown opts (Cite (c:cs) lst) rest <- mapM convertOne cs let inbr = suffs <+> joincits rest br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' - return $ literal ("@" <> citationId c) <+> br + return $ literal ("@" <> maybeInBraces (citationId c)) <+> br else do cits <- mapM convertOne (c:cs) return $ literal "[" <> joincits cits <> literal "]" where + maybeInBraces key = + case readWith (citeKey False >> spaces >> eof) + defaultParserState ("@" <> key) of + Left _ -> "{" <> key <> "}" + Right _ -> key joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty) convertOne Citation { citationId = k , citationPrefix = pinlines @@ -504,7 +509,7 @@ inlineToMarkdown opts (Cite (c:cs) lst) = do pdoc <- inlineListToMarkdown opts pinlines sdoc <- inlineListToMarkdown opts sinlines - let k' = literal (modekey m <> "@" <> k) + let k' = literal (modekey m <> "@" <> maybeInBraces k) r = case sinlines of Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc _ -> k' <+> sdoc diff --git a/test/command/6026.md b/test/command/6026.md new file mode 100644 index 000000000..5e18a5f42 --- /dev/null +++ b/test/command/6026.md @@ -0,0 +1,19 @@ +``` +% pandoc -t native +@{https://openreview.net/forum?id=HkwoSDPgg} + +@https://openreview.net/forum?id=HkwoSDPgg +^D +[Para [Cite [Citation {citationId = "https://openreview.net/forum?id=HkwoSDPgg", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 1, citationHash = 0}] [Str "@https://openreview.net/forum?id=HkwoSDPgg"]] +,Para [Cite [Citation {citationId = "https://openreview.net/forum?id", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 2, citationHash = 0}] [Str "@https://openreview.net/forum?id"],Str "=HkwoSDPgg"]] +``` +``` +% pandoc -t markdown +@{https://openreview.net/forum?id=HkwoSDPgg} + +@https://openreview.net/forum?id=HkwoSDPgg +^D +@{https://openreview.net/forum?id=HkwoSDPgg} + +@https://openreview.net/forum?id=HkwoSDPgg +```