Merge pull request #1297 from tarleb/citations
Org reader: support Pandocs citation extension
This commit is contained in:
commit
b5959b2007
5 changed files with 118 additions and 40 deletions
|
@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
|
||||||
getDefaultExtensions "markdown_github" = githubMarkdownExtensions
|
getDefaultExtensions "markdown_github" = githubMarkdownExtensions
|
||||||
getDefaultExtensions "markdown" = pandocExtensions
|
getDefaultExtensions "markdown" = pandocExtensions
|
||||||
getDefaultExtensions "plain" = pandocExtensions
|
getDefaultExtensions "plain" = pandocExtensions
|
||||||
|
getDefaultExtensions "org" = Set.fromList [Ext_citations]
|
||||||
getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
|
getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
|
||||||
getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers]
|
getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers]
|
||||||
|
|
||||||
|
@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
|
||||||
|
|
||||||
writeJSON :: WriterOptions -> Pandoc -> String
|
writeJSON :: WriterOptions -> Pandoc -> String
|
||||||
writeJSON _ = UTF8.toStringLazy . encode
|
writeJSON _ = UTF8.toStringLazy . encode
|
||||||
|
|
||||||
|
|
|
@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~),
|
||||||
withRaw,
|
withRaw,
|
||||||
escaped,
|
escaped,
|
||||||
characterReference,
|
characterReference,
|
||||||
updateLastStrPos,
|
|
||||||
anyOrderedListMarker,
|
anyOrderedListMarker,
|
||||||
orderedListMarker,
|
orderedListMarker,
|
||||||
charRef,
|
charRef,
|
||||||
|
@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~),
|
||||||
testStringWith,
|
testStringWith,
|
||||||
guardEnabled,
|
guardEnabled,
|
||||||
guardDisabled,
|
guardDisabled,
|
||||||
|
updateLastStrPos,
|
||||||
|
notAfterString,
|
||||||
ParserState (..),
|
ParserState (..),
|
||||||
HasReaderOptions (..),
|
HasReaderOptions (..),
|
||||||
HasHeaderMap (..),
|
HasHeaderMap (..),
|
||||||
HasIdentifierList (..),
|
HasIdentifierList (..),
|
||||||
HasMacros (..),
|
HasMacros (..),
|
||||||
|
HasLastStrPosition (..),
|
||||||
defaultParserState,
|
defaultParserState,
|
||||||
HeaderType (..),
|
HeaderType (..),
|
||||||
ParserContext (..),
|
ParserContext (..),
|
||||||
|
@ -92,6 +94,7 @@ module Text.Pandoc.Parsing ( (>>~),
|
||||||
apostrophe,
|
apostrophe,
|
||||||
dash,
|
dash,
|
||||||
nested,
|
nested,
|
||||||
|
citeKey,
|
||||||
macro,
|
macro,
|
||||||
applyMacros',
|
applyMacros',
|
||||||
Parser,
|
Parser,
|
||||||
|
@ -904,6 +907,14 @@ instance HasMacros ParserState where
|
||||||
extractMacros = stateMacros
|
extractMacros = stateMacros
|
||||||
updateMacros f st = st{ stateMacros = f $ stateMacros st }
|
updateMacros f st = st{ stateMacros = f $ stateMacros st }
|
||||||
|
|
||||||
|
class HasLastStrPosition st where
|
||||||
|
setLastStrPos :: SourcePos -> st -> st
|
||||||
|
getLastStrPos :: st -> Maybe SourcePos
|
||||||
|
|
||||||
|
instance HasLastStrPosition ParserState where
|
||||||
|
setLastStrPos pos st = st{ stateLastStrPos = Just pos }
|
||||||
|
getLastStrPos st = stateLastStrPos st
|
||||||
|
|
||||||
defaultParserState :: ParserState
|
defaultParserState :: ParserState
|
||||||
defaultParserState =
|
defaultParserState =
|
||||||
ParserState { stateOptions = def,
|
ParserState { stateOptions = def,
|
||||||
|
@ -938,6 +949,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
|
||||||
guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
|
guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
|
||||||
guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
|
guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
|
||||||
|
|
||||||
|
-- | Update the position on which the last string ended.
|
||||||
|
updateLastStrPos :: HasLastStrPosition st => Parser s st ()
|
||||||
|
updateLastStrPos = getPosition >>= updateState . setLastStrPos
|
||||||
|
|
||||||
|
-- | Whether we are right after the end of a string.
|
||||||
|
notAfterString :: HasLastStrPosition st => Parser s st Bool
|
||||||
|
notAfterString = do
|
||||||
|
pos <- getPosition
|
||||||
|
st <- getState
|
||||||
|
return $ getLastStrPos st /= Just pos
|
||||||
|
|
||||||
data HeaderType
|
data HeaderType
|
||||||
= SingleHeader Char -- ^ Single line of characters underneath
|
= SingleHeader Char -- ^ Single line of characters underneath
|
||||||
| DoubleHeader Char -- ^ Lines of characters above and below
|
| DoubleHeader Char -- ^ Lines of characters above and below
|
||||||
|
@ -1049,17 +1071,11 @@ charOrRef cs =
|
||||||
guard (c `elem` cs)
|
guard (c `elem` cs)
|
||||||
return c)
|
return c)
|
||||||
|
|
||||||
updateLastStrPos :: Parser [Char] ParserState ()
|
|
||||||
updateLastStrPos = getPosition >>= \p ->
|
|
||||||
updateState $ \s -> s{ stateLastStrPos = Just p }
|
|
||||||
|
|
||||||
singleQuoteStart :: Parser [Char] ParserState ()
|
singleQuoteStart :: Parser [Char] ParserState ()
|
||||||
singleQuoteStart = do
|
singleQuoteStart = do
|
||||||
failIfInQuoteContext InSingleQuote
|
failIfInQuoteContext InSingleQuote
|
||||||
pos <- getPosition
|
|
||||||
st <- getState
|
|
||||||
-- single quote start can't be right after str
|
-- single quote start can't be right after str
|
||||||
guard $ stateLastStrPos st /= Just pos
|
guard =<< notAfterString
|
||||||
() <$ charOrRef "'\8216\145"
|
() <$ charOrRef "'\8216\145"
|
||||||
|
|
||||||
singleQuoteEnd :: Parser [Char] st ()
|
singleQuoteEnd :: Parser [Char] st ()
|
||||||
|
@ -1129,6 +1145,18 @@ nested p = do
|
||||||
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
|
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String)
|
||||||
|
citeKey = try $ do
|
||||||
|
guard =<< notAfterString
|
||||||
|
suppress_author <- option False (char '-' *> return True)
|
||||||
|
char '@'
|
||||||
|
firstChar <- letter <|> char '_'
|
||||||
|
let regchar = satisfy (\c -> isAlphaNum c || c == '_')
|
||||||
|
let internal p = try $ p <* lookAhead regchar
|
||||||
|
rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
|
||||||
|
let key = firstChar:rest
|
||||||
|
return (suppress_author, key)
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Macros
|
-- Macros
|
||||||
--
|
--
|
||||||
|
@ -1156,4 +1184,3 @@ applyMacros' target = do
|
||||||
then do macros <- extractMacros `fmap` getState
|
then do macros <- extractMacros `fmap` getState
|
||||||
return $ applyMacros macros target
|
return $ applyMacros macros target
|
||||||
else return target
|
else return target
|
||||||
|
|
||||||
|
|
|
@ -1474,9 +1474,7 @@ strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_')
|
||||||
where checkIntraword = do
|
where checkIntraword = do
|
||||||
exts <- getOption readerExtensions
|
exts <- getOption readerExtensions
|
||||||
when (Ext_intraword_underscores `Set.member` exts) $ do
|
when (Ext_intraword_underscores `Set.member` exts) $ do
|
||||||
pos <- getPosition
|
guard =<< notAfterString
|
||||||
lastStrPos <- stateLastStrPos <$> getState
|
|
||||||
guard $ lastStrPos /= Just pos
|
|
||||||
|
|
||||||
-- | Parses a list of inlines between start and end delimiters.
|
-- | Parses a list of inlines between start and end delimiters.
|
||||||
inlinesBetween :: (Show b)
|
inlinesBetween :: (Show b)
|
||||||
|
@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n')
|
||||||
str :: MarkdownParser (F Inlines)
|
str :: MarkdownParser (F Inlines)
|
||||||
str = do
|
str = do
|
||||||
result <- many1 alphaNum
|
result <- many1 alphaNum
|
||||||
pos <- getPosition
|
updateLastStrPos
|
||||||
updateState $ \s -> s{ stateLastStrPos = Just pos }
|
|
||||||
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
|
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
|
||||||
isSmart <- getOption readerSmart
|
isSmart <- getOption readerSmart
|
||||||
if isSmart
|
if isSmart
|
||||||
|
@ -1817,22 +1814,6 @@ normalCite = try $ do
|
||||||
char ']'
|
char ']'
|
||||||
return citations
|
return citations
|
||||||
|
|
||||||
citeKey :: MarkdownParser (Bool, String)
|
|
||||||
citeKey = try $ do
|
|
||||||
-- make sure we're not right after an alphanumeric,
|
|
||||||
-- since foo@bar.baz is probably an email address
|
|
||||||
lastStrPos <- stateLastStrPos <$> getState
|
|
||||||
pos <- getPosition
|
|
||||||
guard $ lastStrPos /= Just pos
|
|
||||||
suppress_author <- option False (char '-' >> return True)
|
|
||||||
char '@'
|
|
||||||
first <- letter <|> char '_'
|
|
||||||
let regchar = satisfy (\c -> isAlphaNum c || c == '_')
|
|
||||||
let internal p = try $ p >>~ lookAhead regchar
|
|
||||||
rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
|
|
||||||
let key = first:rest
|
|
||||||
return (suppress_author, key)
|
|
||||||
|
|
||||||
suffix :: MarkdownParser (F Inlines)
|
suffix :: MarkdownParser (F Inlines)
|
||||||
suffix = try $ do
|
suffix = try $ do
|
||||||
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
|
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
|
||||||
|
|
|
@ -105,6 +105,10 @@ instance HasMeta OrgParserState where
|
||||||
deleteMeta field st =
|
deleteMeta field st =
|
||||||
st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
|
st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
|
||||||
|
|
||||||
|
instance HasLastStrPosition OrgParserState where
|
||||||
|
getLastStrPos = orgStateLastStrPos
|
||||||
|
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
|
||||||
|
|
||||||
instance Default OrgParserState where
|
instance Default OrgParserState where
|
||||||
def = defaultOrgParserState
|
def = defaultOrgParserState
|
||||||
|
|
||||||
|
@ -865,6 +869,7 @@ inline :: OrgParser (F Inlines)
|
||||||
inline =
|
inline =
|
||||||
choice [ whitespace
|
choice [ whitespace
|
||||||
, linebreak
|
, linebreak
|
||||||
|
, cite
|
||||||
, footnote
|
, footnote
|
||||||
, linkOrImage
|
, linkOrImage
|
||||||
, anchor
|
, anchor
|
||||||
|
@ -929,6 +934,51 @@ endline = try $ do
|
||||||
updateLastPreCharPos
|
updateLastPreCharPos
|
||||||
return . return $ B.space
|
return . return $ B.space
|
||||||
|
|
||||||
|
cite :: OrgParser (F Inlines)
|
||||||
|
cite = try $ do
|
||||||
|
guardEnabled Ext_citations
|
||||||
|
(cs, raw) <- withRaw normalCite
|
||||||
|
return $ (flip B.cite (B.text raw)) <$> cs
|
||||||
|
|
||||||
|
normalCite :: OrgParser (F [Citation])
|
||||||
|
normalCite = try $ char '['
|
||||||
|
*> skipSpaces
|
||||||
|
*> citeList
|
||||||
|
<* skipSpaces
|
||||||
|
<* char ']'
|
||||||
|
|
||||||
|
citeList :: OrgParser (F [Citation])
|
||||||
|
citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
|
||||||
|
|
||||||
|
citation :: OrgParser (F Citation)
|
||||||
|
citation = try $ do
|
||||||
|
pref <- prefix
|
||||||
|
(suppress_author, key) <- citeKey
|
||||||
|
suff <- suffix
|
||||||
|
return $ do
|
||||||
|
x <- pref
|
||||||
|
y <- suff
|
||||||
|
return $ Citation{ citationId = key
|
||||||
|
, citationPrefix = B.toList x
|
||||||
|
, citationSuffix = B.toList y
|
||||||
|
, citationMode = if suppress_author
|
||||||
|
then SuppressAuthor
|
||||||
|
else NormalCitation
|
||||||
|
, citationNoteNum = 0
|
||||||
|
, citationHash = 0
|
||||||
|
}
|
||||||
|
where
|
||||||
|
prefix = trimInlinesF . mconcat <$>
|
||||||
|
manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
|
||||||
|
suffix = try $ do
|
||||||
|
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
|
||||||
|
skipSpaces
|
||||||
|
rest <- trimInlinesF . mconcat <$>
|
||||||
|
many (notFollowedBy (oneOf ";]") *> inline)
|
||||||
|
return $ if hasSpace
|
||||||
|
then (B.space <>) <$> rest
|
||||||
|
else rest
|
||||||
|
|
||||||
footnote :: OrgParser (F Inlines)
|
footnote :: OrgParser (F Inlines)
|
||||||
footnote = try $ inlineNote <|> referencedNote
|
footnote = try $ inlineNote <|> referencedNote
|
||||||
|
|
||||||
|
@ -1003,7 +1053,7 @@ selfTarget :: OrgParser String
|
||||||
selfTarget = try $ char '[' *> linkTarget <* char ']'
|
selfTarget = try $ char '[' *> linkTarget <* char ']'
|
||||||
|
|
||||||
linkTarget :: OrgParser String
|
linkTarget :: OrgParser String
|
||||||
linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
|
linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
|
||||||
|
|
||||||
applyCustomLinkFormat :: String -> OrgParser (F String)
|
applyCustomLinkFormat :: String -> OrgParser (F String)
|
||||||
applyCustomLinkFormat link = do
|
applyCustomLinkFormat link = do
|
||||||
|
@ -1079,7 +1129,12 @@ inlineCodeBlock = try $ do
|
||||||
let attrClasses = [translateLang lang, rundocBlockClass]
|
let attrClasses = [translateLang lang, rundocBlockClass]
|
||||||
let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
|
let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
|
||||||
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
|
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
|
||||||
where enclosedByPair s e p = char s *> many1Till p (char e)
|
|
||||||
|
enclosedByPair :: Char -- ^ opening char
|
||||||
|
-> Char -- ^ closing char
|
||||||
|
-> OrgParser a -- ^ parser
|
||||||
|
-> OrgParser [a]
|
||||||
|
enclosedByPair s e p = char s *> many1Till p (char e)
|
||||||
|
|
||||||
emph :: OrgParser (F Inlines)
|
emph :: OrgParser (F Inlines)
|
||||||
emph = fmap B.emph <$> emphasisBetween '/'
|
emph = fmap B.emph <$> emphasisBetween '/'
|
||||||
|
@ -1274,13 +1329,6 @@ afterEmphasisPreChar = do
|
||||||
lastPrePos <- orgStateLastPreCharPos <$> getState
|
lastPrePos <- orgStateLastPreCharPos <$> getState
|
||||||
return . fromMaybe True $ (== pos) <$> lastPrePos
|
return . fromMaybe True $ (== pos) <$> lastPrePos
|
||||||
|
|
||||||
-- | Whether we are right after the end of a string
|
|
||||||
notAfterString :: OrgParser Bool
|
|
||||||
notAfterString = do
|
|
||||||
pos <- getPosition
|
|
||||||
lastStrPos <- orgStateLastStrPos <$> getState
|
|
||||||
return $ lastStrPos /= Just pos
|
|
||||||
|
|
||||||
-- | Whether the parser is right after a forbidden border char
|
-- | Whether the parser is right after a forbidden border char
|
||||||
notAfterForbiddenBorderChar :: OrgParser Bool
|
notAfterForbiddenBorderChar :: OrgParser Bool
|
||||||
notAfterForbiddenBorderChar = do
|
notAfterForbiddenBorderChar = do
|
||||||
|
|
|
@ -225,6 +225,28 @@ tests =
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
"echo 'Hello, World'")
|
"echo 'Hello, World'")
|
||||||
|
|
||||||
|
, "Citation" =:
|
||||||
|
"[@nonexistent]" =?>
|
||||||
|
let citation = Citation
|
||||||
|
{ citationId = "nonexistent"
|
||||||
|
, citationPrefix = []
|
||||||
|
, citationSuffix = []
|
||||||
|
, citationMode = NormalCitation
|
||||||
|
, citationNoteNum = 0
|
||||||
|
, citationHash = 0}
|
||||||
|
in (para $ cite [citation] "[@nonexistent]")
|
||||||
|
|
||||||
|
, "Citation containing text" =:
|
||||||
|
"[see @item1 p. 34-35]" =?>
|
||||||
|
let citation = Citation
|
||||||
|
{ citationId = "item1"
|
||||||
|
, citationPrefix = [Str "see"]
|
||||||
|
, citationSuffix = [Space ,Str "p.",Space,Str "34-35"]
|
||||||
|
, citationMode = NormalCitation
|
||||||
|
, citationNoteNum = 0
|
||||||
|
, citationHash = 0}
|
||||||
|
in (para $ cite [citation] "[see @item1 p. 34-35]")
|
||||||
]
|
]
|
||||||
|
|
||||||
, testGroup "Meta Information" $
|
, testGroup "Meta Information" $
|
||||||
|
|
Loading…
Add table
Reference in a new issue