Merge pull request #1297 from tarleb/citations
Org reader: support Pandocs citation extension
This commit is contained in:
commit
b5959b2007
5 changed files with 118 additions and 40 deletions
|
@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
|
|||
getDefaultExtensions "markdown_github" = githubMarkdownExtensions
|
||||
getDefaultExtensions "markdown" = pandocExtensions
|
||||
getDefaultExtensions "plain" = pandocExtensions
|
||||
getDefaultExtensions "org" = Set.fromList [Ext_citations]
|
||||
getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
|
||||
getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers]
|
||||
|
||||
|
@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
|
|||
|
||||
writeJSON :: WriterOptions -> Pandoc -> String
|
||||
writeJSON _ = UTF8.toStringLazy . encode
|
||||
|
||||
|
|
|
@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~),
|
|||
withRaw,
|
||||
escaped,
|
||||
characterReference,
|
||||
updateLastStrPos,
|
||||
anyOrderedListMarker,
|
||||
orderedListMarker,
|
||||
charRef,
|
||||
|
@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~),
|
|||
testStringWith,
|
||||
guardEnabled,
|
||||
guardDisabled,
|
||||
updateLastStrPos,
|
||||
notAfterString,
|
||||
ParserState (..),
|
||||
HasReaderOptions (..),
|
||||
HasHeaderMap (..),
|
||||
HasIdentifierList (..),
|
||||
HasMacros (..),
|
||||
HasLastStrPosition (..),
|
||||
defaultParserState,
|
||||
HeaderType (..),
|
||||
ParserContext (..),
|
||||
|
@ -92,6 +94,7 @@ module Text.Pandoc.Parsing ( (>>~),
|
|||
apostrophe,
|
||||
dash,
|
||||
nested,
|
||||
citeKey,
|
||||
macro,
|
||||
applyMacros',
|
||||
Parser,
|
||||
|
@ -904,6 +907,14 @@ instance HasMacros ParserState where
|
|||
extractMacros = stateMacros
|
||||
updateMacros f st = st{ stateMacros = f $ stateMacros st }
|
||||
|
||||
class HasLastStrPosition st where
|
||||
setLastStrPos :: SourcePos -> st -> st
|
||||
getLastStrPos :: st -> Maybe SourcePos
|
||||
|
||||
instance HasLastStrPosition ParserState where
|
||||
setLastStrPos pos st = st{ stateLastStrPos = Just pos }
|
||||
getLastStrPos st = stateLastStrPos st
|
||||
|
||||
defaultParserState :: ParserState
|
||||
defaultParserState =
|
||||
ParserState { stateOptions = def,
|
||||
|
@ -938,6 +949,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
|
|||
guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
|
||||
guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
|
||||
|
||||
-- | Update the position on which the last string ended.
|
||||
updateLastStrPos :: HasLastStrPosition st => Parser s st ()
|
||||
updateLastStrPos = getPosition >>= updateState . setLastStrPos
|
||||
|
||||
-- | Whether we are right after the end of a string.
|
||||
notAfterString :: HasLastStrPosition st => Parser s st Bool
|
||||
notAfterString = do
|
||||
pos <- getPosition
|
||||
st <- getState
|
||||
return $ getLastStrPos st /= Just pos
|
||||
|
||||
data HeaderType
|
||||
= SingleHeader Char -- ^ Single line of characters underneath
|
||||
| DoubleHeader Char -- ^ Lines of characters above and below
|
||||
|
@ -1049,17 +1071,11 @@ charOrRef cs =
|
|||
guard (c `elem` cs)
|
||||
return c)
|
||||
|
||||
updateLastStrPos :: Parser [Char] ParserState ()
|
||||
updateLastStrPos = getPosition >>= \p ->
|
||||
updateState $ \s -> s{ stateLastStrPos = Just p }
|
||||
|
||||
singleQuoteStart :: Parser [Char] ParserState ()
|
||||
singleQuoteStart = do
|
||||
failIfInQuoteContext InSingleQuote
|
||||
pos <- getPosition
|
||||
st <- getState
|
||||
-- single quote start can't be right after str
|
||||
guard $ stateLastStrPos st /= Just pos
|
||||
guard =<< notAfterString
|
||||
() <$ charOrRef "'\8216\145"
|
||||
|
||||
singleQuoteEnd :: Parser [Char] st ()
|
||||
|
@ -1129,6 +1145,18 @@ nested p = do
|
|||
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
|
||||
return res
|
||||
|
||||
citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String)
|
||||
citeKey = try $ do
|
||||
guard =<< notAfterString
|
||||
suppress_author <- option False (char '-' *> return True)
|
||||
char '@'
|
||||
firstChar <- letter <|> char '_'
|
||||
let regchar = satisfy (\c -> isAlphaNum c || c == '_')
|
||||
let internal p = try $ p <* lookAhead regchar
|
||||
rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
|
||||
let key = firstChar:rest
|
||||
return (suppress_author, key)
|
||||
|
||||
--
|
||||
-- Macros
|
||||
--
|
||||
|
@ -1156,4 +1184,3 @@ applyMacros' target = do
|
|||
then do macros <- extractMacros `fmap` getState
|
||||
return $ applyMacros macros target
|
||||
else return target
|
||||
|
||||
|
|
|
@ -1474,9 +1474,7 @@ strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_')
|
|||
where checkIntraword = do
|
||||
exts <- getOption readerExtensions
|
||||
when (Ext_intraword_underscores `Set.member` exts) $ do
|
||||
pos <- getPosition
|
||||
lastStrPos <- stateLastStrPos <$> getState
|
||||
guard $ lastStrPos /= Just pos
|
||||
guard =<< notAfterString
|
||||
|
||||
-- | Parses a list of inlines between start and end delimiters.
|
||||
inlinesBetween :: (Show b)
|
||||
|
@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n')
|
|||
str :: MarkdownParser (F Inlines)
|
||||
str = do
|
||||
result <- many1 alphaNum
|
||||
pos <- getPosition
|
||||
updateState $ \s -> s{ stateLastStrPos = Just pos }
|
||||
updateLastStrPos
|
||||
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
|
||||
isSmart <- getOption readerSmart
|
||||
if isSmart
|
||||
|
@ -1817,22 +1814,6 @@ normalCite = try $ do
|
|||
char ']'
|
||||
return citations
|
||||
|
||||
citeKey :: MarkdownParser (Bool, String)
|
||||
citeKey = try $ do
|
||||
-- make sure we're not right after an alphanumeric,
|
||||
-- since foo@bar.baz is probably an email address
|
||||
lastStrPos <- stateLastStrPos <$> getState
|
||||
pos <- getPosition
|
||||
guard $ lastStrPos /= Just pos
|
||||
suppress_author <- option False (char '-' >> return True)
|
||||
char '@'
|
||||
first <- letter <|> char '_'
|
||||
let regchar = satisfy (\c -> isAlphaNum c || c == '_')
|
||||
let internal p = try $ p >>~ lookAhead regchar
|
||||
rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
|
||||
let key = first:rest
|
||||
return (suppress_author, key)
|
||||
|
||||
suffix :: MarkdownParser (F Inlines)
|
||||
suffix = try $ do
|
||||
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
|
||||
|
|
|
@ -105,6 +105,10 @@ instance HasMeta OrgParserState where
|
|||
deleteMeta field st =
|
||||
st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
|
||||
|
||||
instance HasLastStrPosition OrgParserState where
|
||||
getLastStrPos = orgStateLastStrPos
|
||||
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
|
||||
|
||||
instance Default OrgParserState where
|
||||
def = defaultOrgParserState
|
||||
|
||||
|
@ -865,6 +869,7 @@ inline :: OrgParser (F Inlines)
|
|||
inline =
|
||||
choice [ whitespace
|
||||
, linebreak
|
||||
, cite
|
||||
, footnote
|
||||
, linkOrImage
|
||||
, anchor
|
||||
|
@ -929,6 +934,51 @@ endline = try $ do
|
|||
updateLastPreCharPos
|
||||
return . return $ B.space
|
||||
|
||||
cite :: OrgParser (F Inlines)
|
||||
cite = try $ do
|
||||
guardEnabled Ext_citations
|
||||
(cs, raw) <- withRaw normalCite
|
||||
return $ (flip B.cite (B.text raw)) <$> cs
|
||||
|
||||
normalCite :: OrgParser (F [Citation])
|
||||
normalCite = try $ char '['
|
||||
*> skipSpaces
|
||||
*> citeList
|
||||
<* skipSpaces
|
||||
<* char ']'
|
||||
|
||||
citeList :: OrgParser (F [Citation])
|
||||
citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
|
||||
|
||||
citation :: OrgParser (F Citation)
|
||||
citation = try $ do
|
||||
pref <- prefix
|
||||
(suppress_author, key) <- citeKey
|
||||
suff <- suffix
|
||||
return $ do
|
||||
x <- pref
|
||||
y <- suff
|
||||
return $ Citation{ citationId = key
|
||||
, citationPrefix = B.toList x
|
||||
, citationSuffix = B.toList y
|
||||
, citationMode = if suppress_author
|
||||
then SuppressAuthor
|
||||
else NormalCitation
|
||||
, citationNoteNum = 0
|
||||
, citationHash = 0
|
||||
}
|
||||
where
|
||||
prefix = trimInlinesF . mconcat <$>
|
||||
manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
|
||||
suffix = try $ do
|
||||
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
|
||||
skipSpaces
|
||||
rest <- trimInlinesF . mconcat <$>
|
||||
many (notFollowedBy (oneOf ";]") *> inline)
|
||||
return $ if hasSpace
|
||||
then (B.space <>) <$> rest
|
||||
else rest
|
||||
|
||||
footnote :: OrgParser (F Inlines)
|
||||
footnote = try $ inlineNote <|> referencedNote
|
||||
|
||||
|
@ -1003,7 +1053,7 @@ selfTarget :: OrgParser String
|
|||
selfTarget = try $ char '[' *> linkTarget <* char ']'
|
||||
|
||||
linkTarget :: OrgParser String
|
||||
linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
|
||||
linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
|
||||
|
||||
applyCustomLinkFormat :: String -> OrgParser (F String)
|
||||
applyCustomLinkFormat link = do
|
||||
|
@ -1079,7 +1129,12 @@ inlineCodeBlock = try $ do
|
|||
let attrClasses = [translateLang lang, rundocBlockClass]
|
||||
let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
|
||||
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
|
||||
where enclosedByPair s e p = char s *> many1Till p (char e)
|
||||
|
||||
enclosedByPair :: Char -- ^ opening char
|
||||
-> Char -- ^ closing char
|
||||
-> OrgParser a -- ^ parser
|
||||
-> OrgParser [a]
|
||||
enclosedByPair s e p = char s *> many1Till p (char e)
|
||||
|
||||
emph :: OrgParser (F Inlines)
|
||||
emph = fmap B.emph <$> emphasisBetween '/'
|
||||
|
@ -1274,13 +1329,6 @@ afterEmphasisPreChar = do
|
|||
lastPrePos <- orgStateLastPreCharPos <$> getState
|
||||
return . fromMaybe True $ (== pos) <$> lastPrePos
|
||||
|
||||
-- | Whether we are right after the end of a string
|
||||
notAfterString :: OrgParser Bool
|
||||
notAfterString = do
|
||||
pos <- getPosition
|
||||
lastStrPos <- orgStateLastStrPos <$> getState
|
||||
return $ lastStrPos /= Just pos
|
||||
|
||||
-- | Whether the parser is right after a forbidden border char
|
||||
notAfterForbiddenBorderChar :: OrgParser Bool
|
||||
notAfterForbiddenBorderChar = do
|
||||
|
|
|
@ -225,6 +225,28 @@ tests =
|
|||
]
|
||||
)
|
||||
"echo 'Hello, World'")
|
||||
|
||||
, "Citation" =:
|
||||
"[@nonexistent]" =?>
|
||||
let citation = Citation
|
||||
{ citationId = "nonexistent"
|
||||
, citationPrefix = []
|
||||
, citationSuffix = []
|
||||
, citationMode = NormalCitation
|
||||
, citationNoteNum = 0
|
||||
, citationHash = 0}
|
||||
in (para $ cite [citation] "[@nonexistent]")
|
||||
|
||||
, "Citation containing text" =:
|
||||
"[see @item1 p. 34-35]" =?>
|
||||
let citation = Citation
|
||||
{ citationId = "item1"
|
||||
, citationPrefix = [Str "see"]
|
||||
, citationSuffix = [Space ,Str "p.",Space,Str "34-35"]
|
||||
, citationMode = NormalCitation
|
||||
, citationNoteNum = 0
|
||||
, citationHash = 0}
|
||||
in (para $ cite [citation] "[see @item1 p. 34-35]")
|
||||
]
|
||||
|
||||
, testGroup "Meta Information" $
|
||||
|
|
Loading…
Reference in a new issue