Merge pull request #1297 from tarleb/citations

Org reader: support Pandocs citation extension
This commit is contained in:
John MacFarlane 2014-05-14 06:37:29 -07:00
commit b5959b2007
5 changed files with 118 additions and 40 deletions

View file

@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions
getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "markdown" = pandocExtensions
getDefaultExtensions "plain" = pandocExtensions getDefaultExtensions "plain" = pandocExtensions
getDefaultExtensions "org" = Set.fromList [Ext_citations]
getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex] getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers]
@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
writeJSON :: WriterOptions -> Pandoc -> String writeJSON :: WriterOptions -> Pandoc -> String
writeJSON _ = UTF8.toStringLazy . encode writeJSON _ = UTF8.toStringLazy . encode

View file

@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~),
withRaw, withRaw,
escaped, escaped,
characterReference, characterReference,
updateLastStrPos,
anyOrderedListMarker, anyOrderedListMarker,
orderedListMarker, orderedListMarker,
charRef, charRef,
@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~),
testStringWith, testStringWith,
guardEnabled, guardEnabled,
guardDisabled, guardDisabled,
updateLastStrPos,
notAfterString,
ParserState (..), ParserState (..),
HasReaderOptions (..), HasReaderOptions (..),
HasHeaderMap (..), HasHeaderMap (..),
HasIdentifierList (..), HasIdentifierList (..),
HasMacros (..), HasMacros (..),
HasLastStrPosition (..),
defaultParserState, defaultParserState,
HeaderType (..), HeaderType (..),
ParserContext (..), ParserContext (..),
@ -92,6 +94,7 @@ module Text.Pandoc.Parsing ( (>>~),
apostrophe, apostrophe,
dash, dash,
nested, nested,
citeKey,
macro, macro,
applyMacros', applyMacros',
Parser, Parser,
@ -904,6 +907,14 @@ instance HasMacros ParserState where
extractMacros = stateMacros extractMacros = stateMacros
updateMacros f st = st{ stateMacros = f $ stateMacros st } updateMacros f st = st{ stateMacros = f $ stateMacros st }
class HasLastStrPosition st where
setLastStrPos :: SourcePos -> st -> st
getLastStrPos :: st -> Maybe SourcePos
instance HasLastStrPosition ParserState where
setLastStrPos pos st = st{ stateLastStrPos = Just pos }
getLastStrPos st = stateLastStrPos st
defaultParserState :: ParserState defaultParserState :: ParserState
defaultParserState = defaultParserState =
ParserState { stateOptions = def, ParserState { stateOptions = def,
@ -938,6 +949,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
guardDisabled :: HasReaderOptions st => Extension -> Parser s st () guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
-- | Update the position on which the last string ended.
updateLastStrPos :: HasLastStrPosition st => Parser s st ()
updateLastStrPos = getPosition >>= updateState . setLastStrPos
-- | Whether we are right after the end of a string.
notAfterString :: HasLastStrPosition st => Parser s st Bool
notAfterString = do
pos <- getPosition
st <- getState
return $ getLastStrPos st /= Just pos
data HeaderType data HeaderType
= SingleHeader Char -- ^ Single line of characters underneath = SingleHeader Char -- ^ Single line of characters underneath
| DoubleHeader Char -- ^ Lines of characters above and below | DoubleHeader Char -- ^ Lines of characters above and below
@ -1049,17 +1071,11 @@ charOrRef cs =
guard (c `elem` cs) guard (c `elem` cs)
return c) return c)
updateLastStrPos :: Parser [Char] ParserState ()
updateLastStrPos = getPosition >>= \p ->
updateState $ \s -> s{ stateLastStrPos = Just p }
singleQuoteStart :: Parser [Char] ParserState () singleQuoteStart :: Parser [Char] ParserState ()
singleQuoteStart = do singleQuoteStart = do
failIfInQuoteContext InSingleQuote failIfInQuoteContext InSingleQuote
pos <- getPosition
st <- getState
-- single quote start can't be right after str -- single quote start can't be right after str
guard $ stateLastStrPos st /= Just pos guard =<< notAfterString
() <$ charOrRef "'\8216\145" () <$ charOrRef "'\8216\145"
singleQuoteEnd :: Parser [Char] st () singleQuoteEnd :: Parser [Char] st ()
@ -1129,6 +1145,18 @@ nested p = do
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res return res
citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String)
citeKey = try $ do
guard =<< notAfterString
suppress_author <- option False (char '-' *> return True)
char '@'
firstChar <- letter <|> char '_'
let regchar = satisfy (\c -> isAlphaNum c || c == '_')
let internal p = try $ p <* lookAhead regchar
rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
let key = firstChar:rest
return (suppress_author, key)
-- --
-- Macros -- Macros
-- --
@ -1156,4 +1184,3 @@ applyMacros' target = do
then do macros <- extractMacros `fmap` getState then do macros <- extractMacros `fmap` getState
return $ applyMacros macros target return $ applyMacros macros target
else return target else return target

View file

@ -1474,9 +1474,7 @@ strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_')
where checkIntraword = do where checkIntraword = do
exts <- getOption readerExtensions exts <- getOption readerExtensions
when (Ext_intraword_underscores `Set.member` exts) $ do when (Ext_intraword_underscores `Set.member` exts) $ do
pos <- getPosition guard =<< notAfterString
lastStrPos <- stateLastStrPos <$> getState
guard $ lastStrPos /= Just pos
-- | Parses a list of inlines between start and end delimiters. -- | Parses a list of inlines between start and end delimiters.
inlinesBetween :: (Show b) inlinesBetween :: (Show b)
@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n')
str :: MarkdownParser (F Inlines) str :: MarkdownParser (F Inlines)
str = do str = do
result <- many1 alphaNum result <- many1 alphaNum
pos <- getPosition updateLastStrPos
updateState $ \s -> s{ stateLastStrPos = Just pos }
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
isSmart <- getOption readerSmart isSmart <- getOption readerSmart
if isSmart if isSmart
@ -1817,22 +1814,6 @@ normalCite = try $ do
char ']' char ']'
return citations return citations
citeKey :: MarkdownParser (Bool, String)
citeKey = try $ do
-- make sure we're not right after an alphanumeric,
-- since foo@bar.baz is probably an email address
lastStrPos <- stateLastStrPos <$> getState
pos <- getPosition
guard $ lastStrPos /= Just pos
suppress_author <- option False (char '-' >> return True)
char '@'
first <- letter <|> char '_'
let regchar = satisfy (\c -> isAlphaNum c || c == '_')
let internal p = try $ p >>~ lookAhead regchar
rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
let key = first:rest
return (suppress_author, key)
suffix :: MarkdownParser (F Inlines) suffix :: MarkdownParser (F Inlines)
suffix = try $ do suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True) hasSpace <- option False (notFollowedBy nonspaceChar >> return True)

View file

@ -105,6 +105,10 @@ instance HasMeta OrgParserState where
deleteMeta field st = deleteMeta field st =
st{ orgStateMeta = deleteMeta field $ orgStateMeta st } st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
instance HasLastStrPosition OrgParserState where
getLastStrPos = orgStateLastStrPos
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
instance Default OrgParserState where instance Default OrgParserState where
def = defaultOrgParserState def = defaultOrgParserState
@ -865,6 +869,7 @@ inline :: OrgParser (F Inlines)
inline = inline =
choice [ whitespace choice [ whitespace
, linebreak , linebreak
, cite
, footnote , footnote
, linkOrImage , linkOrImage
, anchor , anchor
@ -929,6 +934,51 @@ endline = try $ do
updateLastPreCharPos updateLastPreCharPos
return . return $ B.space return . return $ B.space
cite :: OrgParser (F Inlines)
cite = try $ do
guardEnabled Ext_citations
(cs, raw) <- withRaw normalCite
return $ (flip B.cite (B.text raw)) <$> cs
normalCite :: OrgParser (F [Citation])
normalCite = try $ char '['
*> skipSpaces
*> citeList
<* skipSpaces
<* char ']'
citeList :: OrgParser (F [Citation])
citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
citation :: OrgParser (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
suff <- suffix
return $ do
x <- pref
y <- suff
return $ Citation{ citationId = key
, citationPrefix = B.toList x
, citationSuffix = B.toList y
, citationMode = if suppress_author
then SuppressAuthor
else NormalCitation
, citationNoteNum = 0
, citationHash = 0
}
where
prefix = trimInlinesF . mconcat <$>
manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
skipSpaces
rest <- trimInlinesF . mconcat <$>
many (notFollowedBy (oneOf ";]") *> inline)
return $ if hasSpace
then (B.space <>) <$> rest
else rest
footnote :: OrgParser (F Inlines) footnote :: OrgParser (F Inlines)
footnote = try $ inlineNote <|> referencedNote footnote = try $ inlineNote <|> referencedNote
@ -1003,7 +1053,7 @@ selfTarget :: OrgParser String
selfTarget = try $ char '[' *> linkTarget <* char ']' selfTarget = try $ char '[' *> linkTarget <* char ']'
linkTarget :: OrgParser String linkTarget :: OrgParser String
linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
applyCustomLinkFormat :: String -> OrgParser (F String) applyCustomLinkFormat :: String -> OrgParser (F String)
applyCustomLinkFormat link = do applyCustomLinkFormat link = do
@ -1079,7 +1129,12 @@ inlineCodeBlock = try $ do
let attrClasses = [translateLang lang, rundocBlockClass] let attrClasses = [translateLang lang, rundocBlockClass]
let attrKeyVal = map toRundocAttrib (("language", lang) : opts) let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
where enclosedByPair s e p = char s *> many1Till p (char e)
enclosedByPair :: Char -- ^ opening char
-> Char -- ^ closing char
-> OrgParser a -- ^ parser
-> OrgParser [a]
enclosedByPair s e p = char s *> many1Till p (char e)
emph :: OrgParser (F Inlines) emph :: OrgParser (F Inlines)
emph = fmap B.emph <$> emphasisBetween '/' emph = fmap B.emph <$> emphasisBetween '/'
@ -1274,13 +1329,6 @@ afterEmphasisPreChar = do
lastPrePos <- orgStateLastPreCharPos <$> getState lastPrePos <- orgStateLastPreCharPos <$> getState
return . fromMaybe True $ (== pos) <$> lastPrePos return . fromMaybe True $ (== pos) <$> lastPrePos
-- | Whether we are right after the end of a string
notAfterString :: OrgParser Bool
notAfterString = do
pos <- getPosition
lastStrPos <- orgStateLastStrPos <$> getState
return $ lastStrPos /= Just pos
-- | Whether the parser is right after a forbidden border char -- | Whether the parser is right after a forbidden border char
notAfterForbiddenBorderChar :: OrgParser Bool notAfterForbiddenBorderChar :: OrgParser Bool
notAfterForbiddenBorderChar = do notAfterForbiddenBorderChar = do

View file

@ -225,6 +225,28 @@ tests =
] ]
) )
"echo 'Hello, World'") "echo 'Hello, World'")
, "Citation" =:
"[@nonexistent]" =?>
let citation = Citation
{ citationId = "nonexistent"
, citationPrefix = []
, citationSuffix = []
, citationMode = NormalCitation
, citationNoteNum = 0
, citationHash = 0}
in (para $ cite [citation] "[@nonexistent]")
, "Citation containing text" =:
"[see @item1 p. 34-35]" =?>
let citation = Citation
{ citationId = "item1"
, citationPrefix = [Str "see"]
, citationSuffix = [Space ,Str "p.",Space,Str "34-35"]
, citationMode = NormalCitation
, citationNoteNum = 0
, citationHash = 0}
in (para $ cite [citation] "[see @item1 p. 34-35]")
] ]
, testGroup "Meta Information" $ , testGroup "Meta Information" $