Merge pull request #1297 from tarleb/citations

Org reader: support Pandocs citation extension
This commit is contained in:
John MacFarlane 2014-05-14 06:37:29 -07:00
commit b5959b2007
5 changed files with 118 additions and 40 deletions

View file

@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
getDefaultExtensions "markdown_github" = githubMarkdownExtensions
getDefaultExtensions "markdown" = pandocExtensions
getDefaultExtensions "plain" = pandocExtensions
getDefaultExtensions "org" = Set.fromList [Ext_citations]
getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers]
@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
writeJSON :: WriterOptions -> Pandoc -> String
writeJSON _ = UTF8.toStringLazy . encode

View file

@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~),
withRaw,
escaped,
characterReference,
updateLastStrPos,
anyOrderedListMarker,
orderedListMarker,
charRef,
@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~),
testStringWith,
guardEnabled,
guardDisabled,
updateLastStrPos,
notAfterString,
ParserState (..),
HasReaderOptions (..),
HasHeaderMap (..),
HasIdentifierList (..),
HasMacros (..),
HasLastStrPosition (..),
defaultParserState,
HeaderType (..),
ParserContext (..),
@ -92,6 +94,7 @@ module Text.Pandoc.Parsing ( (>>~),
apostrophe,
dash,
nested,
citeKey,
macro,
applyMacros',
Parser,
@ -904,6 +907,14 @@ instance HasMacros ParserState where
extractMacros = stateMacros
updateMacros f st = st{ stateMacros = f $ stateMacros st }
class HasLastStrPosition st where
setLastStrPos :: SourcePos -> st -> st
getLastStrPos :: st -> Maybe SourcePos
instance HasLastStrPosition ParserState where
setLastStrPos pos st = st{ stateLastStrPos = Just pos }
getLastStrPos st = stateLastStrPos st
defaultParserState :: ParserState
defaultParserState =
ParserState { stateOptions = def,
@ -938,6 +949,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
-- | Update the position on which the last string ended.
updateLastStrPos :: HasLastStrPosition st => Parser s st ()
updateLastStrPos = getPosition >>= updateState . setLastStrPos
-- | Whether we are right after the end of a string.
notAfterString :: HasLastStrPosition st => Parser s st Bool
notAfterString = do
pos <- getPosition
st <- getState
return $ getLastStrPos st /= Just pos
data HeaderType
= SingleHeader Char -- ^ Single line of characters underneath
| DoubleHeader Char -- ^ Lines of characters above and below
@ -1049,17 +1071,11 @@ charOrRef cs =
guard (c `elem` cs)
return c)
updateLastStrPos :: Parser [Char] ParserState ()
updateLastStrPos = getPosition >>= \p ->
updateState $ \s -> s{ stateLastStrPos = Just p }
singleQuoteStart :: Parser [Char] ParserState ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
pos <- getPosition
st <- getState
-- single quote start can't be right after str
guard $ stateLastStrPos st /= Just pos
guard =<< notAfterString
() <$ charOrRef "'\8216\145"
singleQuoteEnd :: Parser [Char] st ()
@ -1129,6 +1145,18 @@ nested p = do
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String)
citeKey = try $ do
guard =<< notAfterString
suppress_author <- option False (char '-' *> return True)
char '@'
firstChar <- letter <|> char '_'
let regchar = satisfy (\c -> isAlphaNum c || c == '_')
let internal p = try $ p <* lookAhead regchar
rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
let key = firstChar:rest
return (suppress_author, key)
--
-- Macros
--
@ -1156,4 +1184,3 @@ applyMacros' target = do
then do macros <- extractMacros `fmap` getState
return $ applyMacros macros target
else return target

View file

@ -1474,9 +1474,7 @@ strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_')
where checkIntraword = do
exts <- getOption readerExtensions
when (Ext_intraword_underscores `Set.member` exts) $ do
pos <- getPosition
lastStrPos <- stateLastStrPos <$> getState
guard $ lastStrPos /= Just pos
guard =<< notAfterString
-- | Parses a list of inlines between start and end delimiters.
inlinesBetween :: (Show b)
@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n')
str :: MarkdownParser (F Inlines)
str = do
result <- many1 alphaNum
pos <- getPosition
updateState $ \s -> s{ stateLastStrPos = Just pos }
updateLastStrPos
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
isSmart <- getOption readerSmart
if isSmart
@ -1817,22 +1814,6 @@ normalCite = try $ do
char ']'
return citations
citeKey :: MarkdownParser (Bool, String)
citeKey = try $ do
-- make sure we're not right after an alphanumeric,
-- since foo@bar.baz is probably an email address
lastStrPos <- stateLastStrPos <$> getState
pos <- getPosition
guard $ lastStrPos /= Just pos
suppress_author <- option False (char '-' >> return True)
char '@'
first <- letter <|> char '_'
let regchar = satisfy (\c -> isAlphaNum c || c == '_')
let internal p = try $ p >>~ lookAhead regchar
rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
let key = first:rest
return (suppress_author, key)
suffix :: MarkdownParser (F Inlines)
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)

View file

@ -105,6 +105,10 @@ instance HasMeta OrgParserState where
deleteMeta field st =
st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
instance HasLastStrPosition OrgParserState where
getLastStrPos = orgStateLastStrPos
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
instance Default OrgParserState where
def = defaultOrgParserState
@ -865,6 +869,7 @@ inline :: OrgParser (F Inlines)
inline =
choice [ whitespace
, linebreak
, cite
, footnote
, linkOrImage
, anchor
@ -929,6 +934,51 @@ endline = try $ do
updateLastPreCharPos
return . return $ B.space
cite :: OrgParser (F Inlines)
cite = try $ do
guardEnabled Ext_citations
(cs, raw) <- withRaw normalCite
return $ (flip B.cite (B.text raw)) <$> cs
normalCite :: OrgParser (F [Citation])
normalCite = try $ char '['
*> skipSpaces
*> citeList
<* skipSpaces
<* char ']'
citeList :: OrgParser (F [Citation])
citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
citation :: OrgParser (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
suff <- suffix
return $ do
x <- pref
y <- suff
return $ Citation{ citationId = key
, citationPrefix = B.toList x
, citationSuffix = B.toList y
, citationMode = if suppress_author
then SuppressAuthor
else NormalCitation
, citationNoteNum = 0
, citationHash = 0
}
where
prefix = trimInlinesF . mconcat <$>
manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
skipSpaces
rest <- trimInlinesF . mconcat <$>
many (notFollowedBy (oneOf ";]") *> inline)
return $ if hasSpace
then (B.space <>) <$> rest
else rest
footnote :: OrgParser (F Inlines)
footnote = try $ inlineNote <|> referencedNote
@ -1003,7 +1053,7 @@ selfTarget :: OrgParser String
selfTarget = try $ char '[' *> linkTarget <* char ']'
linkTarget :: OrgParser String
linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
applyCustomLinkFormat :: String -> OrgParser (F String)
applyCustomLinkFormat link = do
@ -1079,7 +1129,12 @@ inlineCodeBlock = try $ do
let attrClasses = [translateLang lang, rundocBlockClass]
let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
where enclosedByPair s e p = char s *> many1Till p (char e)
enclosedByPair :: Char -- ^ opening char
-> Char -- ^ closing char
-> OrgParser a -- ^ parser
-> OrgParser [a]
enclosedByPair s e p = char s *> many1Till p (char e)
emph :: OrgParser (F Inlines)
emph = fmap B.emph <$> emphasisBetween '/'
@ -1274,13 +1329,6 @@ afterEmphasisPreChar = do
lastPrePos <- orgStateLastPreCharPos <$> getState
return . fromMaybe True $ (== pos) <$> lastPrePos
-- | Whether we are right after the end of a string
notAfterString :: OrgParser Bool
notAfterString = do
pos <- getPosition
lastStrPos <- orgStateLastStrPos <$> getState
return $ lastStrPos /= Just pos
-- | Whether the parser is right after a forbidden border char
notAfterForbiddenBorderChar :: OrgParser Bool
notAfterForbiddenBorderChar = do

View file

@ -225,6 +225,28 @@ tests =
]
)
"echo 'Hello, World'")
, "Citation" =:
"[@nonexistent]" =?>
let citation = Citation
{ citationId = "nonexistent"
, citationPrefix = []
, citationSuffix = []
, citationMode = NormalCitation
, citationNoteNum = 0
, citationHash = 0}
in (para $ cite [citation] "[@nonexistent]")
, "Citation containing text" =:
"[see @item1 p. 34-35]" =?>
let citation = Citation
{ citationId = "item1"
, citationPrefix = [Str "see"]
, citationSuffix = [Space ,Str "p.",Space,Str "34-35"]
, citationMode = NormalCitation
, citationNoteNum = 0
, citationHash = 0}
in (para $ cite [citation] "[see @item1 p. 34-35]")
]
, testGroup "Meta Information" $