Merge pull request #1297 from tarleb/citations

Org reader: support Pandocs citation extension
2014-05-14 06:37:29 -07:00 · 2014-05-14 06:37:29 -07:00 · b5959b2007
commit b5959b2007
parent 222a51bf99 ceeb701c25
5 changed files with 118 additions and 40 deletions
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
 getDefaultExtensions "markdown_github" = githubMarkdownExtensions
 getDefaultExtensions "markdown"        = pandocExtensions
 getDefaultExtensions "plain"           = pandocExtensions
 getDefaultExtensions "org"             = Set.fromList [Ext_citations]
 getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
 getDefaultExtensions _                 = Set.fromList [Ext_auto_identifiers]
@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
 writeJSON :: WriterOptions -> Pandoc -> String
 writeJSON _ = UTF8.toStringLazy . encode
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~),
                             withRaw,
                             escaped,
                             characterReference,
                             updateLastStrPos,
                             anyOrderedListMarker,
                             orderedListMarker,
                             charRef,
@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~),
                             testStringWith,
                             guardEnabled,
                             guardDisabled,
                             updateLastStrPos,
                             notAfterString,
                             ParserState (..),
                             HasReaderOptions (..),
                             HasHeaderMap (..),
                             HasIdentifierList (..),
                             HasMacros (..),
                             HasLastStrPosition (..),
                             defaultParserState,
                             HeaderType (..),
                             ParserContext (..),
@ -92,6 +94,7 @@ module Text.Pandoc.Parsing ( (>>~),
                             apostrophe,
                             dash,
                             nested,
                             citeKey,
                             macro,
                             applyMacros',
                             Parser,
@ -904,6 +907,14 @@ instance HasMacros ParserState where
  extractMacros        = stateMacros
  updateMacros f st    = st{ stateMacros = f $ stateMacros st }
 class HasLastStrPosition st where
  setLastStrPos  :: SourcePos -> st -> st
  getLastStrPos  :: st -> Maybe SourcePos
 instance HasLastStrPosition ParserState where
  setLastStrPos pos st = st{ stateLastStrPos = Just pos }
  getLastStrPos st     = stateLastStrPos st
 defaultParserState :: ParserState
 defaultParserState =
    ParserState { stateOptions         = def,
@ -938,6 +949,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
 guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
 guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
 -- | Update the position on which the last string ended.
 updateLastStrPos :: HasLastStrPosition st => Parser s st ()
 updateLastStrPos = getPosition >>= updateState . setLastStrPos
 -- | Whether we are right after the end of a string.
 notAfterString :: HasLastStrPosition st => Parser s st Bool
 notAfterString = do
  pos <- getPosition
  st  <- getState
  return $ getLastStrPos st /= Just pos
 data HeaderType
    = SingleHeader Char  -- ^ Single line of characters underneath
    | DoubleHeader Char  -- ^ Lines of characters above and below
@ -1049,17 +1071,11 @@ charOrRef cs =
                       guard (c `elem` cs)
                       return c)
 updateLastStrPos :: Parser [Char] ParserState ()
 updateLastStrPos = getPosition >>= \p ->
  updateState $ \s -> s{ stateLastStrPos = Just p }
 singleQuoteStart :: Parser [Char] ParserState ()
 singleQuoteStart = do
  failIfInQuoteContext InSingleQuote
  pos <- getPosition
  st <- getState
  -- single quote start can't be right after str
-  guard $ stateLastStrPos st /= Just pos
+  guard =<< notAfterString
  () <$ charOrRef "'\8216\145"
 singleQuoteEnd :: Parser [Char] st ()
@ -1129,6 +1145,18 @@ nested p = do
  updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
  return res
 citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String)
 citeKey = try $ do
  guard =<< notAfterString
  suppress_author <- option False (char '-' *> return True)
  char '@'
  firstChar <- letter <|> char '_'
  let regchar = satisfy (\c -> isAlphaNum c || c == '_')
  let internal p = try $ p <* lookAhead regchar
  rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
  let key = firstChar:rest
  return (suppress_author, key)
 --
 -- Macros
 --
@ -1156,4 +1184,3 @@ applyMacros' target = do
     then do macros <- extractMacros `fmap` getState
             return $ applyMacros macros target
     else return target
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@ -1474,9 +1474,7 @@ strongOrEmph =  enclosure '*' <|> (checkIntraword >> enclosure '_')
  where  checkIntraword = do
           exts <- getOption readerExtensions
           when (Ext_intraword_underscores `Set.member` exts) $ do
-             pos <- getPosition
+             guard =<< notAfterString
             lastStrPos <- stateLastStrPos <$> getState
             guard $ lastStrPos /= Just pos
 -- | Parses a list of inlines between start and end delimiters.
 inlinesBetween :: (Show b)
@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n')
 str :: MarkdownParser (F Inlines)
 str = do
  result <- many1 alphaNum
-  pos <- getPosition
+  updateLastStrPos
  updateState $ \s -> s{ stateLastStrPos = Just pos }
  let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
  isSmart <- getOption readerSmart
  if isSmart
@ -1817,22 +1814,6 @@ normalCite = try $ do
  char ']'
  return citations
 citeKey :: MarkdownParser (Bool, String)
 citeKey = try $ do
  -- make sure we're not right after an alphanumeric,
  -- since foo@bar.baz is probably an email address
  lastStrPos <- stateLastStrPos <$> getState
  pos <- getPosition
  guard $ lastStrPos /= Just pos
  suppress_author <- option False (char '-' >> return True)
  char '@'
  first <- letter <|> char '_'
  let regchar = satisfy (\c -> isAlphaNum c || c == '_')
  let internal p = try $ p >>~ lookAhead regchar
  rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
  let key = first:rest
  return (suppress_author, key)
 suffix :: MarkdownParser (F Inlines)
 suffix = try $ do
  hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@ -105,6 +105,10 @@ instance HasMeta OrgParserState where
  deleteMeta field st =
    st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
 instance HasLastStrPosition OrgParserState where
  getLastStrPos = orgStateLastStrPos
  setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
 instance Default OrgParserState where
  def = defaultOrgParserState
@ -865,6 +869,7 @@ inline :: OrgParser (F Inlines)
 inline =
  choice [ whitespace
         , linebreak
         , cite
         , footnote
         , linkOrImage
         , anchor
@ -929,6 +934,51 @@ endline = try $ do
  updateLastPreCharPos
  return . return $ B.space
 cite :: OrgParser (F Inlines)
 cite = try $ do
  guardEnabled Ext_citations
  (cs, raw) <- withRaw normalCite
  return $ (flip B.cite (B.text raw)) <$> cs
 normalCite :: OrgParser (F [Citation])
 normalCite = try $  char '['
                 *> skipSpaces
                 *> citeList
                 <* skipSpaces
                 <* char ']'
 citeList :: OrgParser (F [Citation])
 citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
 citation :: OrgParser (F Citation)
 citation = try $ do
  pref <- prefix
  (suppress_author, key) <- citeKey
  suff <- suffix
  return $ do
    x <- pref
    y <- suff
    return $ Citation{ citationId      = key
                     , citationPrefix  = B.toList x
                     , citationSuffix  = B.toList y
                     , citationMode    = if suppress_author
                                            then SuppressAuthor
                                            else NormalCitation
                     , citationNoteNum = 0
                     , citationHash    = 0
                     }
 where
   prefix = trimInlinesF . mconcat <$>
            manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
   suffix = try $ do
     hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
     skipSpaces
     rest <- trimInlinesF . mconcat <$>
             many (notFollowedBy (oneOf ";]") *> inline)
     return $ if hasSpace
              then (B.space <>) <$> rest
              else rest
 footnote :: OrgParser (F Inlines)
 footnote = try $ inlineNote <|> referencedNote
@ -1003,7 +1053,7 @@ selfTarget :: OrgParser String
 selfTarget = try $ char '[' *> linkTarget <* char ']'
 linkTarget :: OrgParser String
-linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
+linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
 applyCustomLinkFormat :: String -> OrgParser (F String)
 applyCustomLinkFormat link = do
@ -1079,7 +1129,12 @@ inlineCodeBlock = try $ do
  let attrClasses = [translateLang lang, rundocBlockClass]
  let attrKeyVal  = map toRundocAttrib (("language", lang) : opts)
  returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
- where enclosedByPair s e p = char s *> many1Till p (char e)
+
 enclosedByPair :: Char          -- ^ opening char
               -> Char          -- ^ closing char
               -> OrgParser a   -- ^ parser
               -> OrgParser [a]
 enclosedByPair s e p = char s *> many1Till p (char e)
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
@ -1274,13 +1329,6 @@ afterEmphasisPreChar = do
  lastPrePos <- orgStateLastPreCharPos <$> getState
  return . fromMaybe True $ (== pos) <$> lastPrePos
 -- | Whether we are right after the end of a string
 notAfterString :: OrgParser Bool
 notAfterString = do
  pos <- getPosition
  lastStrPos <- orgStateLastStrPos <$> getState
  return $ lastStrPos /= Just pos
 -- | Whether the parser is right after a forbidden border char
 notAfterForbiddenBorderChar :: OrgParser Bool
 notAfterForbiddenBorderChar = do
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@ -225,6 +225,28 @@ tests =
                             ]
                           )
                           "echo 'Hello, World'")
      , "Citation" =:
          "[@nonexistent]" =?>
          let citation = Citation
                         { citationId = "nonexistent"
                         , citationPrefix = []
                         , citationSuffix = []
                         , citationMode = NormalCitation
                         , citationNoteNum = 0
                         , citationHash = 0}
          in (para $ cite [citation] "[@nonexistent]")
      , "Citation containing text" =:
          "[see @item1 p. 34-35]" =?>
          let citation = Citation
                         { citationId = "item1"
                         , citationPrefix = [Str "see"]
                         , citationSuffix = [Space ,Str "p.",Space,Str "34-35"]
                         , citationMode = NormalCitation
                         , citationNoteNum = 0
                         , citationHash = 0}
          in (para $ cite [citation] "[see @item1 p. 34-35]")
      ]
  , testGroup "Meta Information" $