From 1bfe1b84a8692c5e2ea8a036208f61612b48e9fb Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Mon, 4 Aug 2008 03:15:34 +0000 Subject: [PATCH] Added support for Cite to Markdown reader, and conditional support for citeproc module. + The citeproc cabal configuration option sets the _CITEPROC macro, which conditionally includes code for handling citations. + Added Text.Pandoc.Biblio module. + Made highlighting option default to False. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1376 788f1e2b-df1e-0410-8736-df70ead52e1b --- Main.hs | 71 +++++++++++++++++++++++------ Text/Pandoc/Biblio.hs | 76 ++++++++++++++++++++++++++++++++ Text/Pandoc/Definition.hs | 2 + Text/Pandoc/Readers/Markdown.hs | 43 +++++++++++++++++- Text/Pandoc/Shared.hs | 6 +++ Text/Pandoc/Writers/MediaWiki.hs | 2 + pandoc.cabal | 13 +++++- 7 files changed, 197 insertions(+), 16 deletions(-) create mode 100644 Text/Pandoc/Biblio.hs diff --git a/Main.hs b/Main.hs index 90f3301f5..2bb2f13a6 100644 --- a/Main.hs +++ b/Main.hs @@ -47,6 +47,10 @@ import System.IO ( stdout, stderr ) #else import System.IO #endif +#ifdef _CITEPROC +import Text.CSL +import Text.Pandoc.Biblio +#endif copyrightMessage :: String copyrightMessage = "\nCopyright (C) 2006-7 John MacFarlane\n" ++ @@ -55,15 +59,25 @@ copyrightMessage = "\nCopyright (C) 2006-7 John MacFarlane\n" ++ "warranty, not even for merchantability or fitness for a particular purpose." compileInfo :: String -compileInfo = "Compiled" ++ +compileInfo = #ifdef _UTF8 - " with UTF-8 support" ++ + " +utf8" ++ #else - " without UTF-8 support" ++ + " -utf8" ++ +#endif +#ifdef _CITEPROC + " +citeproc" ++ +#else + " -citeproc" ++ +#endif +#ifdef _HIGHLIGHTING + " +highlighting" ++ +#else + " -highlighting" ++ #endif if null languages - then " and without syntax highlighting support." - else " and with syntax highlighting support for:\n" ++ + then "\n" + else "\nCompiled with syntax highlighting support for:\n" ++ (unlines $ map unwords $ chunk 5 $ map (\s -> s ++ replicate (15 - length s) ' ') languages) -- | Splits a list into groups of at most n. @@ -137,6 +151,10 @@ data Opt = Opt , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , optWrapText :: Bool -- ^ Wrap text , optSanitizeHTML :: Bool -- ^ Sanitize HTML +#ifdef _CITEPROC + , optModsFile :: String + , optCslFile :: String +#endif } -- | Defaults for command-line options. @@ -166,6 +184,10 @@ defaultOpts = Opt , optReferenceLinks = False , optWrapText = True , optSanitizeHTML = False +#ifdef _CITEPROC + , optModsFile = [] + , optCslFile = [] +#endif } -- | A list of functions, each transforming the options data structure @@ -333,13 +355,24 @@ options = exitWith ExitSuccess) "FORMAT") "" -- "Print default header for FORMAT" - +#ifdef _CITEPROC + , Option "" ["mods"] + (ReqArg + (\arg opt -> return opt { optModsFile = arg} ) + "FILENAME") + "" + , Option "" ["csl"] + (ReqArg + (\arg opt -> return opt { optCslFile = arg} ) + "FILENAME") + "" +#endif , Option "" ["dump-args"] (NoArg (\opt -> return opt { optDumpArgs = True })) "" -- "Print output filename and arguments to stdout." - , Option "" ["ignore-args"] + , Option "" ["ignore-args"] (NoArg (\opt -> return opt { optIgnoreArgs = True })) "" -- "Ignore command-line arguments." @@ -348,7 +381,7 @@ options = (NoArg (\_ -> do prg <- getProgName - hPutStrLn stderr (prg ++ " " ++ pandocVersion ++ "\n" ++ compileInfo ++ + hPutStrLn stderr (prg ++ " " ++ pandocVersion ++ compileInfo ++ copyrightMessage) exitWith $ ExitFailure 4)) "" -- "Print version" @@ -464,6 +497,10 @@ main = do , optReferenceLinks = referenceLinks , optWrapText = wrap , optSanitizeHTML = sanitize +#ifdef _CITEPROC + , optModsFile = modsFile + , optCslFile = cslFile +#endif } = opts if dumpArgs @@ -513,11 +550,18 @@ main = do let standalone' = (standalone && not strict) || writerName' == "odt" +#ifdef _CITEPROC + refs <- if null modsFile then return [] else readModsColletionFile modsFile +#endif + let startParserState = defaultParserState { stateParseRaw = parseRaw, stateTabStop = tabStop, stateSanitizeHTML = sanitize, stateStandalone = standalone', +#ifdef _CITEPROC + stateCitations = map citeKey refs, +#endif stateSmart = smart || writerName' `elem` ["latex", "context"], stateColumns = columns, @@ -564,11 +608,12 @@ main = do then putStrLn else writeFile outputFile . (++ "\n") - (readSources sources) >>= writeOutput . - writer writerOptions . - reader startParserState . - tabFilter tabStop . - joinWithSep "\n" + fmap (reader startParserState . tabFilter tabStop . joinWithSep "\n") + (readSources sources) >>= +#ifdef _CITEPROC + processBiblio cslFile refs >>= +#endif + writeOutput . writer writerOptions where readSources [] = mapM readSource ["-"] diff --git a/Text/Pandoc/Biblio.hs b/Text/Pandoc/Biblio.hs new file mode 100644 index 000000000..f39b6a608 --- /dev/null +++ b/Text/Pandoc/Biblio.hs @@ -0,0 +1,76 @@ +{-# LANGUAGE PatternGuards, CPP #-} +{- +Copyright (C) 2008 Andrea Rossato + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Biblio + Copyright : Copyright (C) 2008 Andrea Rossato + License : GNU GPL, version 2 or above + + Maintainer : Andrea Rossato + Stability : alpha + Portability : portable +-} + +#ifdef _CITEPROC +module Text.Pandoc.Biblio ( processBiblio ) where + +import Control.Monad ( when ) +import Data.List +import Text.CSL +import Text.Pandoc.Definition + +#else +module Text.Pandoc.Biblio () where +#endif + +#ifdef _CITEPROC +processBiblio :: String -> [Reference] -> Pandoc -> IO Pandoc +processBiblio cf r p + = if null r then return p + else do + when (null cf) $ error "Missing the needed citation style file" + csl <- readCSLFile cf + let groups = queryPandoc getCite p + citations = zip groups . processCitations csl r $ groups + Pandoc m b = processPandoc (processCite citations) p + return $ Pandoc m $ b ++ renderBiblio csl r p + +renderBiblio :: Style -> [Reference] -> Pandoc -> [Block] +renderBiblio s r p + = map (read . renderPandoc') $ processBibliography s refs + where cits = nub . map fst . concat . queryPandoc getCite $ p + refs = getRefs r $ zip cits (repeat "") + +processCite :: [([Target],[FormattedOutput])] -> Inline -> Inline +processCite cs il + | Cite t _ <- il = Cite t (process t) + | otherwise = il + where + process t = case elemIndex t (map fst cs) of + Just i -> read . renderPandoc $ snd (cs !! i) + Nothing -> [Str ("Error processing " ++ show t)] + +getCite :: Inline -> [[(String,String)]] +getCite i | Cite t _ <- i = [t] + | otherwise = [] + +getRefs :: [Reference] -> [Target] -> [Reference] +getRefs r = map $ getReference r + +#endif diff --git a/Text/Pandoc/Definition.hs b/Text/Pandoc/Definition.hs index f302f0dbd..7fc04f698 100644 --- a/Text/Pandoc/Definition.hs +++ b/Text/Pandoc/Definition.hs @@ -125,8 +125,10 @@ data Inline | Note [Block] -- ^ Footnote or endnote deriving (Show, Eq, Read, Typeable, Data) +-- | Applies a transformation to matching elements in a Pandoc document. processPandoc :: Typeable a => (a -> a) -> Pandoc -> Pandoc processPandoc f = everywhere (mkT f) +-- | Runs a query on matching elements in a Pandoc document. queryPandoc :: Typeable a => (a -> [b]) -> Pandoc -> [b] queryPandoc f = everything (++) ([] `mkQ` f) diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs index 712d66a37..2bf53684f 100644 --- a/Text/Pandoc/Readers/Markdown.hs +++ b/Text/Pandoc/Readers/Markdown.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {- Copyright (C) 2006-8 John MacFarlane @@ -34,7 +35,7 @@ module Text.Pandoc.Readers.Markdown ( import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex ) import Data.Ord ( comparing ) import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit ) -import Data.Maybe ( fromMaybe ) +import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) @@ -173,7 +174,7 @@ parseMarkdown = do setPosition startPos -- now parse it for real... (title, author, date) <- option ([],[],"") titleBlock - blocks <- parseBlocks + blocks <- parseBlocks return $ Pandoc (Meta title author date) $ filter (/= Null) blocks -- @@ -804,6 +805,9 @@ inlineParsers = [ abbrev , note , inlineNote , link +#ifdef _CITEPROC + , inlineCitation +#endif , image , math , strikeout @@ -1152,3 +1156,38 @@ rawHtmlInline' = do else anyHtmlInlineTag return $ HtmlInline result +#ifdef _CITEPROC +inlineCitation :: GenParser Char ParserState Inline +inlineCitation = try $ do + failIfStrict + cit <- citeMarker + let citations = readWith parseCitation defaultParserState cit + mr <- mapM chkCit citations + if catMaybes mr /= [] + then return $ Cite citations [] + else fail "no citation found" + +chkCit :: Target -> GenParser Char ParserState (Maybe Target) +chkCit t = do + st <- getState + case lookupKeySrc (stateKeys st) [Str $ fst t] of + Just _ -> fail "This is a link" + Nothing -> if elem (fst t) $ stateCitations st + then return $ Just t + else return $ Nothing + +citeMarker :: GenParser Char ParserState String +citeMarker = string "[" >> manyTill (noneOf "\t\n") (string "]") + +parseCitation :: GenParser Char ParserState [(String,String)] +parseCitation = try $ sepBy (parseLabel) (oneOf ";") + +parseLabel :: GenParser Char ParserState (String,String) +parseLabel = try $ do + res <- sepBy (skipSpaces >> many1 (noneOf "@;\n\t")) (oneOf "@") + case res of + [lab,loc] -> return (lab, loc) + [lab] -> return (lab, "" ) + _ -> return ("" , "" ) + +#endif diff --git a/Text/Pandoc/Shared.hs b/Text/Pandoc/Shared.hs index 6ae507cfa..bc0791d77 100644 --- a/Text/Pandoc/Shared.hs +++ b/Text/Pandoc/Shared.hs @@ -629,6 +629,9 @@ data ParserState = ParserState stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? stateSanitizeHTML :: Bool, -- ^ Sanitize HTML? stateKeys :: KeyTable, -- ^ List of reference keys +#ifdef _CITEPROC + stateCitations :: [String], -- ^ List of available citations +#endif stateNotes :: NoteTable, -- ^ List of notes stateTabStop :: Int, -- ^ Tab stop stateStandalone :: Bool, -- ^ Parse bibliographic info? @@ -649,6 +652,9 @@ defaultParserState = stateQuoteContext = NoQuote, stateSanitizeHTML = False, stateKeys = [], +#ifdef _CITEPROC + stateCitations = [], +#endif stateNotes = [], stateTabStop = 4, stateStandalone = False, diff --git a/Text/Pandoc/Writers/MediaWiki.hs b/Text/Pandoc/Writers/MediaWiki.hs index 14df15bfe..97d9d00c6 100644 --- a/Text/Pandoc/Writers/MediaWiki.hs +++ b/Text/Pandoc/Writers/MediaWiki.hs @@ -342,6 +342,8 @@ inlineToMediaWiki opts (Quoted DoubleQuote lst) = do contents <- inlineListToMediaWiki opts lst return $ "“" ++ contents ++ "”" +inlineToMediaWiki opts (Cite _ lst) = inlineListToMediaWiki opts lst + inlineToMediaWiki _ EmDash = return "—" inlineToMediaWiki _ EnDash = return "–" diff --git a/pandoc.cabal b/pandoc.cabal index 727a91c41..bfd99f5e0 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -57,7 +57,7 @@ Flag splitBase Default: True Flag highlighting Description: Compile in support for syntax highlighting of code blocks. - Default: True + Default: False Flag executable Description: Build the pandoc executable. Default: True @@ -67,6 +67,9 @@ Flag library Flag utf8 Description: Compile in support for UTF-8 input and output. Default: True +Flag citeproc + Description: Compile in support for citeproc-hs bibliographic formatting. + Default: False Library if flag(splitBase) @@ -79,10 +82,14 @@ Library if flag(utf8) Build-depends: utf8-string cpp-options: -D_UTF8 + if flag(citeproc) + Build-depends: citeproc-hs + cpp-options: -D_CITEPROC Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory, template-haskell, bytestring Hs-Source-Dirs: . Exposed-Modules: Text.Pandoc, + Text.Pandoc.Biblio, Text.Pandoc.Blocks, Text.Pandoc.Definition, Text.Pandoc.CharacterReferences, @@ -136,7 +143,11 @@ Executable pandoc cpp-options: -D_HIGHLIGHTING if flag(utf8) cpp-options: -D_UTF8 + if flag(citeproc) + Build-depends: citeproc-hs + cpp-options: -D_CITEPROC if flag(executable) Buildable: True else Buildable: False +