2012-01-24 18:06:19 +01:00
|
|
|
{-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-}
|
2007-11-04 00:27:58 +01:00
|
|
|
{-
|
2010-03-23 21:31:09 +01:00
|
|
|
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
|
2007-11-04 00:27:58 +01:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc
|
2010-03-23 21:31:09 +01:00
|
|
|
Copyright : Copyright (C) 2006-2010 John MacFarlane
|
2011-02-15 19:40:50 +01:00
|
|
|
License : GNU GPL, version 2 or above
|
2007-11-04 00:27:58 +01:00
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
2011-02-15 19:40:50 +01:00
|
|
|
Stability : alpha
|
2007-11-04 00:27:58 +01:00
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
This helper module exports the main writers, readers, and data
|
|
|
|
structure definitions from the Pandoc libraries.
|
|
|
|
|
|
|
|
A typical application will chain together a reader and a writer
|
|
|
|
to convert strings from one format to another. For example, the
|
|
|
|
following simple program will act as a filter converting markdown
|
|
|
|
fragments to reStructuredText, using reference-style links instead of
|
|
|
|
inline links:
|
|
|
|
|
|
|
|
> module Main where
|
|
|
|
> import Text.Pandoc
|
2009-12-31 17:48:21 +01:00
|
|
|
> -- include the following two lines only if you're using ghc < 6.12:
|
|
|
|
> import Prelude hiding (getContents, putStrLn)
|
|
|
|
> import System.IO.UTF8
|
2008-08-01 01:16:31 +02:00
|
|
|
>
|
2007-11-04 00:27:58 +01:00
|
|
|
> markdownToRST :: String -> String
|
2008-08-01 01:16:31 +02:00
|
|
|
> markdownToRST =
|
2012-07-26 07:35:41 +02:00
|
|
|
> (writeRST def {writerReferenceLinks = True}) . readMarkdown def
|
2011-02-15 19:40:50 +01:00
|
|
|
>
|
2009-12-31 17:48:21 +01:00
|
|
|
> main = getContents >>= putStrLn . markdownToRST
|
2007-11-04 00:27:58 +01:00
|
|
|
|
2009-10-05 00:09:23 +02:00
|
|
|
Note: all of the readers assume that the input text has @'\n'@
|
|
|
|
line endings. So if you get your input text from a web form,
|
|
|
|
you should remove @'\r'@ characters using @filter (/='\r')@.
|
|
|
|
|
2007-11-04 00:27:58 +01:00
|
|
|
-}
|
|
|
|
|
|
|
|
module Text.Pandoc
|
2011-02-15 19:40:50 +01:00
|
|
|
(
|
2007-11-04 00:27:58 +01:00
|
|
|
-- * Definitions
|
|
|
|
module Text.Pandoc.Definition
|
2010-12-24 22:39:27 +01:00
|
|
|
-- * Generics
|
|
|
|
, module Text.Pandoc.Generic
|
2012-07-25 19:45:45 +02:00
|
|
|
-- * Options
|
|
|
|
, module Text.Pandoc.Options
|
2010-12-11 02:30:32 +01:00
|
|
|
-- * Lists of readers and writers
|
|
|
|
, readers
|
|
|
|
, writers
|
2007-11-04 00:27:58 +01:00
|
|
|
-- * Readers: converting /to/ Pandoc format
|
|
|
|
, readMarkdown
|
|
|
|
, readRST
|
|
|
|
, readLaTeX
|
|
|
|
, readHtml
|
2010-09-16 11:26:12 +02:00
|
|
|
, readTextile
|
2012-05-13 04:29:39 +02:00
|
|
|
, readDocBook
|
2011-01-20 17:52:11 +01:00
|
|
|
, readNative
|
2007-11-04 00:27:58 +01:00
|
|
|
-- * Writers: converting /from/ Pandoc format
|
2012-07-25 04:05:38 +02:00
|
|
|
, Writer (..)
|
2010-07-04 22:55:48 +02:00
|
|
|
, writeNative
|
2007-11-04 00:27:58 +01:00
|
|
|
, writeMarkdown
|
2010-03-18 07:45:43 +01:00
|
|
|
, writePlain
|
2007-11-04 00:27:58 +01:00
|
|
|
, writeRST
|
|
|
|
, writeLaTeX
|
|
|
|
, writeConTeXt
|
2008-02-24 06:48:41 +01:00
|
|
|
, writeTexinfo
|
2007-11-04 00:27:58 +01:00
|
|
|
, writeHtml
|
|
|
|
, writeHtmlString
|
|
|
|
, writeDocbook
|
2008-03-19 19:45:51 +01:00
|
|
|
, writeOpenDocument
|
2007-11-04 00:27:58 +01:00
|
|
|
, writeMan
|
2008-07-27 05:25:51 +02:00
|
|
|
, writeMediaWiki
|
2010-04-10 21:38:07 +02:00
|
|
|
, writeTextile
|
2007-11-04 00:27:58 +01:00
|
|
|
, writeRTF
|
2010-07-03 05:12:14 +02:00
|
|
|
, writeODT
|
2012-01-03 21:10:10 +01:00
|
|
|
, writeDocx
|
2010-07-03 07:07:00 +02:00
|
|
|
, writeEPUB
|
2011-02-15 19:40:50 +01:00
|
|
|
, writeFB2
|
2010-12-04 11:27:39 +01:00
|
|
|
, writeOrg
|
2011-11-19 06:09:23 +01:00
|
|
|
, writeAsciiDoc
|
2009-12-31 02:10:57 +01:00
|
|
|
-- * Rendering templates and default templates
|
|
|
|
, module Text.Pandoc.Templates
|
2007-11-04 00:27:58 +01:00
|
|
|
-- * Version
|
|
|
|
, pandocVersion
|
2011-01-28 17:42:04 +01:00
|
|
|
-- * Miscellaneous
|
2012-08-10 04:11:19 +02:00
|
|
|
, getReader
|
|
|
|
, getWriter
|
2011-01-28 17:42:04 +01:00
|
|
|
, rtfEmbedImage
|
2011-01-23 02:52:25 +01:00
|
|
|
, jsonFilter
|
2012-01-24 18:06:19 +01:00
|
|
|
, ToJsonFilter(..)
|
2007-11-04 00:27:58 +01:00
|
|
|
) where
|
|
|
|
|
|
|
|
import Text.Pandoc.Definition
|
2010-12-24 22:39:27 +01:00
|
|
|
import Text.Pandoc.Generic
|
2007-11-04 00:27:58 +01:00
|
|
|
import Text.Pandoc.Readers.Markdown
|
|
|
|
import Text.Pandoc.Readers.RST
|
2012-04-15 01:44:21 +02:00
|
|
|
import Text.Pandoc.Readers.DocBook
|
2007-11-04 00:27:58 +01:00
|
|
|
import Text.Pandoc.Readers.LaTeX
|
|
|
|
import Text.Pandoc.Readers.HTML
|
2010-09-16 11:26:12 +02:00
|
|
|
import Text.Pandoc.Readers.Textile
|
2011-01-20 03:36:27 +01:00
|
|
|
import Text.Pandoc.Readers.Native
|
2010-07-04 22:55:48 +02:00
|
|
|
import Text.Pandoc.Writers.Native
|
2007-11-04 00:27:58 +01:00
|
|
|
import Text.Pandoc.Writers.Markdown
|
2011-02-15 19:40:50 +01:00
|
|
|
import Text.Pandoc.Writers.RST
|
2007-11-04 00:27:58 +01:00
|
|
|
import Text.Pandoc.Writers.LaTeX
|
|
|
|
import Text.Pandoc.Writers.ConTeXt
|
2008-02-24 06:48:41 +01:00
|
|
|
import Text.Pandoc.Writers.Texinfo
|
2007-11-04 00:27:58 +01:00
|
|
|
import Text.Pandoc.Writers.HTML
|
2010-07-03 05:12:14 +02:00
|
|
|
import Text.Pandoc.Writers.ODT
|
2012-01-03 21:10:10 +01:00
|
|
|
import Text.Pandoc.Writers.Docx
|
2010-07-03 07:07:00 +02:00
|
|
|
import Text.Pandoc.Writers.EPUB
|
2011-02-15 19:40:50 +01:00
|
|
|
import Text.Pandoc.Writers.FB2
|
2007-11-04 00:27:58 +01:00
|
|
|
import Text.Pandoc.Writers.Docbook
|
2008-03-19 19:45:51 +01:00
|
|
|
import Text.Pandoc.Writers.OpenDocument
|
2007-11-04 00:27:58 +01:00
|
|
|
import Text.Pandoc.Writers.Man
|
2011-02-15 19:40:50 +01:00
|
|
|
import Text.Pandoc.Writers.RTF
|
2008-07-27 05:25:51 +02:00
|
|
|
import Text.Pandoc.Writers.MediaWiki
|
2010-04-10 21:38:07 +02:00
|
|
|
import Text.Pandoc.Writers.Textile
|
2010-12-04 11:27:39 +01:00
|
|
|
import Text.Pandoc.Writers.Org
|
2011-11-19 06:09:23 +01:00
|
|
|
import Text.Pandoc.Writers.AsciiDoc
|
2009-12-31 02:10:57 +01:00
|
|
|
import Text.Pandoc.Templates
|
2012-07-25 19:45:45 +02:00
|
|
|
import Text.Pandoc.Options
|
2012-08-09 17:11:28 +02:00
|
|
|
import Text.Pandoc.Shared (safeRead)
|
2012-07-25 04:05:38 +02:00
|
|
|
import Data.ByteString.Lazy (ByteString)
|
2012-08-10 05:19:06 +02:00
|
|
|
import Data.List (intercalate)
|
2009-04-18 18:15:56 +02:00
|
|
|
import Data.Version (showVersion)
|
2010-12-11 09:06:03 +01:00
|
|
|
import Text.JSON.Generic
|
2012-08-09 16:42:40 +02:00
|
|
|
import Data.Set (Set)
|
|
|
|
import qualified Data.Set as Set
|
|
|
|
import Text.Parsec
|
2012-08-10 05:19:06 +02:00
|
|
|
import Text.Parsec.Error
|
2009-04-18 18:15:56 +02:00
|
|
|
import Paths_pandoc (version)
|
2007-11-04 00:27:58 +01:00
|
|
|
|
|
|
|
-- | Version number of pandoc library.
|
|
|
|
pandocVersion :: String
|
2009-04-18 18:15:56 +02:00
|
|
|
pandocVersion = showVersion version
|
2010-12-11 02:30:32 +01:00
|
|
|
|
2012-08-09 16:42:40 +02:00
|
|
|
parseFormatSpec :: String
|
|
|
|
-> Either ParseError (String, Set Extension -> Set Extension)
|
|
|
|
parseFormatSpec = parse formatSpec ""
|
|
|
|
where formatSpec = do
|
|
|
|
name <- formatName
|
|
|
|
extMods <- many extMod
|
|
|
|
return (name, foldl (.) id extMods)
|
|
|
|
formatName = many1 $ noneOf "-+"
|
|
|
|
extMod = do
|
|
|
|
polarity <- oneOf "-+"
|
2012-08-10 05:19:06 +02:00
|
|
|
name <- many $ noneOf "-+"
|
|
|
|
ext <- case safeRead ("Ext_" ++ name) of
|
2012-08-09 17:11:28 +02:00
|
|
|
Just n -> return n
|
2012-08-10 05:19:06 +02:00
|
|
|
Nothing
|
|
|
|
| name == "lhs" -> return Ext_literate_haskell
|
|
|
|
| otherwise -> fail $ "Unknown extension: " ++ name
|
2012-08-09 16:42:40 +02:00
|
|
|
return $ case polarity of
|
|
|
|
'-' -> Set.delete ext
|
|
|
|
_ -> Set.insert ext
|
|
|
|
|
2010-12-11 02:30:32 +01:00
|
|
|
-- | Association list of formats and readers.
|
2012-07-26 07:35:41 +02:00
|
|
|
readers :: [(String, ReaderOptions -> String -> Pandoc)]
|
2011-01-20 03:36:27 +01:00
|
|
|
readers = [("native" , \_ -> readNative)
|
2010-12-11 09:06:03 +01:00
|
|
|
,("json" , \_ -> decodeJSON)
|
2012-08-10 07:30:44 +02:00
|
|
|
,("markdown_strict" , readMarkdown)
|
2010-12-11 02:30:32 +01:00
|
|
|
,("markdown" , readMarkdown)
|
|
|
|
,("rst" , readRST)
|
2012-04-15 01:44:21 +02:00
|
|
|
,("docbook" , readDocBook)
|
2011-02-15 19:40:50 +01:00
|
|
|
,("textile" , readTextile) -- TODO : textile+lhs
|
2010-12-11 02:30:32 +01:00
|
|
|
,("html" , readHtml)
|
|
|
|
,("latex" , readLaTeX)
|
|
|
|
]
|
|
|
|
|
2012-07-25 04:05:38 +02:00
|
|
|
data Writer = PureStringWriter (WriterOptions -> Pandoc -> String)
|
|
|
|
| IOStringWriter (WriterOptions -> Pandoc -> IO String)
|
|
|
|
| IOByteStringWriter (WriterOptions -> Pandoc -> IO ByteString)
|
2011-01-23 02:52:25 +01:00
|
|
|
|
2012-07-25 04:05:38 +02:00
|
|
|
-- | Association list of formats and writers.
|
|
|
|
writers :: [ ( String, Writer ) ]
|
|
|
|
writers = [
|
|
|
|
("native" , PureStringWriter writeNative)
|
|
|
|
,("json" , PureStringWriter $ \_ -> encodeJSON)
|
|
|
|
,("docx" , IOByteStringWriter writeDocx)
|
|
|
|
,("odt" , IOByteStringWriter writeODT)
|
|
|
|
,("epub" , IOByteStringWriter writeEPUB)
|
|
|
|
,("fb2" , IOStringWriter writeFB2)
|
|
|
|
,("html" , PureStringWriter writeHtmlString)
|
|
|
|
,("html5" , PureStringWriter $ \o ->
|
|
|
|
writeHtmlString o{ writerHtml5 = True })
|
|
|
|
,("s5" , PureStringWriter $ \o ->
|
|
|
|
writeHtmlString o{ writerSlideVariant = S5Slides
|
|
|
|
, writerTableOfContents = False })
|
|
|
|
,("slidy" , PureStringWriter $ \o ->
|
|
|
|
writeHtmlString o{ writerSlideVariant = SlidySlides })
|
|
|
|
,("slideous" , PureStringWriter $ \o ->
|
|
|
|
writeHtmlString o{ writerSlideVariant = SlideousSlides })
|
|
|
|
,("dzslides" , PureStringWriter $ \o ->
|
|
|
|
writeHtmlString o{ writerSlideVariant = DZSlides
|
|
|
|
, writerHtml5 = True })
|
|
|
|
,("docbook" , PureStringWriter writeDocbook)
|
|
|
|
,("opendocument" , PureStringWriter writeOpenDocument)
|
|
|
|
,("latex" , PureStringWriter writeLaTeX)
|
|
|
|
,("beamer" , PureStringWriter $ \o ->
|
|
|
|
writeLaTeX o{ writerBeamer = True })
|
|
|
|
,("context" , PureStringWriter writeConTeXt)
|
|
|
|
,("texinfo" , PureStringWriter writeTexinfo)
|
|
|
|
,("man" , PureStringWriter writeMan)
|
|
|
|
,("markdown" , PureStringWriter writeMarkdown)
|
2012-08-10 07:30:44 +02:00
|
|
|
,("markdown_strict" , PureStringWriter writeMarkdown)
|
2012-07-25 04:05:38 +02:00
|
|
|
,("plain" , PureStringWriter writePlain)
|
|
|
|
,("rst" , PureStringWriter writeRST)
|
|
|
|
,("mediawiki" , PureStringWriter writeMediaWiki)
|
|
|
|
,("textile" , PureStringWriter writeTextile)
|
|
|
|
,("rtf" , PureStringWriter writeRTF)
|
|
|
|
,("org" , PureStringWriter writeOrg)
|
|
|
|
,("asciidoc" , PureStringWriter writeAsciiDoc)
|
|
|
|
]
|
2011-02-15 19:40:50 +01:00
|
|
|
|
2012-08-10 05:41:50 +02:00
|
|
|
getDefaultExtensions :: String -> Set Extension
|
2012-08-10 07:30:44 +02:00
|
|
|
getDefaultExtensions "markdown_strict" = strictExtensions
|
2012-08-10 05:41:50 +02:00
|
|
|
getDefaultExtensions _ = pandocExtensions
|
|
|
|
|
2012-08-10 05:19:06 +02:00
|
|
|
-- | Retrieve reader based on formatSpec (format+extensions).
|
2012-08-10 04:11:19 +02:00
|
|
|
getReader :: String -> Either String (ReaderOptions -> String -> Pandoc)
|
|
|
|
getReader s =
|
|
|
|
case parseFormatSpec s of
|
2012-08-10 05:19:06 +02:00
|
|
|
Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e]
|
2012-08-10 04:11:19 +02:00
|
|
|
Right (readerName, setExts) ->
|
|
|
|
case lookup readerName readers of
|
|
|
|
Nothing -> Left $ "Unknown reader: " ++ readerName
|
|
|
|
Just r -> Right $ \o ->
|
|
|
|
r o{ readerExtensions = setExts $
|
2012-08-10 05:41:50 +02:00
|
|
|
getDefaultExtensions readerName }
|
2012-08-10 04:11:19 +02:00
|
|
|
|
|
|
|
-- | Retrieve writer based on formatSpec (format+extensions).
|
|
|
|
getWriter :: String -> Either String Writer
|
|
|
|
getWriter s =
|
|
|
|
case parseFormatSpec s of
|
2012-08-10 05:19:06 +02:00
|
|
|
Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e]
|
2012-08-10 04:11:19 +02:00
|
|
|
Right (writerName, setExts) ->
|
|
|
|
case lookup writerName writers of
|
|
|
|
Nothing -> Left $ "Unknown writer: " ++ writerName
|
|
|
|
Just (PureStringWriter r) -> Right $ PureStringWriter $
|
|
|
|
\o -> r o{ writerExtensions = setExts $
|
2012-08-10 05:41:50 +02:00
|
|
|
getDefaultExtensions writerName }
|
2012-08-10 04:11:19 +02:00
|
|
|
Just (IOStringWriter r) -> Right $ IOStringWriter $
|
|
|
|
\o -> r o{ writerExtensions = setExts $
|
2012-08-10 05:41:50 +02:00
|
|
|
getDefaultExtensions writerName }
|
2012-08-10 04:11:19 +02:00
|
|
|
Just (IOByteStringWriter r) -> Right $ IOByteStringWriter $
|
|
|
|
\o -> r o{ writerExtensions = setExts $
|
2012-08-10 05:41:50 +02:00
|
|
|
getDefaultExtensions writerName }
|
2012-08-10 04:11:19 +02:00
|
|
|
|
2012-01-24 18:06:19 +01:00
|
|
|
{-# DEPRECATED jsonFilter "Use toJsonFilter instead" #-}
|
2011-01-23 02:52:25 +01:00
|
|
|
-- | Converts a transformation on the Pandoc AST into a function
|
|
|
|
-- that reads and writes a JSON-encoded string. This is useful
|
|
|
|
-- for writing small scripts.
|
|
|
|
jsonFilter :: (Pandoc -> Pandoc) -> String -> String
|
|
|
|
jsonFilter f = encodeJSON . f . decodeJSON
|
2012-01-24 18:06:19 +01:00
|
|
|
|
|
|
|
-- | 'toJsonFilter' convert a function into a filter that reads pandoc's json output
|
2012-01-25 05:49:05 +01:00
|
|
|
-- from stdin, transforms it by walking the AST and applying the specified
|
|
|
|
-- function, and writes the result as json to stdout. Usage example:
|
2012-01-24 18:06:19 +01:00
|
|
|
--
|
|
|
|
-- > -- capitalize.hs
|
2012-01-25 05:49:05 +01:00
|
|
|
-- > -- compile with: ghc --make capitalize
|
|
|
|
-- > -- run with: pandoc -t json | ./capitalize | pandoc -f json
|
2012-01-24 18:06:19 +01:00
|
|
|
-- >
|
|
|
|
-- > import Text.Pandoc
|
|
|
|
-- > import Data.Char (toUpper)
|
|
|
|
-- >
|
|
|
|
-- > main :: IO ()
|
|
|
|
-- > main = toJsonFilter capitalizeStrings
|
|
|
|
-- >
|
|
|
|
-- > capitalizeStrings :: Inline -> Inline
|
|
|
|
-- > capitalizeStrings (Str s) = Str $ map toUpper s
|
|
|
|
-- > capitalizeStrings x = x
|
|
|
|
--
|
2012-01-25 05:49:05 +01:00
|
|
|
-- The function can be any type @(a -> a)@, @(a -> IO a)@, @(a -> [a])@,
|
|
|
|
-- or @(a -> IO [a])@, where @a@ is an instance of 'Data'.
|
|
|
|
-- So, for example, @a@ can be 'Pandoc', 'Inline', 'Block', ['Inline'],
|
|
|
|
-- ['Block'], 'Meta', 'ListNumberStyle', 'Alignment', 'ListNumberDelim',
|
|
|
|
-- 'QuoteType', etc. See 'Text.Pandoc.Definition'.
|
2012-01-24 18:06:19 +01:00
|
|
|
class ToJsonFilter a where
|
|
|
|
toJsonFilter :: a -> IO ()
|
|
|
|
|
|
|
|
instance (Data a) => ToJsonFilter (a -> a) where
|
|
|
|
toJsonFilter f = getContents
|
|
|
|
>>= putStr . encodeJSON . (bottomUp f :: Pandoc -> Pandoc) . decodeJSON
|
|
|
|
|
|
|
|
instance (Data a) => ToJsonFilter (a -> IO a) where
|
|
|
|
toJsonFilter f = getContents >>= (bottomUpM f :: Pandoc -> IO Pandoc) . decodeJSON
|
|
|
|
>>= putStr . encodeJSON
|
2012-01-24 20:50:35 +01:00
|
|
|
|
|
|
|
instance (Data a) => ToJsonFilter (a -> [a]) where
|
|
|
|
toJsonFilter f = getContents
|
|
|
|
>>= putStr . encodeJSON . (bottomUp (concatMap f) :: Pandoc -> Pandoc) . decodeJSON
|
|
|
|
|
|
|
|
instance (Data a) => ToJsonFilter (a -> IO [a]) where
|
|
|
|
toJsonFilter f = getContents
|
|
|
|
>>= (bottomUpM (fmap concat . mapM f) :: Pandoc -> IO Pandoc) . decodeJSON
|
|
|
|
>>= putStr . encodeJSON
|