pandoc/src/Text/Pandoc.hs

401 lines
16 KiB
Haskell
Raw Normal View History

{-# LANGUAGE ScopedTypeVariables, FlexibleInstances, GADTs #-}
{-
Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc
Copyright : Copyright (C) 2006-2016 John MacFarlane
2011-02-15 19:40:50 +01:00
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
2011-02-15 19:40:50 +01:00
Stability : alpha
Portability : portable
This helper module exports the main writers, readers, and data
structure definitions from the Pandoc libraries.
A typical application will chain together a reader and a writer
to convert strings from one format to another. For example, the
following simple program will act as a filter converting markdown
fragments to reStructuredText, using reference-style links instead of
inline links:
> module Main where
> import Text.Pandoc
>
> markdownToRST :: String -> Either PandocError String
> markdownToRST =
> writeRST def {writerReferenceLinks = True} . readMarkdown def
2011-02-15 19:40:50 +01:00
>
> main = getContents >>= either error return markdownToRST >>= putStrLn
Note: all of the readers assume that the input text has @'\n'@
line endings. So if you get your input text from a web form,
you should remove @'\r'@ characters using @filter (/='\r')@.
-}
module Text.Pandoc
2011-02-15 19:40:50 +01:00
(
-- * Definitions
module Text.Pandoc.Definition
-- * Generics
, module Text.Pandoc.Generic
-- * Options
, module Text.Pandoc.Options
-- * Typeclass
, PandocMonad
, runIO
, runPure
, runIOorExplode
-- * Error handling
, module Text.Pandoc.Error
-- * Lists of readers and writers
, readers
-- , writers
, writers
-- * Readers: converting /to/ Pandoc format
, Reader (..)
, readDocx
, readOdt
, readMarkdown
, readCommonMark
, readMediaWiki
, readRST
, readOrg
, readLaTeX
, readHtml
2010-09-16 11:26:12 +02:00
, readTextile
2012-05-13 04:29:39 +02:00
, readDocBook
, readOPML
, readHaddock
, readNative
, readJSON
2014-07-07 20:21:38 +02:00
, readTWiki
, readTxt2Tags
2014-07-30 01:57:20 +02:00
, readEPUB
-- * Writers: converting /from/ Pandoc format
, Writer(..)
, writeNative
, writeJSON
, writeMarkdown
, writePlain
, writeRST
, writeLaTeX
, writeConTeXt
, writeTexinfo
, writeHtml4
, writeHtml4String
, writeHtml5
, writeHtml5String
2013-12-01 21:11:39 +01:00
, writeICML
, writeDocbook
, writeOPML
, writeOpenDocument
, writeMan
, writeMediaWiki
, writeDokuWiki
, writeZimWiki
2010-04-10 21:38:07 +02:00
, writeTextile
, writeRTF
, writeODT
, writeDocx
, writeEPUB2
, writeEPUB3
2011-02-15 19:40:50 +01:00
, writeFB2
, writeOrg
, writeAsciiDoc
, writeHaddock
, writeCommonMark
, writeCustom
2015-12-24 17:36:58 +01:00
, writeTEI
-- * Rendering templates and default templates
, module Text.Pandoc.Templates
-- * Miscellaneous
, getReader
, getWriter
, getDefaultExtensions
, pandocVersion
) where
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.CommonMark
import Text.Pandoc.Readers.MediaWiki
import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.Org
import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.OPML
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.HTML
2010-09-16 11:26:12 +02:00
import Text.Pandoc.Readers.Textile
import Text.Pandoc.Readers.Native
import Text.Pandoc.Readers.Haddock
2014-07-07 20:21:38 +02:00
import Text.Pandoc.Readers.TWiki
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Readers.Odt
import Text.Pandoc.Readers.Txt2Tags
2014-07-30 01:57:20 +02:00
import Text.Pandoc.Readers.EPUB
import Text.Pandoc.Writers.Native
import Text.Pandoc.Writers.Markdown
2011-02-15 19:40:50 +01:00
import Text.Pandoc.Writers.RST
import Text.Pandoc.Writers.LaTeX
import Text.Pandoc.Writers.ConTeXt
import Text.Pandoc.Writers.Texinfo
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.ODT
import Text.Pandoc.Writers.Docx
import Text.Pandoc.Writers.EPUB
2011-02-15 19:40:50 +01:00
import Text.Pandoc.Writers.FB2
2013-12-01 21:11:39 +01:00
import Text.Pandoc.Writers.ICML
import Text.Pandoc.Writers.Docbook
import Text.Pandoc.Writers.OPML
import Text.Pandoc.Writers.OpenDocument
import Text.Pandoc.Writers.Man
2011-02-15 19:40:50 +01:00
import Text.Pandoc.Writers.RTF
import Text.Pandoc.Writers.MediaWiki
import Text.Pandoc.Writers.DokuWiki
import Text.Pandoc.Writers.ZimWiki
2010-04-10 21:38:07 +02:00
import Text.Pandoc.Writers.Textile
import Text.Pandoc.Writers.Org
import Text.Pandoc.Writers.AsciiDoc
import Text.Pandoc.Writers.Haddock
import Text.Pandoc.Writers.CommonMark
import Text.Pandoc.Writers.Custom
2015-12-24 17:36:58 +01:00
import Text.Pandoc.Writers.TEI
import Text.Pandoc.Templates
import Text.Pandoc.Options
2016-11-30 19:26:32 +01:00
import Text.Pandoc.Shared (safeRead, mapLeft, pandocVersion)
2015-02-18 20:56:59 +01:00
import Text.Pandoc.Error
import Text.Pandoc.Class
import Data.Aeson
import qualified Data.ByteString.Lazy as BL
import Data.List (intercalate)
2012-08-09 16:42:40 +02:00
import Text.Parsec
import Text.Parsec.Error
import qualified Text.Pandoc.UTF8 as UTF8
2016-11-28 23:13:46 +01:00
import Control.Monad.Except (throwError)
2012-08-09 16:42:40 +02:00
parseFormatSpec :: String
-> Either ParseError (String, Extensions -> Extensions)
2012-08-09 16:42:40 +02:00
parseFormatSpec = parse formatSpec ""
where formatSpec = do
name <- formatName
extMods <- many extMod
return (name, \x -> foldl (flip ($)) x extMods)
2012-08-09 16:42:40 +02:00
formatName = many1 $ noneOf "-+"
extMod = do
polarity <- oneOf "-+"
name <- many $ noneOf "-+"
ext <- case safeRead ("Ext_" ++ name) of
Just n -> return n
Nothing
| name == "lhs" -> return Ext_literate_haskell
| otherwise -> fail $ "Unknown extension: " ++ name
2012-08-09 16:42:40 +02:00
return $ case polarity of
'-' -> disableExtension ext
_ -> enableExtension ext
2012-08-09 16:42:40 +02:00
-- TODO: when we get the PandocMonad stuff all sorted out,
-- we can simply these types considerably. Errors/MediaBag can be
-- part of the monad's internal state.
2016-11-28 23:13:46 +01:00
data Reader m = StringReader (ReaderOptions -> String -> m Pandoc)
| ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
2016-11-28 23:13:46 +01:00
-- mkStringReader :: (ReaderOptions -> String -> Either PandocError Pandoc) -> Reader IO
-- mkStringReader r = StringReader (\o s -> return $ r o s)
2016-11-28 23:13:46 +01:00
-- mkStringReaderWithWarnings :: (ReaderOptions -> String -> Either PandocError (Pandoc, [String])) -> Reader IO
-- mkStringReaderWithWarnings r = StringReader $ \o s ->
-- case r o s of
-- Left err -> return $ Left err
-- Right (doc, warnings) -> do
-- mapM_ warn warnings
-- return (Right doc)
2016-11-28 23:13:46 +01:00
-- mkBSReader :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)) -> Reader IO
-- mkBSReader r = ByteStringReader (\o s -> return $ r o s)
2016-11-28 23:13:46 +01:00
-- mkBSReaderWithWarnings :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag, [String])) -> Reader IO
-- mkBSReaderWithWarnings r = ByteStringReader $ \o s ->
-- case r o s of
-- Left err -> return $ Left err
-- Right (doc, mediaBag, warnings) -> do
-- mapM_ warn warnings
-- return $ Right (doc, mediaBag)
-- | Association list of formats and readers.
2016-11-28 23:13:46 +01:00
readers :: PandocMonad m => [(String, Reader m)]
readers = [ ("native" , StringReader readNative)
2016-11-28 23:13:46 +01:00
,("json" , StringReader $ \o s ->
case readJSON o s of
Right doc -> return doc
Left _ -> throwError $ PandocParseError "JSON parse error")
,("markdown" , StringReader readMarkdown)
,("markdown_strict" , StringReader readMarkdown)
,("markdown_phpextra" , StringReader readMarkdown)
,("markdown_github" , StringReader readMarkdown)
,("markdown_mmd", StringReader readMarkdown)
,("commonmark" , StringReader readCommonMark)
,("rst" , StringReader readRST)
2016-11-28 23:13:46 +01:00
,("mediawiki" , StringReader readMediaWiki)
,("docbook" , StringReader readDocBook)
,("opml" , StringReader readOPML)
,("org" , StringReader readOrg)
,("textile" , StringReader readTextile) -- TODO : textile+lhs
,("html" , StringReader readHtml)
,("latex" , StringReader readLaTeX)
,("haddock" , StringReader readHaddock)
,("twiki" , StringReader readTWiki)
,("docx" , ByteStringReader readDocx)
,("odt" , ByteStringReader readOdt)
2016-11-30 19:26:32 +01:00
,("t2t" , StringReader readTxt2Tags)
2016-11-28 23:13:46 +01:00
,("epub" , ByteStringReader readEPUB)
]
data Writer m = StringWriter (WriterOptions -> Pandoc -> m String)
| ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString)
-- | Association list of formats and writers.
writers :: PandocMonad m => [ ( String, Writer m) ]
writers = [
("native" , StringWriter writeNative)
,("json" , StringWriter $ \o d -> return $ writeJSON o d)
,("docx" , ByteStringWriter writeDocx)
,("odt" , ByteStringWriter writeODT)
,("epub" , ByteStringWriter writeEPUB2)
,("epub2" , ByteStringWriter writeEPUB2)
,("epub3" , ByteStringWriter writeEPUB3)
,("fb2" , StringWriter writeFB2)
,("html" , StringWriter writeHtml5String)
,("html4" , StringWriter writeHtml4String)
,("html5" , StringWriter writeHtml5String)
,("icml" , StringWriter writeICML)
,("s5" , StringWriter $ \o ->
writeHtml4String o{ writerSlideVariant = S5Slides
, writerTableOfContents = False })
,("slidy" , StringWriter $ \o ->
writeHtml4String o{ writerSlideVariant = SlidySlides })
,("slideous" , StringWriter $ \o ->
writeHtml4String o{ writerSlideVariant = SlideousSlides })
,("dzslides" , StringWriter $ \o ->
writeHtml5String o{ writerSlideVariant = DZSlides })
,("revealjs" , StringWriter $ \o ->
writeHtml5String o{ writerSlideVariant = RevealJsSlides })
,("docbook" , StringWriter writeDocbook)
,("docbook5" , StringWriter $ \o ->
2016-04-30 00:43:15 +02:00
writeDocbook o{ writerDocbook5 = True })
,("opml" , StringWriter writeOPML)
,("opendocument" , StringWriter writeOpenDocument)
,("latex" , StringWriter writeLaTeX)
,("beamer" , StringWriter $ \o ->
writeLaTeX o{ writerBeamer = True })
,("context" , StringWriter writeConTeXt)
,("texinfo" , StringWriter writeTexinfo)
,("man" , StringWriter writeMan)
,("markdown" , StringWriter writeMarkdown)
,("markdown_strict" , StringWriter writeMarkdown)
,("markdown_phpextra" , StringWriter writeMarkdown)
,("markdown_github" , StringWriter writeMarkdown)
,("markdown_mmd" , StringWriter writeMarkdown)
,("plain" , StringWriter writePlain)
,("rst" , StringWriter writeRST)
,("mediawiki" , StringWriter writeMediaWiki)
,("dokuwiki" , StringWriter writeDokuWiki)
,("zimwiki" , StringWriter writeZimWiki)
,("textile" , StringWriter writeTextile)
,("rtf" , StringWriter writeRTF)
,("org" , StringWriter writeOrg)
,("asciidoc" , StringWriter writeAsciiDoc)
,("haddock" , StringWriter writeHaddock)
,("commonmark" , StringWriter writeCommonMark)
,("tei" , StringWriter writeTEI)
]
2011-02-15 19:40:50 +01:00
getDefaultExtensions :: String -> Extensions
2012-08-10 07:30:44 +02:00
getDefaultExtensions "markdown_strict" = strictExtensions
getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions
getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
getDefaultExtensions "markdown_github" = githubMarkdownExtensions
getDefaultExtensions "markdown" = pandocExtensions
getDefaultExtensions "plain" = plainExtensions
getDefaultExtensions "org" = extensionsFromList
[Ext_citations,
Ext_auto_identifiers]
getDefaultExtensions "html" = extensionsFromList
[Ext_auto_identifiers,
Ext_native_divs,
Ext_native_spans]
getDefaultExtensions "html4" = getDefaultExtensions "html"
getDefaultExtensions "html5" = getDefaultExtensions "html"
getDefaultExtensions "epub" = extensionsFromList
[Ext_raw_html,
Ext_native_divs,
Ext_native_spans,
Ext_epub_html_exts]
getDefaultExtensions "epub2" = getDefaultExtensions "epub"
getDefaultExtensions "epub3" = getDefaultExtensions "epub"
getDefaultExtensions "latex" = extensionsFromList
[Ext_smart,
Ext_auto_identifiers]
getDefaultExtensions "context" = extensionsFromList
[Ext_smart,
Ext_auto_identifiers]
getDefaultExtensions "textile" = extensionsFromList
[Ext_old_dashes,
Ext_smart,
Ext_auto_identifiers]
getDefaultExtensions _ = extensionsFromList
[Ext_auto_identifiers]
-- | Retrieve reader based on formatSpec (format+extensions).
2016-11-28 23:13:46 +01:00
getReader :: PandocMonad m => String -> Either String (Reader m)
getReader s =
case parseFormatSpec s of
2015-11-22 16:37:51 +01:00
Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e]
Right (readerName, setExts) ->
case lookup readerName readers of
Nothing -> Left $ "Unknown reader: " ++ readerName
Just (StringReader r) -> Right $ StringReader $ \o ->
r o{ readerExtensions = setExts $
getDefaultExtensions readerName }
Just (ByteStringReader r) -> Right $ ByteStringReader $ \o ->
r o{ readerExtensions = setExts $
getDefaultExtensions readerName }
getWriter :: PandocMonad m => String -> Either String (Writer m)
getWriter s
= case parseFormatSpec s of
2015-11-22 16:37:51 +01:00
Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e]
Right (writerName, setExts) ->
case lookup writerName writers of
Nothing -> Left $ "Unknown writer: " ++ writerName
Just (StringWriter r) -> Right $ StringWriter $
\o -> r o{ writerExtensions = setExts $
getDefaultExtensions writerName }
Just (ByteStringWriter r) -> Right $ ByteStringWriter $
\o -> r o{ writerExtensions = setExts $
getDefaultExtensions writerName }
2015-02-18 20:56:59 +01:00
readJSON :: ReaderOptions -> String -> Either PandocError Pandoc
2016-12-01 18:13:51 +01:00
readJSON _ = mapLeft PandocParseError . eitherDecode' . UTF8.fromStringLazy
writeJSON :: WriterOptions -> Pandoc -> String
writeJSON _ = UTF8.toStringLazy . encode