Integrate server into main pandoc.

- Remove server flag.
- Remove pandoc-server executable.
- Add Text.Pandoc.Server as exposed module. [API change]
- Re-use Opt (and our existing FromJSON instance) for Params.
- Document.
This commit is contained in:
John MacFarlane 2022-08-16 16:27:31 -07:00
parent 90d52b7129
commit 8ddc2fc79a
13 changed files with 604 additions and 570 deletions

View file

@ -56,7 +56,7 @@ jobs:
testopts: '--test-option=--hide-successes --test-option=--ansi-tricks=false'
- ghc: '8.10.7'
cabal: '3.2'
cabalopts: '-fserver'
cabalopts: ''
testopts: '--test-option=--hide-successes --test-option=--ansi-tricks=false'
- ghc: '9.0.2'
cabal: '3.4'

View file

@ -96,7 +96,7 @@ jobs:
run: |
stack --no-terminal setup
stack --no-terminal update
stack --no-terminal install
stack --no-terminal install --ghc-options='-j4 +RTS -A256m -RTS -split-sections'
- name: Create artifacts
run: |
@ -112,8 +112,13 @@ jobs:
mkdir -p ${DEST}/bin
mkdir -p ${DEST}/share/man/man1
cp ~/.local/bin/pandoc ${DEST}/bin/
strip ${DEST}/bin/pandoc
SRCDIR=$(pwd)
cd ${DEST}/bin
strip pandoc
ln -s pandoc pandoc-server
cd ${SRCDIR}
cp man/pandoc.1 ${DEST}/share/man/man1/pandoc.1
cp man/pandoc-server.1 ${DEST}/share/man/man1/pandoc-server.1
~/.local/bin/pandoc -s COPYING.md -Vpagetitle=License -o ${RESOURCES}/license.html
chown -R $ME:staff ${ROOT}
sed -e "s/PANDOCVERSION/${VERSION}/" macos/distribution.xml.in > ${ARTIFACTS}/distribution.xml

View file

@ -102,7 +102,7 @@ man/pandoc.1: MANUAL.txt man/pandoc.1.before man/pandoc.1.after
--variable footer="pandoc $(version)" \
-o $@
man/pandoc-server.1: server/pandoc-server.md
man/pandoc-server.1: doc/pandoc-server.md
pandoc $< -f markdown -t man -s \
--lua-filter man/manfilter.lua \
--variable footer="pandoc-server $(version)" \

View file

@ -14,7 +14,18 @@ module Main where
import qualified Control.Exception as E
import Text.Pandoc.App (convertWithOpts, defaultOpts, options, parseOptions)
import Text.Pandoc.Error (handleError)
import Text.Pandoc.Server (ServerOpts(..), parseServerOpts, app)
import System.Environment (getProgName)
import qualified Network.Wai.Handler.CGI as CGI
import qualified Network.Wai.Handler.Warp as Warp
import Network.Wai.Middleware.Timeout (timeout)
main :: IO ()
main = E.catch (parseOptions options defaultOpts >>= convertWithOpts)
(handleError . Left)
main = E.handle (handleError . Left) $ do
prg <- getProgName
case prg of
"pandoc-server.cgi" -> CGI.run (timeout 2 app)
"pandoc-server" -> do
sopts <- parseServerOpts
Warp.run (serverPort sopts) (timeout (serverTimeout sopts) app)
_ -> parseOptions options defaultOpts >>= convertWithOpts

View file

@ -12,16 +12,17 @@ date: August 15, 2022
`pandoc-server` is a web server that can perform pandoc
conversions. It can be used either as a running server
or as a CGI program. To use `pandoc-server` as a CGI
program, rename it (or symlink it) as `pandoc-server.cgi`.
(Note: if you symlink it, you may need to adjust your
webserver's configuration in order to allow it to follow
symlinks for the CGI script.)
or as a CGI program.
To use `pandoc-server` as a CGI program, rename it (or symlink
it) as `pandoc-server.cgi`. (Note: if you symlink it, you may
need to adjust your webserver's configuration in order to allow
it to follow symlinks for the CGI script.)
All pandoc functions are run in the PandocPure monad, which
ensures that they can do no I/O operations on the server.
This should provide a high degree of security. It does,
however, impose certain limitations:
This should provide a high degree of security. This security
does, however, impose certain limitations:
- PDFs cannot be produced.
@ -85,17 +86,38 @@ the first one given is the default.
: The output format, possibly with extensions, just as it is
specified on the pandoc command line.
`wrapText` (`"auto"|"preserve"|"none"`)
`shift-heading-level-by` (integer, default 0)
: Text wrapping option: either `"auto"` (automatic
hard-wrapping to fit within a column width), `"preserve"`
(insert newlines where they are present in the source),
or `"none"` (don't insert any unnecessary newlines at all).
: Increase or decrease the level of all headings.
`columns` (integer, default 72)
`indented-code-classes` (array of strings)
: Column width (affects text wrapping and calculation of
table column widths in plain text formats)
: List of classes to be applied to indented Markdown code blocks.
`default-image-extension` (string)
: Extension to be applied to image sources that lack extensions
(e.g. `".jpg"`).
`metadata` (JSON map)
: String-valued metadata.
`tab-stop` (integer, default 4)
: Tab stop (spaces per tab).
`track-changes` (`"accept"|"reject"|"all"`)
: Specifies what to do with insertions, deletions, and
comments produced by the MS Word "Track Changes" feature. Only
affects docx input.
`abbreviations` (file path)
: List of strings to be regarded as abbreviations when
parsing Markdown. See `--abbreviations` in `pandoc(1)` for
details.
`standalone` (boolean, default false)
@ -108,119 +130,7 @@ the first one given is the default.
: String contents of a document template (see Templates in
`pandoc(1)` for the format).
`tabStop` (integer, default 4)
: Tab stop (spaces per tab).
`indentedCodeClasses` (array of strings)
: List of classes to be applied to indented Markdown code blocks.
`abbreviations` (array of strings)
: List of strings to be regarded as abbreviations when
parsing Markdown. See `--abbreviations` in `pandoc(1)` for
details.
`defaultImageExtension` (string)
: Extension to be applied to image sources that lack extensions
(e.g. `".jpg"`).
`trackChanges` (`"accept"|"reject"|"all"`)
: Specifies what to do with insertions, deletions, and
comments produced by the MS Word "Track Changes" feature. Only
affects docx input.
`stripComments` (boolean, default false)
: Causes HTML comments to be stripped in Markdown or Textile
source, instead of being passed through to the output format.
`citeproc` (boolean, default false)
: Causes citations to be processed using citeproc. See
Citations in `pandoc(1)` for details.
`citeMethod` (`"citeproc"|"natbib"|"biblatex"`)
: Determines how citations are formatted in LaTeX output.
`tableOfContents` (boolean, default false)
: Include a table of contents (in supported formats).
`tocDepth` (integer, default 3)
: Depth of sections to include in the table of contents.
`numberSections` (boolean, default false)
: Automatically number sections (in supported formats).
`numberOffset` (array of integers)
: Offsets to be added to each component of the section number.
For example, `[1]` will cause the first section to be
numbered "2" and the first subsection "2.1"; `[0,1]` will
cause the first section to be numbered "1" and the first
subsection "1.2."
`identifierPrefix` (string)
: Prefix to be added to all automatically-generated identifiers.
`sectionDivs` (boolean, default false)
: Arrange the document into a hierarchy of nested sections
based on the headings.
`htmlQTags` (boolean, default false)
: Use `<q>` elements in HTML instead of literal quotation marks.
`listings` (boolean, default false)
: Use the `listings` package to format code in LaTeX output.
`referenceLinks` (boolean, default false)
: Create reference links rather than inline links in Markdown output.
`setextHeaders` (boolean, default false)
: Use Setext (underlined) headings instead of ATX (`#`-prefixed)
in Markdown output.
`preferAscii` (boolean, default false)
: Use entities and escapes when possible to avoid non-ASCII
characters in the output.
`referenceLocation` (`"document"|"section"|"block"`)
: Determines whether link references and footnotes are placed
at the end of the document, the end of the section, or the
end of the block (e.g. paragraph), in
certain formats. (See `pandoc(1)` under `--reference-location`.)
`topLevelDivision` (`"default"|"part"|"chapter"|"section"`)
: Determines how top-level headings are interpreted in
LaTeX, ConTeXt, DocBook, and TEI. The `"default"` value
tries to choose the best interpretation based on heuristics.
`emailObfuscation` (`"none"|"references"|"javascript"`)
: Determines how email addresses are obfuscated in HTML.
`htmlMathMethod` (`"plain"|"webtex"|"gladtex"|"mathml"|"mathjax"|"katex"`)
: Determines how math is represented in HTML.
`variables` (JSON mapping)
`variables` (JSON map)
: Variables to be interpolated in the template. (See Templates
in `pandoc(1)`.)
@ -230,17 +140,32 @@ the first one given is the default.
: Dots-per-inch to use for conversions between pixels and
other measurements (for image sizes).
`incremental` (boolean, default false)
`wrap` (`"auto"|"preserve"|"none"`)
: If true, lists appear incrementally by default in slide shows.
: Text wrapping option: either `"auto"` (automatic
hard-wrapping to fit within a column width), `"preserve"`
(insert newlines where they are present in the source),
or `"none"` (don't insert any unnecessary newlines at all).
`slideLevel` (integer)
`columns` (integer, default 72)
: Heading level that deterimes slide divisions in slide shows.
The default is to pick the highest heading level under which
there is body text.
: Column width (affects text wrapping and calculation of
table column widths in plain text formats)
`highlightStyle` (string, default `"pygments"`)
`table-of-contents` (boolean, default false)
: Include a table of contents (in supported formats).
`toc-depth` (integer, default 3)
: Depth of sections to include in the table of contents.
`strip-comments` (boolean, default false)
: Causes HTML comments to be stripped in Markdown or Textile
source, instead of being passed through to the output format.
`highlight-style` (string, default `"pygments"`)
: Specify the style to use for syntax highlighting of code.
Standard styles are `"pygments"` (the default), `"kate"`,
@ -250,27 +175,149 @@ the first one given is the default.
case, the relevant file contents must also be included
in `files`, see below).
`epubMetadata` (string)
`embed-resources`
: Dublin core XML elements to be used for EPUB metadata.
: Embed images, scripts, styles and other resources in an HTML
document using `data` URIs. Note that this will not work
unless the contents of all external resources are included
under `files`.
`epubChapterLevel` (integer, default 1)
`html-q-tags` (boolean, default false)
: Use `<q>` elements in HTML instead of literal quotation marks.
`ascii` (boolean, default false)
: Use entities and escapes when possible to avoid non-ASCII
characters in the output.
`reference-links` (boolean, default false)
: Create reference links rather than inline links in Markdown output.
`referenceLocation` (`"document"|"section"|"block"`)
: Determines whether link references and footnotes are placed
at the end of the document, the end of the section, or the
end of the block (e.g. paragraph), in
certain formats. (See `pandoc(1)` under `--reference-location`.)
`setext-headers` (boolean, default false)
: Use Setext (underlined) headings instead of ATX (`#`-prefixed)
in Markdown output.
`top-level-division` (`"default"|"part"|"chapter"|"section"`)
: Determines how top-level headings are interpreted in
LaTeX, ConTeXt, DocBook, and TEI. The `"default"` value
tries to choose the best interpretation based on heuristics.
`number-sections` (boolean, default false)
: Automatically number sections (in supported formats).
`number-offset` (array of integers)
: Offsets to be added to each component of the section number.
For example, `[1]` will cause the first section to be
numbered "2" and the first subsection "2.1"; `[0,1]` will
cause the first section to be numbered "1" and the first
subsection "1.2."
`html-math-method` (`"plain"|"webtex"|"gladtex"|"mathml"|"mathjax"|"katex"`)
: Determines how math is represented in HTML.
`listings` (boolean, default false)
: Use the `listings` package to format code in LaTeX output.
`incremental` (boolean, default false)
: If true, lists appear incrementally by default in slide shows.
`slide-level` (integer)
: Heading level that deterimes slide divisions in slide shows.
The default is to pick the highest heading level under which
there is body text.
`section-divs` (boolean, default false)
: Arrange the document into a hierarchy of nested sections
based on the headings.
`email-obfuscation` (`"none"|"references"|"javascript"`)
: Determines how email addresses are obfuscated in HTML.
`identifier-prefix` (string)
: Prefix to be added to all automatically-generated identifiers.
`title-prefix` (string)
: Prefix to be added to the title in the HTML header.
`reference-doc` (file path)
: Reference doc to use in creating `docx` or `odt` or `pptx`.
See `pandoc(1)` under `--reference-doc` for details.
The contents of the file must be included under `files`.
`epub-cover-image` (file path)
: Cover image for EPUB.
The contents of the file must be included under `files`.
`epub-metadata` (file path)
: Path of file containing Dublin core XML elements to be used for
EPUB metadata. The contents of the file must be included
under `files`.
`epub-chapter-level` (integer, default 1)
: Heading level at which chapter splitting occurs in EPUBs.
`epubSubdirectory` (string, default "EPUB")
`epub-subdirectory` (string, default "EPUB")
: Name of content subdirectory in the EPUB container.
`epubFonts` (array of file paths)
`epub-fonts` (array of file paths)
: Fonts to include in the EPUB. The fonts themselves must be
included in `files` (see below).
`referenceDoc` (file path)
`ipynb-output` (`"best"|"all"|"none"`)
: Reference doc to use in creating `docx` or `odt` or `pptx`.
See `pandoc(1)` under `--reference-doc` for details.
: Determines how ipynb output cells are treated. `all` means
that all of the data formats included in the original are
preserved. `none` means that the contents of data cells
are omitted. `best` causes pandoc to try to pick the
richest data block in each output cell that is compatible
with the output format.
`citeproc` (boolean, default false)
: Causes citations to be processed using citeproc. See
Citations in `pandoc(1)` for details.
`bibliography` (array of file paths)
: Files containing bibliographic data. The contents of the
files must be included in `files`.
`csl` (file path)
: CSL style file. The contents of the file must be included
in `files`.
`cite-method` (`"citeproc"|"natbib"|"biblatex"`)
: Determines how citations are formatted in LaTeX output.
`files` (JSON mapping of file paths to base64-encoded strings)
@ -280,7 +327,6 @@ the first one given is the default.
left as it is, unless it is *also* valid base 64 data,
in which case it will be interpreted that way.
## `/batch` endpoint
The `/batch` endpoint behaves like the root endpoint,

View file

@ -27,42 +27,44 @@ ghc --version
cabal update
cabal clean
cabal configure -fserver -f-export-dynamic -fembed_data_files --enable-executable-static --ghc-options '-j4 +RTS -A256m -RTS -split-sections -optc-Os -optl=-pthread' pandoc pandoc-server
cabal configure -f-export-dynamic -fembed_data_files --enable-executable-static --ghc-options '-j4 +RTS -A256m -RTS -split-sections -optc-Os -optl=-pthread' pandoc
cabal build -j4
for f in $(find dist-newstyle -name 'pandoc' -type f -perm /400); do cp $f $ARTIFACTS/; done
for f in $(find dist-newstyle -name 'pandoc-server' -type f -perm /400); do cp $f /$ARTIFACTS/; done
# Confirm that we have static builds
file $ARTIFACTS/pandoc | grep "statically linked"
file $ARTIFACTS/pandoc-server | grep "statically linked"
# make deb for EXE
make_deb() {
VERSION=`$ARTIFACTS/$EXE --version | awk '{print $2; exit;}'`
VERSION=`$ARTIFACTS/pandoc --version | awk '{print $2; exit;}'`
REVISION=${REVISION:-1}
DEBVER=$VERSION-$REVISION
BASE=$EXE-$DEBVER-$ARCHITECTURE
BASE=pandoc-$DEBVER-$ARCHITECTURE
DIST=/mnt/$BASE
DEST=$DIST/usr
COPYRIGHT=$DEST/share/doc/$EXE/copyright
COPYRIGHT=$DEST/share/doc/pandoc/copyright
cd /mnt
mkdir -p $DEST/bin
mkdir -p $DEST/share/man/man1
mkdir -p $DEST/share/doc/$EXE
mkdir -p $DEST/share/doc/pandoc
find $DIST -type d | xargs chmod 755
cp $ARTIFACTS/$EXE $DEST/bin/
strip $DEST/bin/$EXE
cp /mnt/man/$EXE.1 $DEST/share/man/man1/$EXE.1
gzip -9 $DEST/share/man/man1/$EXE.1
cp $ARTIFACTS/pandoc $DEST/bin/
cd $DEST/bin
strip pandoc
ln -s pandoc pandoc-server
cd /mnt
cp /mnt/man/pandoc.1 $DEST/share/man/man1/pandoc.1
gzip -9 $DEST/share/man/man1/pandoc.1
cp /mnt/man/pandoc-server.1 $DEST/share/man/man1/pandoc-server.1
gzip -9 $DEST/share/man/man1/pandoc-server.1
cp /mnt/COPYRIGHT $COPYRIGHT
echo "" >> $COPYRIGHT
INSTALLED_SIZE=$(du -k -s $DEST | awk '{print $1}')
mkdir $DIST/DEBIAN
perl -pe "s/VERSION/$DEBVER/" /mnt/linux/$EXE.control.in | \
perl -pe "s/VERSION/$DEBVER/" /mnt/linux/control.in | \
perl -pe "s/ARCHITECTURE/$ARCHITECTURE/" | \
perl -pe "s/INSTALLED_SIZE/$INSTALLED_SIZE/" \
> $DIST/DEBIAN/control
@ -73,26 +75,28 @@ make_deb() {
cp $BASE.deb $ARTIFACTS/
}
# Make tarball for EXE
# Make tarball for pandoc
make_tarball() {
TARGET=$EXE-$VERSION
TARGET=pandoc-$VERSION
cd $ARTIFACTS
rm -rf $TARGET
mkdir $TARGET
mkdir $TARGET/bin $TARGET/share $TARGET/share/man $TARGET/share/man/man1
cp /mnt/man/$EXE.1 $TARGET/share/man/man1
mv $EXE $TARGET/bin
strip $TARGET/bin/$EXE
gzip -9 $TARGET/share/man/man1/$EXE.1
cp /mnt/man/pandoc.1 $TARGET/share/man/man1
cp /mnt/man/pandoc-server.1 $TARGET/share/man/man1
mv pandoc $TARGET/bin
cd $TARGET/bin
strip pandoc
ln -s pandoc pandoc-server
cd $ARTIFACTS
gzip -9 $TARGET/share/man/man1/pandoc.1
gzip -9 $TARGET/share/man/man1/pandoc-server.1
tar cvzf $TARGET-linux-$ARCHITECTURE.tar.gz $TARGET
rm -r $TARGET
}
for EXE in pandoc pandoc-server
do
make_deb
make_tarball
done
make_deb
make_tarball
exit 0

View file

@ -1,9 +0,0 @@
Package: pandoc-server
Version: VERSION
Section: text
Priority: optional
Architecture: ARCHITECTURE
Installed-Size: INSTALLED_SIZE
Depends: libc6 (>= 2.13), libgmp10, zlib1g (>= 1:1.1.4)
Maintainer: John MacFarlane <jgm@berkeley.edu>
Description: HTTP server for pandoc document format converter

View file

@ -429,10 +429,6 @@ flag lua53
Description: Embed Lua 5.3 instead of 5.4.
Default: False
flag server
Description: Build pandoc-server executable.
Default: False
flag nightly
Description: Add '-nightly-COMPILEDATE' to the output of '--version'.
Default: False
@ -538,7 +534,10 @@ library
xml-types >= 0.3 && < 0.4,
yaml >= 0.11 && < 0.12,
zip-archive >= 0.2.3.4 && < 0.5,
zlib >= 0.5 && < 0.7
zlib >= 0.5 && < 0.7,
servant-server,
wai >= 0.3
if !os(windows)
build-depends: unix >= 2.4 && < 2.8
if flag(nightly)
@ -564,6 +563,7 @@ library
Text.Pandoc.MediaBag,
Text.Pandoc.Error,
Text.Pandoc.Filter,
Text.Pandoc.Server,
Text.Pandoc.Readers,
Text.Pandoc.Readers.HTML,
Text.Pandoc.Readers.LaTeX,
@ -789,32 +789,8 @@ executable pandoc
main-is: pandoc.hs
buildable: True
other-modules: Paths_pandoc
executable pandoc-server
import: common-executable
main-is: Main.hs
other-modules: PandocServer
hs-source-dirs: server
if flag(server)
build-depends: base,
pandoc,
aeson,
text,
containers,
data-default,
bytestring,
skylighting,
base64 >= 0.4,
doctemplates,
servant-server,
wai >= 0.3,
wai-extra >= 3.0.24,
warp,
optparse-applicative
buildable: True
else
buildable: False
build-depends: wai-extra >= 3.0.24,
warp
test-suite test-pandoc
import: common-executable

View file

@ -1,54 +0,0 @@
module Main where
import PandocServer (app)
import Text.Pandoc (pandocVersion)
import Control.Monad (when)
import qualified Network.Wai.Handler.CGI as CGI
import qualified Network.Wai.Handler.Warp as Warp
import Network.Wai.Middleware.Timeout (timeout)
import System.Environment (getProgName)
import Options.Applicative
import System.Exit (exitWith, ExitCode(ExitSuccess))
import Data.Text as T
data Opts = Opts
{ optPort :: Warp.Port,
optTimeout :: Int, -- seconds
optVersion :: Bool }
options :: Parser Opts
options = Opts
<$> option auto
( long "port"
<> value 3030
<> metavar "PORT"
<> help "Port to serve on" )
<*> option auto
( long "timeout"
<> value 2
<> metavar "SECONDS"
<> help "Seconds timeout" )
<*> flag False True
( long "version"
<> help "Print version" )
main :: IO ()
main = do
progname <- getProgName
let optspec = info (options <**> helper)
( fullDesc
<> progDesc "Run a pandoc server"
<> header "pandoc-server - text conversion server" )
opts <- execParser optspec
when (optVersion opts) $ do
putStrLn $ progname <> " " <> T.unpack pandocVersion
exitWith ExitSuccess
let port = optPort opts
let app' = timeout (optTimeout opts) app
if progname == "pandoc-server.cgi"
then -- operate as a CGI script
CGI.run app'
else -- operate as a persistent server
Warp.run port app'

View file

@ -1,301 +0,0 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module PandocServer
( app
, Params(..)
) where
import Data.Aeson
import Data.Aeson.TH
import Network.Wai
import Servant
import Text.DocTemplates as DocTemplates
import Text.Pandoc
import Text.Pandoc.Citeproc (processCitations)
import Text.Pandoc.Highlighting (lookupHighlightingStyle)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import Data.Maybe (fromMaybe)
import Data.Char (isAlphaNum)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BL
import Data.ByteString.Base64 (decodeBase64, encodeBase64)
import Data.Default
import Data.Map (Map)
import Data.Set (Set)
import Skylighting (defaultSyntaxMap)
newtype Blob = Blob BL.ByteString
deriving (Show, Eq)
instance ToJSON Blob where
toJSON (Blob bs) = toJSON (encodeBase64 $ BL.toStrict bs)
instance FromJSON Blob where
parseJSON = withText "Blob" $ \t -> do
let inp = UTF8.fromText t
case decodeBase64 inp of
Right bs -> return $ Blob $ BL.fromStrict bs
Left _ -> -- treat as regular text
return $ Blob $ BL.fromStrict inp
-- This is the data to be supplied by the JSON payload
-- of requests. Maybe values may be omitted and will be
-- given default values.
data Params = Params
{ text :: Text
, from :: Maybe Text
, to :: Maybe Text
, wrapText :: Maybe WrapOption
, columns :: Maybe Int
, standalone :: Maybe Bool
, template :: Maybe Text
, tabStop :: Maybe Int
, indentedCodeClasses :: Maybe [Text]
, abbreviations :: Maybe (Set Text)
, defaultImageExtension :: Maybe Text
, trackChanges :: Maybe TrackChanges
, stripComments :: Maybe Bool
, citeproc :: Maybe Bool
, variables :: Maybe (DocTemplates.Context Text)
, tableOfContents :: Maybe Bool
, incremental :: Maybe Bool
, htmlMathMethod :: Maybe HTMLMathMethod
, numberSections :: Maybe Bool
, numberOffset :: Maybe [Int]
, sectionDivs :: Maybe Bool
, referenceLinks :: Maybe Bool
, dpi :: Maybe Int
, emailObfuscation :: Maybe ObfuscationMethod
, identifierPrefix :: Maybe Text
, citeMethod :: Maybe CiteMethod
, htmlQTags :: Maybe Bool
, slideLevel :: Maybe Int
, topLevelDivision :: Maybe TopLevelDivision
, listings :: Maybe Bool
, highlightStyle :: Maybe Text
, setextHeaders :: Maybe Bool
, epubSubdirectory :: Maybe Text
, epubFonts :: Maybe [FilePath]
, epubMetadata :: Maybe Text
, epubChapterLevel :: Maybe Int
, tocDepth :: Maybe Int
, referenceDoc :: Maybe FilePath
, referenceLocation :: Maybe ReferenceLocation
, preferAscii :: Maybe Bool
, files :: Maybe (Map FilePath Blob)
} deriving (Show)
instance Default Params where
def = Params
{ text = ""
, from = Nothing
, to = Nothing
, wrapText = Nothing
, columns = Nothing
, standalone = Nothing
, template = Nothing
, tabStop = Nothing
, indentedCodeClasses = Nothing
, abbreviations = Nothing
, defaultImageExtension = Nothing
, trackChanges = Nothing
, stripComments = Nothing
, citeproc = Nothing
, variables = Nothing
, tableOfContents = Nothing
, incremental = Nothing
, htmlMathMethod = Nothing
, numberSections = Nothing
, numberOffset = Nothing
, sectionDivs = Nothing
, referenceLinks = Nothing
, dpi = Nothing
, emailObfuscation = Nothing
, identifierPrefix = Nothing
, citeMethod = Nothing
, htmlQTags = Nothing
, slideLevel = Nothing
, topLevelDivision = Nothing
, listings = Nothing
, highlightStyle = Nothing
, setextHeaders = Nothing
, epubSubdirectory = Nothing
, epubMetadata = Nothing
, epubChapterLevel = Nothing
, epubFonts = Nothing
, tocDepth = Nothing
, referenceDoc = Nothing
, referenceLocation = Nothing
, preferAscii = Nothing
, files = Nothing
}
-- TODO:
-- shiftHeadingLevelBy
-- metadata
-- selfContained
-- embedResources
-- epubCoverImage
-- stripEmptyParagraphs
-- titlePrefix
-- ipynbOutput
-- eol
-- csl
-- bibliography
-- citationAbbreviations
-- Automatically derive code to convert to/from JSON.
$(deriveJSON defaultOptions ''Params)
-- This is the API. The "/convert" endpoint takes a request body
-- consisting of a JSON-encoded Params structure and responds to
-- Get requests with either plain text or JSON, depending on the
-- Accept header.
type API =
ReqBody '[JSON] Params :> Post '[PlainText, JSON] Text
:<|>
ReqBody '[JSON] Params :> Post '[OctetStream] BS.ByteString
:<|>
"batch" :> ReqBody '[JSON] [Params] :> Post '[JSON] [Text]
:<|>
"babelmark" :> QueryParam' '[Required] "text" Text :> QueryParam "from" Text :> QueryParam "to" Text :> QueryFlag "standalone" :> Get '[JSON] Value
:<|>
"version" :> Get '[PlainText, JSON] Text
app :: Application
app = serve api server
api :: Proxy API
api = Proxy
server :: Server API
server = convert
:<|> convertBytes
:<|> mapM convert
:<|> babelmark -- for babelmark which expects {"html": "", "version": ""}
:<|> pure pandocVersion
where
babelmark text' from' to' standalone' = do
res <- convert def{ text = text',
from = from', to = to',
standalone = Just standalone' }
return $ toJSON $ object [ "html" .= res, "version" .= pandocVersion ]
-- We use runPure for the pandoc conversions, which ensures that
-- they will do no IO. This makes the server safe to use. However,
-- it will mean that features requiring IO, like RST includes, will not work.
-- Changing this to
-- handleErr =<< liftIO (runIO (convert' params))
-- will allow the IO operations.
convert params = handleErr $
runPure (convert' id (encodeBase64 . BL.toStrict) params)
convertBytes params = handleErr $
runPure (convert' UTF8.fromText BL.toStrict params)
convert' :: PandocMonad m
=> (Text -> a) -> (BL.ByteString -> a) -> Params -> m a
convert' textHandler bsHandler params = do
let readerFormat = fromMaybe "markdown" $ from params
let writerFormat = fromMaybe "html" $ to params
(readerSpec, readerExts) <- getReader readerFormat
(writerSpec, writerExts) <- getWriter writerFormat
let binaryOutput = case writerSpec of
ByteStringWriter{} -> True
_ -> False
let isStandalone = fromMaybe binaryOutput (standalone params)
let toformat = T.toLower $ T.takeWhile isAlphaNum $ writerFormat
hlStyle <- traverse (lookupHighlightingStyle . T.unpack)
$ highlightStyle params
mbTemplate <- if isStandalone
then case template params of
Nothing -> Just <$>
compileDefaultTemplate toformat
Just t -> Just <$>
compileCustomTemplate toformat t
else return Nothing
let readeropts = def{ readerExtensions = readerExts
, readerStandalone = isStandalone
, readerTabStop = fromMaybe 4 (tabStop params)
, readerIndentedCodeClasses = fromMaybe []
(indentedCodeClasses params)
, readerAbbreviations =
fromMaybe mempty (abbreviations params)
, readerDefaultImageExtension =
fromMaybe mempty (defaultImageExtension params)
, readerTrackChanges =
fromMaybe AcceptChanges (trackChanges params)
, readerStripComments =
fromMaybe False (stripComments params)
}
let writeropts =
def{ writerExtensions = writerExts
, writerTabStop = fromMaybe 4 (tabStop params)
, writerWrapText = fromMaybe WrapAuto (wrapText params)
, writerColumns = fromMaybe 72 (columns params)
, writerTemplate = mbTemplate
, writerSyntaxMap = defaultSyntaxMap
, writerVariables = fromMaybe mempty (variables params)
, writerTableOfContents = fromMaybe False (tableOfContents params)
, writerIncremental = fromMaybe False (incremental params)
, writerHTMLMathMethod =
fromMaybe PlainMath (htmlMathMethod params)
, writerNumberSections = fromMaybe False (numberSections params)
, writerNumberOffset = fromMaybe [] (numberOffset params)
, writerSectionDivs = fromMaybe False (sectionDivs params)
, writerReferenceLinks = fromMaybe False (referenceLinks params)
, writerDpi = fromMaybe 96 (dpi params)
, writerEmailObfuscation =
fromMaybe NoObfuscation (emailObfuscation params)
, writerIdentifierPrefix =
fromMaybe mempty (identifierPrefix params)
, writerCiteMethod = fromMaybe Citeproc (citeMethod params)
, writerHtmlQTags = fromMaybe False (htmlQTags params)
, writerSlideLevel = slideLevel params
, writerTopLevelDivision =
fromMaybe TopLevelDefault (topLevelDivision params)
, writerListings = fromMaybe False (listings params)
, writerHighlightStyle = hlStyle
, writerSetextHeaders = fromMaybe False (setextHeaders params)
, writerEpubSubdirectory =
fromMaybe "EPUB" (epubSubdirectory params)
, writerEpubMetadata = epubMetadata params
, writerEpubFonts = fromMaybe [] (epubFonts params)
, writerEpubChapterLevel = fromMaybe 1 (epubChapterLevel params)
, writerTOCDepth = fromMaybe 3 (tocDepth params)
, writerReferenceDoc = referenceDoc params
, writerReferenceLocation =
fromMaybe EndOfDocument (referenceLocation params)
, writerPreferAscii = fromMaybe False (preferAscii params)
}
let reader = case readerSpec of
TextReader r -> r readeropts
ByteStringReader r -> \t -> do
let eitherbs = decodeBase64 $ UTF8.fromText t
case eitherbs of
Left errt -> throwError $ PandocSomeError errt
Right bs -> r readeropts $ BL.fromStrict bs
let writer = case writerSpec of
TextWriter w -> fmap textHandler . w writeropts
ByteStringWriter w -> fmap bsHandler . w writeropts
reader (text params) >>=
(if citeproc params == Just True
then processCitations
else return) >>=
writer
handleErr (Right t) = return t
handleErr (Left err) = throwError $
err500 { errBody = TLE.encodeUtf8 $ TL.fromStrict $ renderError err }
compileCustomTemplate toformat t = do
res <- runWithPartials $ compileTemplate ("custom." <> T.unpack toformat) t
case res of
Left e -> throwError $ PandocTemplateError (T.pack e)
Right tpl -> return tpl

357
src/Text/Pandoc/Server.hs Normal file
View file

@ -0,0 +1,357 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Server
( app
, ServerOpts(..)
, Params(..)
, Blob(..)
, parseServerOpts
) where
import Data.Aeson
import Network.Wai
import Servant
import Text.DocTemplates as DocTemplates
import Text.Pandoc
import Text.Pandoc.Citeproc (processCitations)
import Text.Pandoc.Highlighting (lookupHighlightingStyle)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import Data.Maybe (fromMaybe)
import Data.Char (isAlphaNum)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BL
import Data.ByteString.Base64 (decodeBase64, encodeBase64)
import Data.Default
import Control.Monad (when, foldM)
import qualified Data.Set as Set
import Skylighting (defaultSyntaxMap)
import qualified Data.Map as M
import System.Console.GetOpt
import System.Environment (getArgs, getProgName)
import qualified Control.Exception as E
import Text.Pandoc.Shared (safeStrRead, headerShift, filterIpynbOutput,
eastAsianLineBreakFilter, stripEmptyParagraphs)
import Text.Pandoc.App.Opt ( IpynbOutput (..), Opt(..), defaultOpts )
import Text.Pandoc.Filter (Filter(..))
import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.SelfContained (makeSelfContained)
import System.Exit
data ServerOpts =
ServerOpts
{ serverPort :: Int
, serverTimeout :: Int }
deriving (Show)
defaultServerOpts :: ServerOpts
defaultServerOpts = ServerOpts { serverPort = 3030, serverTimeout = 2 }
cliOptions :: [OptDescr (ServerOpts -> IO ServerOpts)]
cliOptions =
[ Option ['p'] ["port"]
(ReqArg (\s opts -> case safeStrRead s of
Just i -> return opts{ serverPort = i }
Nothing ->
E.throwIO $ PandocOptionError $ T.pack
s <> " is not a number") "NUMBER")
"port number"
, Option ['t'] ["timeout"]
(ReqArg (\s opts -> case safeStrRead s of
Just i -> return opts{ serverTimeout = i }
Nothing ->
E.throwIO $ PandocOptionError $ T.pack
s <> " is not a number") "NUMBER")
"timeout (seconds)"
, Option ['h'] ["help"]
(NoArg (\_ -> do
prg <- getProgName
let header = "Usage: " <> prg <> " [OPTION...]"
putStrLn $ usageInfo header cliOptions
exitWith ExitSuccess))
"help message"
, Option ['v'] ["version"]
(NoArg (\_ -> do
prg <- getProgName
putStrLn $ prg <> " " <> T.unpack pandocVersion
exitWith ExitSuccess))
"version info"
]
parseServerOpts :: IO ServerOpts
parseServerOpts = do
args <- getArgs
let handleUnknownOpt x = "Unknown option: " <> x
case getOpt' Permute cliOptions args of
(os, ns, unrecognizedOpts, es) -> do
when (not (null es) || not (null unrecognizedOpts)) $
E.throwIO $ PandocOptionError $ T.pack $
concat es ++ unlines (map handleUnknownOpt unrecognizedOpts) ++
("Try --help for more information.")
when (not (null ns)) $
E.throwIO $ PandocOptionError $ T.pack $
"Unknown arguments: " <> unwords ns
foldM (flip ($)) defaultServerOpts os
newtype Blob = Blob BL.ByteString
deriving (Show, Eq)
instance ToJSON Blob where
toJSON (Blob bs) = toJSON (encodeBase64 $ BL.toStrict bs)
instance FromJSON Blob where
parseJSON = withText "Blob" $ \t -> do
let inp = UTF8.fromText t
case decodeBase64 inp of
Right bs -> return $ Blob $ BL.fromStrict bs
Left _ -> -- treat as regular text
return $ Blob $ BL.fromStrict inp
-- This is the data to be supplied by the JSON payload
-- of requests. Maybe values may be omitted and will be
-- given default values.
data Params = Params
{ options :: Opt
, text :: Text
, files :: Maybe (M.Map FilePath Blob)
} deriving (Show)
instance Default Params where
def = Params
{ options = defaultOpts
, text = mempty
, files = Nothing
}
-- Automatically derive code to convert to/from JSON.
instance FromJSON Params where
parseJSON = withObject "Params" $ \o ->
Params
<$> parseJSON (Object o)
<*> o .: "text"
<*> o .:? "files"
-- This is the API. The "/convert" endpoint takes a request body
-- consisting of a JSON-encoded Params structure and responds to
-- Get requests with either plain text or JSON, depending on the
-- Accept header.
type API =
ReqBody '[JSON] Params :> Post '[PlainText, JSON] Text
:<|>
ReqBody '[JSON] Params :> Post '[OctetStream] BS.ByteString
:<|>
"batch" :> ReqBody '[JSON] [Params] :> Post '[JSON] [Text]
:<|>
"babelmark" :> QueryParam' '[Required] "text" Text :> QueryParam "from" Text :> QueryParam "to" Text :> QueryFlag "standalone" :> Get '[JSON] Value
:<|>
"version" :> Get '[PlainText, JSON] Text
app :: Application
app = serve api server
api :: Proxy API
api = Proxy
server :: Server API
server = convert
:<|> convertBytes
:<|> mapM convert
:<|> babelmark -- for babelmark which expects {"html": "", "version": ""}
:<|> pure pandocVersion
where
babelmark text' from' to' standalone' = do
res <- convert def{ text = text',
options = defaultOpts{
optFrom = from',
optTo = to',
optStandalone = standalone' }
}
return $ toJSON $ object [ "html" .= res, "version" .= pandocVersion ]
-- We use runPure for the pandoc conversions, which ensures that
-- they will do no IO. This makes the server safe to use. However,
-- it will mean that features requiring IO, like RST includes, will not work.
-- Changing this to
-- handleErr =<< liftIO (runIO (convert' params))
-- will allow the IO operations.
convert params = handleErr $
runPure (convert' id (encodeBase64 . BL.toStrict) params)
convertBytes params = handleErr $
runPure (convert' UTF8.fromText BL.toStrict params)
convert' :: (Text -> a) -> (BL.ByteString -> a) -> Params -> PandocPure a
convert' textHandler bsHandler params = do
curtime <- getCurrentTime
-- put files params in ersatz file system
let addFile :: FilePath -> Blob -> FileTree -> FileTree
addFile fp (Blob lbs) =
insertInFileTree fp FileInfo{ infoFileMTime = curtime
, infoFileContents = BL.toStrict lbs }
case files params of
Nothing -> return ()
Just fs -> do
let filetree = M.foldrWithKey addFile mempty fs
modifyPureState $ \st -> st{ stFiles = filetree }
let opts = options params
let readerFormat = fromMaybe "markdown" $ optFrom opts
let writerFormat = fromMaybe "html" $ optTo opts
(readerSpec, readerExts) <- getReader readerFormat
(writerSpec, writerExts) <- getWriter writerFormat
let isStandalone = optStandalone opts
let toformat = T.toLower $ T.takeWhile isAlphaNum $ writerFormat
hlStyle <- traverse (lookupHighlightingStyle . T.unpack)
$ optHighlightStyle opts
mbTemplate <- if isStandalone
then case optTemplate opts of
Nothing -> Just <$>
compileDefaultTemplate toformat
Just t -> Just <$>
compileCustomTemplate toformat t
else return Nothing
abbrevs <- Set.fromList . filter (not . T.null) . T.lines . UTF8.toText <$>
case optAbbreviations opts of
Nothing -> readDataFile "abbreviations"
Just f -> readFileStrict f
let readeropts = def{ readerExtensions = readerExts
, readerStandalone = isStandalone
, readerTabStop = optTabStop opts
, readerIndentedCodeClasses =
optIndentedCodeClasses opts
, readerAbbreviations = abbrevs
, readerDefaultImageExtension =
optDefaultImageExtension opts
, readerTrackChanges = optTrackChanges opts
, readerStripComments = optStripComments opts
}
let writeropts =
def{ writerExtensions = writerExts
, writerTabStop = optTabStop opts
, writerWrapText = optWrap opts
, writerColumns = optColumns opts
, writerTemplate = mbTemplate
, writerSyntaxMap = defaultSyntaxMap
, writerVariables = optVariables opts
, writerTableOfContents = optTableOfContents opts
, writerIncremental = optIncremental opts
, writerHTMLMathMethod = optHTMLMathMethod opts
, writerNumberSections = optNumberSections opts
, writerNumberOffset = optNumberOffset opts
, writerSectionDivs = optSectionDivs opts
, writerReferenceLinks = optReferenceLinks opts
, writerDpi = optDpi opts
, writerEmailObfuscation = optEmailObfuscation opts
, writerIdentifierPrefix = optIdentifierPrefix opts
, writerCiteMethod = optCiteMethod opts
, writerHtmlQTags = optHtmlQTags opts
, writerSlideLevel = optSlideLevel opts
, writerTopLevelDivision = optTopLevelDivision opts
, writerListings = optListings opts
, writerHighlightStyle = hlStyle
, writerSetextHeaders = optSetextHeaders opts
, writerEpubSubdirectory = T.pack $ optEpubSubdirectory opts
, writerEpubMetadata = T.pack <$> optEpubMetadata opts
, writerEpubFonts = optEpubFonts opts
, writerEpubChapterLevel = optEpubChapterLevel opts
, writerTOCDepth = optTOCDepth opts
, writerReferenceDoc = optReferenceDoc opts
, writerReferenceLocation = optReferenceLocation opts
, writerPreferAscii = optAscii opts
}
let reader = case readerSpec of
TextReader r -> r readeropts
ByteStringReader r -> \t -> do
let eitherbs = decodeBase64 $ UTF8.fromText t
case eitherbs of
Left errt -> throwError $ PandocSomeError errt
Right bs -> r readeropts $ BL.fromStrict bs
let writer = case writerSpec of
TextWriter w ->
fmap textHandler .
(\d -> w writeropts d >>=
if optEmbedResources opts && htmlFormat (optTo opts)
then makeSelfContained
else return)
ByteStringWriter w -> fmap bsHandler . w writeropts
let transforms :: Pandoc -> Pandoc
transforms = (case optShiftHeadingLevelBy opts of
0 -> id
x -> headerShift x) .
(case optStripEmptyParagraphs opts of
True -> stripEmptyParagraphs
False -> id) .
(if extensionEnabled Ext_east_asian_line_breaks
readerExts &&
not (extensionEnabled Ext_east_asian_line_breaks
writerExts &&
optWrap opts == WrapPreserve)
then eastAsianLineBreakFilter
else id) .
(case optIpynbOutput opts of
IpynbOutputAll -> id
IpynbOutputNone -> filterIpynbOutput Nothing
IpynbOutputBest -> filterIpynbOutput (Just $
case optTo opts of
Just "latex" -> Format "latex"
Just "beamer" -> Format "latex"
Nothing -> Format "html"
Just f
| htmlFormat (optTo opts) -> Format "html"
| otherwise -> Format f))
let meta = (case optBibliography opts of
[] -> id
fs -> setMeta "bibliography" (MetaList
(map (MetaString . T.pack) fs))) .
maybe id (setMeta "csl" . MetaString . T.pack)
(optCSL opts) .
maybe id (setMeta "citation-abbreviations" . MetaString .
T.pack)
(optCitationAbbreviations opts) $
optMetadata opts
let addMetadata m' (Pandoc m bs) = Pandoc (m <> m') bs
let hasCiteprocFilter [] = False
hasCiteprocFilter (CiteprocFilter:_) = True
hasCiteprocFilter (_:xs) = hasCiteprocFilter xs
reader (text params) >>=
return . transforms . addMetadata meta >>=
(if hasCiteprocFilter (optFilters opts)
then processCitations
else return) >>=
writer
htmlFormat :: Maybe Text -> Bool
htmlFormat Nothing = True
htmlFormat (Just f) =
any (`T.isPrefixOf` f)
["html","html4","html5","s5","slidy", "slideous","dzslides","revealjs"]
handleErr (Right t) = return t
handleErr (Left err) = throwError $
err500 { errBody = TLE.encodeUtf8 $ TL.fromStrict $ renderError err }
compileCustomTemplate toformat t = do
res <- runWithPartials $ compileTemplate ("custom." <> T.unpack toformat)
(T.pack t)
case res of
Left e -> throwError $ PandocTemplateError (T.pack e)
Right tpl -> return tpl

View file

@ -1,6 +1,5 @@
flags:
pandoc:
server: false
embed_data_files: true
QuickCheck:
old-random: false