From f0abbe7533db3e2c14066bddbb5d52ade1ef0685 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 23 Mar 2017 21:24:01 +0100
Subject: [PATCH] Allow creation of pdf via groff ms and pdfroff.

    pandoc -t ms -o output.pdf input.txt
---
 MANUAL.txt                    | 14 ++++++++------
 pandoc.cabal                  |  9 +++++----
 src/Text/Pandoc/App.hs        | 12 ++++++++----
 src/Text/Pandoc/PDF.hs        | 33 ++++++++++++++++++++++++++++++++-
 src/Text/Pandoc/Writers/Ms.hs | 23 +++--------------------
 5 files changed, 56 insertions(+), 35 deletions(-)

diff --git a/MANUAL.txt b/MANUAL.txt
index 520e8f5d5..b224ae761 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -21,11 +21,11 @@ write plain text, [Markdown], [CommonMark], [PHP Markdown Extra],
 [HTML5], [LaTeX] \(including [`beamer`] slide shows\), [ConTeXt], [RTF], [OPML],
 [DocBook], [OpenDocument], [ODT], [Word docx], [GNU Texinfo], [MediaWiki
 markup], [DokuWiki markup], [ZimWiki markup], [Haddock markup],
-[EPUB] \(v2 or v3\), [FictionBook2], [Textile], [groff man] pages,
+[EPUB] \(v2 or v3\), [FictionBook2], [Textile], [groff man], [groff ms],
 [Emacs Org mode], [AsciiDoc], [InDesign ICML], [TEI Simple], and [Slidy],
 [Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows. It can also
-produce [PDF] output on systems where LaTeX, ConTeXt, or `wkhtmltopdf` is
-installed.
+produce [PDF] output on systems where LaTeX, ConTeXt, `pdfroff`,
+or `wkhtmltopdf` is installed.
 
 Pandoc's enhanced version of Markdown includes syntax for [footnotes],
 [tables], flexible [ordered lists], [definition lists], [fenced code blocks],
@@ -83,6 +83,7 @@ Markdown can be expected to be lossy.
 [TWiki markup]: http://twiki.org/cgi-bin/view/TWiki/TextFormattingRules
 [Haddock markup]: https://www.haskell.org/haddock/doc/html/ch03s08.html
 [groff man]: http://man7.org/linux/man-pages/man7/groff_man.7.html
+[groff ms]: http://man7.org/linux/man-pages/man7/groff_ms.7.html
 [Haskell]: https://www.haskell.org
 [GNU Texinfo]: http://www.gnu.org/software/texinfo/
 [Emacs Org mode]: http://orgmode.org
@@ -201,9 +202,10 @@ if added to the template or included in any header file. The
 optionally be used for [citation rendering]. These are included
 with all recent versions of [TeX Live].
 
-Alternatively, pandoc can use ConTeXt or `wkhtmltopdf` to create a PDF.
-To do this, specify an output file with a `.pdf` extension,
-as before, but add `-t context` or `-t html5` to the command line.
+Alternatively, pandoc can use ConTeXt, `wkhtmltopdf`, or
+`pdfroff` to create a PDF.  To do this, specify an output file
+with a `.pdf` extension, as before, but add `-t context`, `-t
+html5`, or `-t ms` to the command line.
 
 PDF output can be controlled using [variables for LaTeX] (if
 LaTeX is used) and [variables for ConTeXt] (if ConTeXt is used).
diff --git a/pandoc.cabal b/pandoc.cabal
index b59befd75..b56f29b48 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -22,10 +22,11 @@ Description:     Pandoc is a Haskell library for converting from one markup
                  it can write Markdown, reStructuredText, XHTML, HTML 5,
                  LaTeX, ConTeXt, DocBook, OPML, TEI, OpenDocument, ODT,
                  Word docx, RTF, MediaWiki, DokuWiki, ZimWiki, Textile,
-                 groff man pages, plain text, Emacs Org-Mode, AsciiDoc,
-                 Haddock markup, EPUB (v2 and v3), FictionBook2, InDesign ICML,
-                 Muse, and several kinds of HTML/javascript slide shows (S5, Slidy,
-                 Slideous, DZSlides, reveal.js).
+                 groff man, groff ms, plain text, Emacs Org-Mode, AsciiDoc,
+                 Haddock markup, EPUB (v2 and v3), FictionBook2,
+                 InDesign ICML, Muse, and several kinds of
+                 HTML/javascript slide shows (S5, Slidy, Slideous,
+                 DZSlides, reveal.js).
                  .
                  In contrast to most existing tools for converting Markdown
                  to HTML, pandoc has a modular design: it consists of a set of
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index b9cd04631..29a8add3d 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -164,6 +164,7 @@ convertWithOpts opts = do
   let laTeXOutput = format `elem` ["latex", "beamer"]
   let conTeXtOutput = format == "context"
   let html5Output = format == "html5" || format == "html"
+  let msOutput = format == "ms"
 
   -- disabling the custom writer for now
   writer <- if ".lua" `isSuffixOf` format
@@ -398,15 +399,18 @@ convertWithOpts opts = do
       ByteStringWriter f -> f writerOptions doc' >>= writeFnBinary outputFile
       StringWriter f
         | pdfOutput -> do
-                -- make sure writer is latex or beamer or context or html5
-                unless (laTeXOutput || conTeXtOutput || html5Output) $
+                -- make sure writer is latex, beamer, context, html5 or ms
+                unless (laTeXOutput || conTeXtOutput || html5Output ||
+                        msOutput) $
                   err 47 $ "cannot produce pdf output with " ++ format ++
                            " writer"
 
                 let pdfprog = case () of
                                 _ | conTeXtOutput -> "context"
-                                _ | html5Output   -> "wkhtmltopdf"
-                                _ -> optLaTeXEngine opts
+                                  | html5Output   -> "wkhtmltopdf"
+                                  | html5Output   -> "wkhtmltopdf"
+                                  | msOutput      -> "pdfroff"
+                                  | otherwise     -> optLaTeXEngine opts
                 -- check for pdf creating program
                 mbPdfProg <- liftIO $ findExecutable pdfprog
                 when (isNothing mbPdfProg) $
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 43110abf1..f1274686d 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -74,7 +74,7 @@ changePathSeparators = intercalate "/" . splitDirectories
 
 makePDF :: MonadIO m
         => String              -- ^ pdf creator (pdflatex, lualatex,
-                               -- xelatex, context, wkhtmltopdf)
+                               -- xelatex, context, wkhtmltopdf, pdfroff)
         -> (WriterOptions -> Pandoc -> PandocIO String)  -- ^ writer
         -> WriterOptions       -- ^ options
         -> Verbosity           -- ^ verbosity level
@@ -106,6 +106,12 @@ makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = liftIO $ do
               setVerbosity verbosity
               writer opts doc
   html2pdf verbosity args source
+makePDF "pdfroff" writer opts verbosity _mediabag doc = liftIO $ do
+  source <- runIOorExplode $ do
+              setVerbosity verbosity
+              writer opts doc
+  let args   = ["-ms", "-e", "-k", "-i"]
+  ms2pdf verbosity args source
 makePDF program writer opts verbosity mediabag doc = do
   let withTemp = if takeBaseName program == "context"
                     then withTempDirectory "."
@@ -295,6 +301,31 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do
                    else return Nothing
          return (exit, out, pdf)
 
+ms2pdf :: Verbosity
+       -> [String]
+       -> String
+       -> IO (Either ByteString ByteString)
+ms2pdf verbosity args source = do
+  env' <- getEnvironment
+  when (verbosity >= INFO) $ do
+    putStrLn "[makePDF] Command line:"
+    putStrLn $ "pdfroff " ++ " " ++ unwords (map show args)
+    putStr "\n"
+    putStrLn "[makePDF] Environment:"
+    mapM_ print env'
+    putStr "\n"
+    putStrLn $ "[makePDF] Contents:\n"
+    putStr source
+    putStr "\n"
+  (exit, out) <- pipeProcess (Just env') "pdfroff" args
+                     (UTF8.fromStringLazy source)
+  when (verbosity >= INFO) $ do
+    B.hPutStr stdout out
+    putStr "\n"
+  return $ case exit of
+             ExitFailure _ -> Left out
+             ExitSuccess   -> Right out
+
 html2pdf  :: Verbosity    -- ^ Verbosity level
           -> [String]     -- ^ Args to wkhtmltopdf
           -> String       -- ^ HTML5 source
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index e326f19ab..4e6ae0951 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -37,28 +37,11 @@ TODO:
 [ ] tight/loose list distinction
 [ ] internal hyperlinks (this seems to be possible since
     they exist in the groff manual PDF version)
-[ ] better handling of accented characters and other non-ascii
-    characters (e.g. curly quotes).
-    Note:  recent versions of groff (more recent than standard
-    on many systems) include a -k option which runs preconv.
-    preconv basically converts non-ascii characters
-    to \[uXXXX] entities.  Since we can't assume that the local
-    groff has the -k option, we could have any invocation of
-    groff in Text.Pandoc.PDF filter the input through a Haskell
-    function that does what preconv does.
-    On the other hand:  only recent groffs have -Tpdf.  so
-    if we want compatibility with older groffs, we need to to
-    -Tps and pipe through ps2pdf (can we assume it's available?).
-    A big advantage of gropdf:  it supports the tag
-    \X'pdf: pdfpic file alignment width height line-length'
-    and also seems to support bookmarks.
-    See also the pdfroff shell script that comes with more
-    recent versions of groff.
-[ ] add via groff option to PDF module
-[ ] better handling of images, perhaps converting to eps when
-    going to PDF?
 [ ] better template, with configurable page number, table of contents,
     columns, etc.
+[ ] support for images? gropdf (and maybe pdfroff) supports the tag
+    \X'pdf: pdfpic file alignment width height line-length'
+    and also seems to support bookmarks.
 -}
 
 module Text.Pandoc.Writers.Ms ( writeMs ) where