Shared: add filterIpynbOutput. [API change]

Add command line option `--ipynb-output=all|none|best`. Closes #5339.
2019-02-28 20:28:16 -08:00 · 2019-02-28 20:28:16 -08:00 · 77faccb505
commit 77faccb505
parent 7aeabd8430
5 changed files with 71 additions and 10 deletions
--- a/MANUAL.txt
+++ b/MANUAL.txt
@ -1192,6 +1192,15 @@ Options affecting specific writers {.options}
    the EPUB-specific contents.  The default is `EPUB`.  To put
    the EPUB contents in the top level, use an empty string.

+`--ipynb-output=all|none|best`
+
+:   Determines how ipynb output cells are treated. `all` means
+    that all of the data formats included in the original are
+    preserved.  `none` means that the contents of data cells
+    are omitted.  `best` causes pandoc to try to pick the
+    richest data block in each output cell that is compatible
+    with the output format.  The default is `best`.
+
 `--pdf-engine=`*PROGRAM*

 :   Use the specified engine when producing PDF output.
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@ -73,7 +73,7 @@ import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.Readers.Markdown (yamlToMeta)
 import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
 import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
-         headerShift, isURI, tabFilter, uriPathToPath)
+         headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput)
 import qualified Text.Pandoc.UTF8 as UTF8
 #ifndef _WINDOWS
 import System.Posix.IO (stdOutput)
@ -247,8 +247,20 @@ convertWithOpts opts = do
                              (writerExtensions writerOptions) &&
                              writerWrapText writerOptions == WrapPreserve)
                         then (eastAsianLineBreakFilter :)
-                         else id) $
-                     []
+                         else id) .
+                     (case optIpynbOutput opts of
+                       "all"    -> id
+                       "none"   -> (filterIpynbOutput Nothing :)
+                       "best"   -> (filterIpynbOutput (Just $
+                                     if htmlFormat writerName
+                                        then Format "html"
+                                        else
+                                          case writerName of
+                                            "latex"  -> Format "latex"
+                                            "beamer" -> Format "latex"
+                                            _        -> Format writerName) :)
+                       _  -> id)  -- should not happen
+                     $ []

    let sourceToDoc :: [FilePath] -> PandocIO Pandoc
        sourceToDoc sources' =
@ -293,15 +305,12 @@ convertWithOpts opts = do
                                     TL.unpack (TE.decodeUtf8With TE.lenientDecode err')

        Nothing -> do
-                let htmlFormat = format `elem`
-                      ["html","html4","html5","s5","slidy",
-                       "slideous","dzslides","revealjs"]
-                    addNl = if standalone
+                let addNl = if standalone
                               then id
                               else (<> T.singleton '\n')
                output <- addNl <$> f writerOptions doc
                writerFn eol outputFile =<<
-                  if optSelfContained opts && htmlFormat
+                  if optSelfContained opts && htmlFormat writerName
                     -- TODO not maximally efficient; change type
                     -- of makeSelfContained so it works w/ Text
                     then T.pack <$> makeSelfContained (T.unpack output)
@ -309,6 +318,10 @@ convertWithOpts opts = do

 type Transform = Pandoc -> Pandoc

+htmlFormat :: String -> Bool
+htmlFormat = (`elem` ["html","html4","html5","s5","slidy",
+                      "slideous","dzslides","revealjs"])
+
 isTextFormat :: String -> Bool
 isTextFormat s = s `notElem` ["odt","docx","epub2","epub3","epub","pptx"]

--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@ -643,7 +643,17 @@ options =
                 "NUMBER")
                 "" -- "Header level at which to split chapters in EPUB"

-    , Option "" ["pdf-engine"]
+    , Option "" ["ipynb-output"]
+                 (ReqArg
+                  (\arg opt ->
+                    if arg `notElem` ["all","none","best"]
+                       then E.throwIO $ PandocOptionError $
+                             "ipynb-output must be all, none, or best"
+                       else return opt { optIpynbOutput = arg })
+                 "all|none|best")
+                 "" -- "Starting number for sections, subsections, etc."
+
+     , Option "" ["pdf-engine"]
                 (ReqArg
                  (\arg opt -> do
                     let b = takeBaseName arg
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@ -123,6 +123,7 @@ data Opt = Opt
    , optFileScope             :: Bool         -- ^ Parse input files before combining
    , optTitlePrefix           :: Maybe String     -- ^ Prefix for title
    , optCss                   :: [FilePath]       -- ^ CSS files to link to
+    , optIpynbOutput           :: String           -- ^ Maybe f to use best data; Nothing to omit
    , optIncludeBeforeBody     :: [FilePath]       -- ^ Files to include before
    , optIncludeAfterBody      :: [FilePath]       -- ^ Files to include after body
    , optIncludeInHeader       :: [FilePath]       -- ^ Files to include in header
@ -196,6 +197,7 @@ defaultOpts = Opt
    , optFileScope             = False
    , optTitlePrefix           = Nothing
    , optCss                   = []
+    , optIpynbOutput           = "best"
    , optIncludeBeforeBody     = []
    , optIncludeAfterBody      = []
    , optIncludeInHeader       = []
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@ -86,6 +86,7 @@ module Text.Pandoc.Shared (
                     eastAsianLineBreakFilter,
                     underlineSpan,
                     splitSentences,
+                     filterIpynbOutput,
                     -- * TagSoup HTML handling
                     renderTags',
                     -- * File handling
@ -122,12 +123,13 @@ import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
                  generalCategory, GeneralCategory(NonSpacingMark,
                  SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
 import Data.Data (Data, Typeable)
-import Data.List (find, intercalate, intersperse, stripPrefix)
+import Data.List (find, intercalate, intersperse, stripPrefix, sortBy)
 import qualified Data.Map as M
 import Data.Maybe (mapMaybe)
 import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr)
 import qualified Data.Set as Set
 import qualified Data.Text as T
+import Data.Ord (comparing)
 import Data.Version (showVersion)
 import Network.URI (URI (uriScheme), escapeURIString, parseURI)
 import Paths_pandoc (version)
@ -689,6 +691,31 @@ splitSentences xs =
  let (sent, rest) = breakSentence xs
  in  if null rest then [sent] else sent : splitSentences rest

+-- | Process ipynb output cells.  If mode is Nothing,
+-- remove all output.  If mode is Just format, select
+-- best output for the format.
+filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc
+filterIpynbOutput mode = walk go
+  where go (Div (ident, ("output":os), kvs) bs) =
+          case mode of
+            Nothing  -> Div (ident, ("output":os), kvs) []
+            Just fmt -> Div (ident, ("output":os), kvs) $
+              take 1 $ sortBy (comparing rank) bs
+                where
+                  rank (RawBlock (Format "html") _)
+                    | fmt == Format "html" = (1 :: Int)
+                    | fmt == Format "markdown" = 2
+                    | otherwise = 3
+                  rank (RawBlock (Format "latex") _)
+                    | fmt == Format "latex" = 1
+                    | fmt == Format "markdown" = 2
+                    | otherwise = 3
+                  rank (RawBlock f _)
+                    | fmt == f = 1
+                    | otherwise = 3
+                  rank _ = 2
+        go x = x
+
 --
 -- TagSoup HTML handling
 --