Shared: add filterIpynbOutput. [API change]

Add command line option `--ipynb-output=all|none|best`.

Closes #5339.
This commit is contained in:
John MacFarlane 2019-02-28 20:28:16 -08:00
parent 7aeabd8430
commit 77faccb505
5 changed files with 71 additions and 10 deletions

View file

@ -1192,6 +1192,15 @@ Options affecting specific writers {.options}
the EPUB-specific contents. The default is `EPUB`. To put
the EPUB contents in the top level, use an empty string.
`--ipynb-output=all|none|best`
: Determines how ipynb output cells are treated. `all` means
that all of the data formats included in the original are
preserved. `none` means that the contents of data cells
are omitted. `best` causes pandoc to try to pick the
richest data block in each output cell that is compatible
with the output format. The default is `best`.
`--pdf-engine=`*PROGRAM*
: Use the specified engine when producing PDF output.

View file

@ -73,7 +73,7 @@ import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.Readers.Markdown (yamlToMeta)
import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
headerShift, isURI, tabFilter, uriPathToPath)
headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput)
import qualified Text.Pandoc.UTF8 as UTF8
#ifndef _WINDOWS
import System.Posix.IO (stdOutput)
@ -247,8 +247,20 @@ convertWithOpts opts = do
(writerExtensions writerOptions) &&
writerWrapText writerOptions == WrapPreserve)
then (eastAsianLineBreakFilter :)
else id) $
[]
else id) .
(case optIpynbOutput opts of
"all" -> id
"none" -> (filterIpynbOutput Nothing :)
"best" -> (filterIpynbOutput (Just $
if htmlFormat writerName
then Format "html"
else
case writerName of
"latex" -> Format "latex"
"beamer" -> Format "latex"
_ -> Format writerName) :)
_ -> id) -- should not happen
$ []
let sourceToDoc :: [FilePath] -> PandocIO Pandoc
sourceToDoc sources' =
@ -293,15 +305,12 @@ convertWithOpts opts = do
TL.unpack (TE.decodeUtf8With TE.lenientDecode err')
Nothing -> do
let htmlFormat = format `elem`
["html","html4","html5","s5","slidy",
"slideous","dzslides","revealjs"]
addNl = if standalone
let addNl = if standalone
then id
else (<> T.singleton '\n')
output <- addNl <$> f writerOptions doc
writerFn eol outputFile =<<
if optSelfContained opts && htmlFormat
if optSelfContained opts && htmlFormat writerName
-- TODO not maximally efficient; change type
-- of makeSelfContained so it works w/ Text
then T.pack <$> makeSelfContained (T.unpack output)
@ -309,6 +318,10 @@ convertWithOpts opts = do
type Transform = Pandoc -> Pandoc
htmlFormat :: String -> Bool
htmlFormat = (`elem` ["html","html4","html5","s5","slidy",
"slideous","dzslides","revealjs"])
isTextFormat :: String -> Bool
isTextFormat s = s `notElem` ["odt","docx","epub2","epub3","epub","pptx"]

View file

@ -643,7 +643,17 @@ options =
"NUMBER")
"" -- "Header level at which to split chapters in EPUB"
, Option "" ["pdf-engine"]
, Option "" ["ipynb-output"]
(ReqArg
(\arg opt ->
if arg `notElem` ["all","none","best"]
then E.throwIO $ PandocOptionError $
"ipynb-output must be all, none, or best"
else return opt { optIpynbOutput = arg })
"all|none|best")
"" -- "Starting number for sections, subsections, etc."
, Option "" ["pdf-engine"]
(ReqArg
(\arg opt -> do
let b = takeBaseName arg

View file

@ -123,6 +123,7 @@ data Opt = Opt
, optFileScope :: Bool -- ^ Parse input files before combining
, optTitlePrefix :: Maybe String -- ^ Prefix for title
, optCss :: [FilePath] -- ^ CSS files to link to
, optIpynbOutput :: String -- ^ Maybe f to use best data; Nothing to omit
, optIncludeBeforeBody :: [FilePath] -- ^ Files to include before
, optIncludeAfterBody :: [FilePath] -- ^ Files to include after body
, optIncludeInHeader :: [FilePath] -- ^ Files to include in header
@ -196,6 +197,7 @@ defaultOpts = Opt
, optFileScope = False
, optTitlePrefix = Nothing
, optCss = []
, optIpynbOutput = "best"
, optIncludeBeforeBody = []
, optIncludeAfterBody = []
, optIncludeInHeader = []

View file

@ -86,6 +86,7 @@ module Text.Pandoc.Shared (
eastAsianLineBreakFilter,
underlineSpan,
splitSentences,
filterIpynbOutput,
-- * TagSoup HTML handling
renderTags',
-- * File handling
@ -122,12 +123,13 @@ import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
generalCategory, GeneralCategory(NonSpacingMark,
SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
import Data.Data (Data, Typeable)
import Data.List (find, intercalate, intersperse, stripPrefix)
import Data.List (find, intercalate, intersperse, stripPrefix, sortBy)
import qualified Data.Map as M
import Data.Maybe (mapMaybe)
import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr)
import qualified Data.Set as Set
import qualified Data.Text as T
import Data.Ord (comparing)
import Data.Version (showVersion)
import Network.URI (URI (uriScheme), escapeURIString, parseURI)
import Paths_pandoc (version)
@ -689,6 +691,31 @@ splitSentences xs =
let (sent, rest) = breakSentence xs
in if null rest then [sent] else sent : splitSentences rest
-- | Process ipynb output cells. If mode is Nothing,
-- remove all output. If mode is Just format, select
-- best output for the format.
filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc
filterIpynbOutput mode = walk go
where go (Div (ident, ("output":os), kvs) bs) =
case mode of
Nothing -> Div (ident, ("output":os), kvs) []
Just fmt -> Div (ident, ("output":os), kvs) $
take 1 $ sortBy (comparing rank) bs
where
rank (RawBlock (Format "html") _)
| fmt == Format "html" = (1 :: Int)
| fmt == Format "markdown" = 2
| otherwise = 3
rank (RawBlock (Format "latex") _)
| fmt == Format "latex" = 1
| fmt == Format "markdown" = 2
| otherwise = 3
rank (RawBlock f _)
| fmt == f = 1
| otherwise = 3
rank _ = 2
go x = x
--
-- TagSoup HTML handling
--