T.P.App: extract submodule T.P.App.FormatHeuristics

Format guessing is used for input and output options and should be
shared.
This commit is contained in:
Albert Krewinkel 2018-11-07 21:29:48 +01:00
parent 62a5f6fa85
commit 12f6cf13ad
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
3 changed files with 96 additions and 57 deletions

View file

@ -522,6 +522,7 @@ library
Text.Pandoc.BCP47,
Text.Pandoc.Class
other-modules: Text.Pandoc.App.CommandLineOptions,
Text.Pandoc.App.FormatHeuristics,
Text.Pandoc.App.Opt,
Text.Pandoc.App.OutputSettings,
Text.Pandoc.Filter.JSON,

View file

@ -62,6 +62,7 @@ import System.FilePath
import System.IO (nativeNewline, stdout)
import qualified System.IO as IO (Newline (..))
import Text.Pandoc
import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts)
import Text.Pandoc.App.CommandLineOptions (parseOptions, options)
import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
@ -337,63 +338,6 @@ readMetaValue s = case YAML.decodeStrict (UTF8.fromString s) of
-> MetaBool b
_ -> MetaString s
-- Determine default reader based on source file extensions.
formatFromFilePaths :: String -> [FilePath] -> String
formatFromFilePaths fallback [] = fallback
formatFromFilePaths fallback (x:xs) =
case formatFromFilePath x of
Just f -> f
Nothing -> formatFromFilePaths fallback xs
-- Determine format based on file extension
formatFromFilePath :: FilePath -> Maybe String
formatFromFilePath x =
case takeExtension (map toLower x) of
".adoc" -> Just "asciidoc"
".asciidoc" -> Just "asciidoc"
".context" -> Just "context"
".ctx" -> Just "context"
".db" -> Just "docbook"
".doc" -> Just "doc" -- so we get an "unknown reader" error
".docx" -> Just "docx"
".dokuwiki" -> Just "dokuwiki"
".epub" -> Just "epub"
".fb2" -> Just "fb2"
".htm" -> Just "html"
".html" -> Just "html"
".icml" -> Just "icml"
".json" -> Just "json"
".latex" -> Just "latex"
".lhs" -> Just "markdown+lhs"
".ltx" -> Just "latex"
".markdown" -> Just "markdown"
".md" -> Just "markdown"
".ms" -> Just "ms"
".muse" -> Just "muse"
".native" -> Just "native"
".odt" -> Just "odt"
".opml" -> Just "opml"
".org" -> Just "org"
".pdf" -> Just "pdf" -- so we get an "unknown reader" error
".pptx" -> Just "pptx"
".roff" -> Just "ms"
".rst" -> Just "rst"
".rtf" -> Just "rtf"
".s5" -> Just "s5"
".t2t" -> Just "t2t"
".tei" -> Just "tei"
".tei.xml" -> Just "tei"
".tex" -> Just "latex"
".texi" -> Just "texinfo"
".texinfo" -> Just "texinfo"
".text" -> Just "markdown"
".textile" -> Just "textile"
".txt" -> Just "markdown"
".wiki" -> Just "mediawiki"
".xhtml" -> Just "html"
['.',y] | y `elem` ['1'..'9'] -> Just "man"
_ -> Nothing
-- Transformations of a Pandoc document post-parsing:
applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc

View file

@ -0,0 +1,94 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-
Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.App.FormatHeuristics
Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley@edu>
Stability : alpha
Portability : portable
Guess the format of a file from its name.
-}
module Text.Pandoc.App.FormatHeuristics
( formatFromFilePaths
) where
import Prelude
import Data.Char (toLower)
import System.FilePath (takeExtension)
-- Determine default reader based on source file extensions.
formatFromFilePaths :: String -> [FilePath] -> String
formatFromFilePaths fallback [] = fallback
formatFromFilePaths fallback (x:xs) =
case formatFromFilePath x of
Just f -> f
Nothing -> formatFromFilePaths fallback xs
-- Determine format based on file extension
formatFromFilePath :: FilePath -> Maybe String
formatFromFilePath x =
case takeExtension (map toLower x) of
".adoc" -> Just "asciidoc"
".asciidoc" -> Just "asciidoc"
".context" -> Just "context"
".ctx" -> Just "context"
".db" -> Just "docbook"
".doc" -> Just "doc" -- so we get an "unknown reader" error
".docx" -> Just "docx"
".dokuwiki" -> Just "dokuwiki"
".epub" -> Just "epub"
".fb2" -> Just "fb2"
".htm" -> Just "html"
".html" -> Just "html"
".icml" -> Just "icml"
".json" -> Just "json"
".latex" -> Just "latex"
".lhs" -> Just "markdown+lhs"
".ltx" -> Just "latex"
".markdown" -> Just "markdown"
".md" -> Just "markdown"
".ms" -> Just "ms"
".muse" -> Just "muse"
".native" -> Just "native"
".odt" -> Just "odt"
".opml" -> Just "opml"
".org" -> Just "org"
".pdf" -> Just "pdf" -- so we get an "unknown reader" error
".pptx" -> Just "pptx"
".roff" -> Just "ms"
".rst" -> Just "rst"
".rtf" -> Just "rtf"
".s5" -> Just "s5"
".t2t" -> Just "t2t"
".tei" -> Just "tei"
".tei.xml" -> Just "tei"
".tex" -> Just "latex"
".texi" -> Just "texinfo"
".texinfo" -> Just "texinfo"
".text" -> Just "markdown"
".textile" -> Just "textile"
".txt" -> Just "markdown"
".wiki" -> Just "mediawiki"
".xhtml" -> Just "html"
['.',y] | y `elem` ['1'..'9'] -> Just "man"
_ -> Nothing