2017-10-27 20:28:29 -07:00
|
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
|
{-# LANGUAGE DeriveDataTypeable #-}
|
|
|
|
|
{-# LANGUAGE FlexibleContexts #-}
|
|
|
|
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
2017-10-27 23:13:55 -07:00
|
|
|
|
|
2017-10-27 20:28:29 -07:00
|
|
|
|
{-# LANGUAGE ScopedTypeVariables #-}
|
|
|
|
|
{-# LANGUAGE ViewPatterns #-}
|
2007-11-03 23:27:58 +00:00
|
|
|
|
{-
|
2017-05-13 23:30:13 +02:00
|
|
|
|
Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
|
-}
|
|
|
|
|
|
|
|
|
|
{- |
|
|
|
|
|
Module : Text.Pandoc.Shared
|
2017-05-13 23:30:13 +02:00
|
|
|
|
Copyright : Copyright (C) 2006-2017 John MacFarlane
|
2012-07-26 22:32:53 -07:00
|
|
|
|
License : GNU GPL, version 2 or above
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
|
|
|
Stability : alpha
|
|
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
|
|
Utility functions and definitions used by the various Pandoc modules.
|
|
|
|
|
-}
|
2010-03-23 15:05:33 -07:00
|
|
|
|
module Text.Pandoc.Shared (
|
2007-11-03 23:27:58 +00:00
|
|
|
|
-- * List processing
|
|
|
|
|
splitBy,
|
|
|
|
|
splitByIndices,
|
2012-01-27 00:37:46 -08:00
|
|
|
|
splitStringByIndices,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
substitute,
|
2014-06-03 11:00:54 -07:00
|
|
|
|
ordNub,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
-- * Text processing
|
|
|
|
|
backslashEscapes,
|
|
|
|
|
escapeStringUsing,
|
|
|
|
|
stripTrailingNewlines,
|
2012-09-29 17:09:34 -04:00
|
|
|
|
trim,
|
|
|
|
|
triml,
|
|
|
|
|
trimr,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
stripFirstAndLast,
|
|
|
|
|
camelCaseToHyphenated,
|
|
|
|
|
toRomanNumeral,
|
2010-03-23 15:05:33 -07:00
|
|
|
|
escapeURI,
|
2010-07-06 23:17:06 -07:00
|
|
|
|
tabFilter,
|
2017-06-20 21:52:13 +02:00
|
|
|
|
crFilter,
|
2012-01-28 15:54:05 -08:00
|
|
|
|
-- * Date/time
|
|
|
|
|
normalizeDate,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
-- * Pandoc block and inline list processing
|
|
|
|
|
orderedListMarkers,
|
2014-06-16 20:45:54 +01:00
|
|
|
|
extractSpaces,
|
2014-07-13 10:13:22 -07:00
|
|
|
|
removeFormatting,
|
2017-01-15 22:34:14 +01:00
|
|
|
|
deNote,
|
2010-11-27 07:08:06 -08:00
|
|
|
|
stringify,
|
2014-08-03 16:48:55 +04:00
|
|
|
|
capitalize,
|
2017-01-27 21:36:45 +01:00
|
|
|
|
compactify,
|
|
|
|
|
compactifyDL,
|
2016-10-13 08:46:38 +02:00
|
|
|
|
linesToPara,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
Element (..),
|
|
|
|
|
hierarchicalize,
|
2010-03-16 06:45:52 +00:00
|
|
|
|
uniqueIdent,
|
2016-10-12 17:42:30 +02:00
|
|
|
|
inlineListToIdentifier,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
isHeaderBlock,
|
2010-07-11 20:03:55 -07:00
|
|
|
|
headerShift,
|
2013-01-07 20:12:05 -08:00
|
|
|
|
isTightList,
|
2013-05-10 22:53:35 -07:00
|
|
|
|
addMetaField,
|
|
|
|
|
makeMeta,
|
2017-05-30 10:22:48 +02:00
|
|
|
|
eastAsianLineBreakFilter,
|
2017-10-27 18:45:00 -04:00
|
|
|
|
underlineSpan,
|
2012-08-15 09:42:16 -07:00
|
|
|
|
-- * TagSoup HTML handling
|
|
|
|
|
renderTags',
|
2008-07-31 23:16:02 +00:00
|
|
|
|
-- * File handling
|
2009-12-31 01:11:23 +00:00
|
|
|
|
inDirectory,
|
2014-08-08 20:10:58 +01:00
|
|
|
|
collapseFilePath,
|
2016-10-12 17:42:30 +02:00
|
|
|
|
filteredFilesFromArchive,
|
2017-05-23 09:48:11 +02:00
|
|
|
|
-- * URI handling
|
|
|
|
|
schemes,
|
|
|
|
|
isURI,
|
2012-01-29 23:54:00 -08:00
|
|
|
|
-- * Error handling
|
2015-02-18 21:05:47 +00:00
|
|
|
|
mapLeft,
|
2016-06-22 13:04:25 -04:00
|
|
|
|
-- * for squashing blocks
|
|
|
|
|
blocksToInlines,
|
2012-08-09 07:52:39 -07:00
|
|
|
|
-- * Safe read
|
2014-07-30 12:29:04 -07:00
|
|
|
|
safeRead,
|
|
|
|
|
-- * Temp directory
|
2015-09-25 03:54:41 +08:00
|
|
|
|
withTempDir,
|
|
|
|
|
-- * Version
|
|
|
|
|
pandocVersion
|
2007-11-03 23:27:58 +00:00
|
|
|
|
) where
|
|
|
|
|
|
2017-10-27 20:28:29 -07:00
|
|
|
|
import Codec.Archive.Zip
|
|
|
|
|
import qualified Control.Exception as E
|
|
|
|
|
import Control.Monad (MonadPlus (..), msum, unless)
|
|
|
|
|
import qualified Control.Monad.State.Strict as S
|
|
|
|
|
import qualified Data.ByteString.Lazy as BL
|
|
|
|
|
import Data.Char (isAlpha, isDigit, isLetter, isLower, isSpace, isUpper,
|
|
|
|
|
toLower)
|
2017-10-27 21:44:22 -07:00
|
|
|
|
import Data.Data (Data, Typeable)
|
2017-10-27 20:28:29 -07:00
|
|
|
|
import Data.List (find, intercalate, stripPrefix)
|
2013-05-10 22:53:35 -07:00
|
|
|
|
import qualified Data.Map as M
|
2017-10-27 20:28:29 -07:00
|
|
|
|
import Data.Maybe (mapMaybe)
|
|
|
|
|
import Data.Monoid ((<>))
|
|
|
|
|
import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr)
|
2014-06-03 11:00:54 -07:00
|
|
|
|
import qualified Data.Set as Set
|
2017-10-27 20:28:29 -07:00
|
|
|
|
import qualified Data.Text as T
|
|
|
|
|
import Data.Version (showVersion)
|
|
|
|
|
import Network.URI (URI (uriScheme), escapeURIString, parseURI)
|
|
|
|
|
import Paths_pandoc (version)
|
2008-07-31 23:16:02 +00:00
|
|
|
|
import System.Directory
|
2017-10-27 20:28:29 -07:00
|
|
|
|
import System.FilePath (isPathSeparator, splitDirectories)
|
2015-09-26 22:40:58 -07:00
|
|
|
|
import qualified System.FilePath.Posix as Posix
|
2014-07-30 12:29:04 -07:00
|
|
|
|
import System.IO.Temp
|
2017-10-27 20:28:29 -07:00
|
|
|
|
import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions,
|
|
|
|
|
renderTagsOptions)
|
|
|
|
|
import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..))
|
|
|
|
|
import qualified Text.Pandoc.Builder as B
|
|
|
|
|
import Text.Pandoc.Compat.Time
|
|
|
|
|
import Text.Pandoc.Definition
|
|
|
|
|
import Text.Pandoc.Generic (bottomUp)
|
|
|
|
|
import Text.Pandoc.Pretty (charWidth)
|
|
|
|
|
import Text.Pandoc.Walk
|
2015-07-30 22:39:25 +01:00
|
|
|
|
|
2015-09-25 03:54:41 +08:00
|
|
|
|
-- | Version number of pandoc library.
|
|
|
|
|
pandocVersion :: String
|
|
|
|
|
pandocVersion = showVersion version
|
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
|
--
|
|
|
|
|
-- List processing
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
-- | Split list by groups of one or more sep.
|
2010-12-21 08:41:24 -08:00
|
|
|
|
splitBy :: (a -> Bool) -> [a] -> [[a]]
|
2007-11-03 23:27:58 +00:00
|
|
|
|
splitBy _ [] = []
|
2010-12-21 08:41:24 -08:00
|
|
|
|
splitBy isSep lst =
|
|
|
|
|
let (first, rest) = break isSep lst
|
|
|
|
|
rest' = dropWhile isSep rest
|
2017-10-27 23:13:55 -07:00
|
|
|
|
in first:splitBy isSep rest'
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
splitByIndices :: [Int] -> [a] -> [[a]]
|
|
|
|
|
splitByIndices [] lst = [lst]
|
2017-10-27 23:13:55 -07:00
|
|
|
|
splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x) xs) rest
|
2012-01-27 00:37:46 -08:00
|
|
|
|
where (first, rest) = splitAt x lst
|
|
|
|
|
|
|
|
|
|
-- | Split string into chunks divided at specified indices.
|
|
|
|
|
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
|
|
|
|
|
splitStringByIndices [] lst = [lst]
|
|
|
|
|
splitStringByIndices (x:xs) lst =
|
|
|
|
|
let (first, rest) = splitAt' x lst in
|
2017-10-27 23:13:55 -07:00
|
|
|
|
first : splitStringByIndices (map (\y -> y - x) xs) rest
|
2012-01-27 00:37:46 -08:00
|
|
|
|
|
|
|
|
|
splitAt' :: Int -> [Char] -> ([Char],[Char])
|
|
|
|
|
splitAt' _ [] = ([],[])
|
|
|
|
|
splitAt' n xs | n <= 0 = ([],xs)
|
|
|
|
|
splitAt' n (x:xs) = (x:ys,zs)
|
|
|
|
|
where (ys,zs) = splitAt' (n - charWidth x) xs
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Replace each occurrence of one sublist in a list with another.
|
|
|
|
|
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
|
|
|
|
|
substitute _ _ [] = []
|
2010-07-11 12:22:18 -07:00
|
|
|
|
substitute [] _ xs = xs
|
|
|
|
|
substitute target replacement lst@(x:xs) =
|
2014-08-03 14:44:39 +04:00
|
|
|
|
case stripPrefix target lst of
|
|
|
|
|
Just lst' -> replacement ++ substitute target replacement lst'
|
|
|
|
|
Nothing -> x : substitute target replacement xs
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
2014-06-03 11:00:54 -07:00
|
|
|
|
ordNub :: (Ord a) => [a] -> [a]
|
|
|
|
|
ordNub l = go Set.empty l
|
|
|
|
|
where
|
|
|
|
|
go _ [] = []
|
|
|
|
|
go s (x:xs) = if x `Set.member` s then go s xs
|
|
|
|
|
else x : go (Set.insert x s) xs
|
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
|
--
|
|
|
|
|
-- Text processing
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
-- | Returns an association list of backslash escapes for the
|
|
|
|
|
-- designated characters.
|
|
|
|
|
backslashEscapes :: [Char] -- ^ list of special characters to escape
|
|
|
|
|
-> [(Char, String)]
|
|
|
|
|
backslashEscapes = map (\ch -> (ch, ['\\',ch]))
|
|
|
|
|
|
|
|
|
|
-- | Escape a string of characters, using an association list of
|
|
|
|
|
-- characters and strings.
|
|
|
|
|
escapeStringUsing :: [(Char, String)] -> String -> String
|
|
|
|
|
escapeStringUsing _ [] = ""
|
2012-07-26 22:32:53 -07:00
|
|
|
|
escapeStringUsing escapeTable (x:xs) =
|
2017-10-27 23:13:55 -07:00
|
|
|
|
case lookup x escapeTable of
|
2017-10-27 20:28:29 -07:00
|
|
|
|
Just str -> str ++ rest
|
|
|
|
|
Nothing -> x:rest
|
2007-11-03 23:27:58 +00:00
|
|
|
|
where rest = escapeStringUsing escapeTable xs
|
|
|
|
|
|
|
|
|
|
-- | Strip trailing newlines from string.
|
|
|
|
|
stripTrailingNewlines :: String -> String
|
|
|
|
|
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
|
|
|
|
|
|
|
|
|
|
-- | Remove leading and trailing space (including newlines) from string.
|
2012-09-29 17:09:34 -04:00
|
|
|
|
trim :: String -> String
|
|
|
|
|
trim = triml . trimr
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Remove leading space (including newlines) from string.
|
2012-09-29 17:09:34 -04:00
|
|
|
|
triml :: String -> String
|
2015-11-09 10:08:22 -08:00
|
|
|
|
triml = dropWhile (`elem` " \r\n\t")
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Remove trailing space (including newlines) from string.
|
2012-09-29 17:09:34 -04:00
|
|
|
|
trimr :: String -> String
|
|
|
|
|
trimr = reverse . triml . reverse
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Strip leading and trailing characters from string
|
|
|
|
|
stripFirstAndLast :: String -> String
|
|
|
|
|
stripFirstAndLast str =
|
2017-10-27 23:13:55 -07:00
|
|
|
|
drop 1 $ take (length str - 1) str
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
2012-07-26 22:32:53 -07:00
|
|
|
|
-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
|
2007-11-03 23:27:58 +00:00
|
|
|
|
camelCaseToHyphenated :: String -> String
|
|
|
|
|
camelCaseToHyphenated [] = ""
|
|
|
|
|
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
|
2017-10-27 23:13:55 -07:00
|
|
|
|
a:'-':toLower b:camelCaseToHyphenated rest
|
|
|
|
|
camelCaseToHyphenated (a:rest) = toLower a:camelCaseToHyphenated rest
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Convert number < 4000 to uppercase roman numeral.
|
|
|
|
|
toRomanNumeral :: Int -> String
|
2017-02-15 02:00:23 +04:00
|
|
|
|
toRomanNumeral x
|
|
|
|
|
| x >= 4000 || x < 0 = "?"
|
|
|
|
|
| x >= 1000 = "M" ++ toRomanNumeral (x - 1000)
|
|
|
|
|
| x >= 900 = "CM" ++ toRomanNumeral (x - 900)
|
|
|
|
|
| x >= 500 = "D" ++ toRomanNumeral (x - 500)
|
|
|
|
|
| x >= 400 = "CD" ++ toRomanNumeral (x - 400)
|
|
|
|
|
| x >= 100 = "C" ++ toRomanNumeral (x - 100)
|
|
|
|
|
| x >= 90 = "XC" ++ toRomanNumeral (x - 90)
|
|
|
|
|
| x >= 50 = "L" ++ toRomanNumeral (x - 50)
|
|
|
|
|
| x >= 40 = "XL" ++ toRomanNumeral (x - 40)
|
|
|
|
|
| x >= 10 = "X" ++ toRomanNumeral (x - 10)
|
|
|
|
|
| x == 9 = "IX"
|
|
|
|
|
| x >= 5 = "V" ++ toRomanNumeral (x - 5)
|
|
|
|
|
| x == 4 = "IV"
|
|
|
|
|
| x >= 1 = "I" ++ toRomanNumeral (x - 1)
|
|
|
|
|
| otherwise = ""
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
Percent-encode more special characters in URLs.
HTML, LaTeX writers adjusted.
The special characters are '<','>','|','"','{','}','[',']','^', '`'.
Closes #1640, #2377.
2015-10-11 17:06:26 -07:00
|
|
|
|
-- | Escape whitespace and some punctuation characters in URI.
|
2010-03-23 15:05:33 -07:00
|
|
|
|
escapeURI :: String -> String
|
Percent-encode more special characters in URLs.
HTML, LaTeX writers adjusted.
The special characters are '<','>','|','"','{','}','[',']','^', '`'.
Closes #1640, #2377.
2015-10-11 17:06:26 -07:00
|
|
|
|
escapeURI = escapeURIString (not . needsEscaping)
|
|
|
|
|
where needsEscaping c = isSpace c || c `elem`
|
|
|
|
|
['<','>','|','"','{','}','[',']','^', '`']
|
|
|
|
|
|
2017-06-20 21:52:13 +02:00
|
|
|
|
-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0.
|
2010-07-06 23:17:06 -07:00
|
|
|
|
tabFilter :: Int -- ^ Tab stop
|
2017-06-10 15:22:25 +02:00
|
|
|
|
-> T.Text -- ^ Input
|
|
|
|
|
-> T.Text
|
2017-06-20 21:52:13 +02:00
|
|
|
|
tabFilter 0 = id
|
|
|
|
|
tabFilter tabStop = T.unlines . map go . T.lines
|
2017-06-10 15:22:25 +02:00
|
|
|
|
where go s =
|
|
|
|
|
let (s1, s2) = T.break (== '\t') s
|
|
|
|
|
in if T.null s2
|
|
|
|
|
then s1
|
|
|
|
|
else s1 <> T.replicate
|
|
|
|
|
(tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
|
|
|
|
|
<> go (T.drop 1 s2)
|
2010-07-06 23:17:06 -07:00
|
|
|
|
|
2017-06-20 21:52:13 +02:00
|
|
|
|
-- | Strip out DOS line endings.
|
|
|
|
|
crFilter :: T.Text -> T.Text
|
|
|
|
|
crFilter = T.filter (/= '\r')
|
|
|
|
|
|
2012-01-28 15:54:05 -08:00
|
|
|
|
--
|
|
|
|
|
-- Date/time
|
|
|
|
|
--
|
|
|
|
|
|
2016-07-09 15:37:47 -04:00
|
|
|
|
-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
|
|
|
|
|
-- limit years to the range 1601-9999 (ISO 8601 accepts greater than
|
2016-07-09 17:03:39 -04:00
|
|
|
|
-- or equal to 1583, but MS Word only accepts dates starting 1601).
|
2012-01-28 15:54:05 -08:00
|
|
|
|
normalizeDate :: String -> Maybe String
|
2016-07-09 17:03:39 -04:00
|
|
|
|
normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
|
|
|
|
|
(msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
|
|
|
|
|
where rejectBadYear day = case toGregorian day of
|
|
|
|
|
(y, _, _) | y >= 1601 && y <= 9999 -> Just day
|
2017-10-27 20:28:29 -07:00
|
|
|
|
_ -> Nothing
|
2016-07-09 15:37:47 -04:00
|
|
|
|
parsetimeWith =
|
2015-10-14 10:05:17 -07:00
|
|
|
|
#if MIN_VERSION_time(1,5,0)
|
|
|
|
|
parseTimeM True defaultTimeLocale
|
|
|
|
|
#else
|
|
|
|
|
parseTime defaultTimeLocale
|
|
|
|
|
#endif
|
2016-07-09 15:37:47 -04:00
|
|
|
|
formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
|
2016-07-09 11:13:25 -04:00
|
|
|
|
"%d %B %Y", "%b. %d, %Y", "%B %d, %Y",
|
|
|
|
|
"%Y%m%d", "%Y%m", "%Y"]
|
2012-01-28 15:54:05 -08:00
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
|
--
|
|
|
|
|
-- Pandoc block and inline list processing
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
-- | Generate infinite lazy list of markers for an ordered list,
|
|
|
|
|
-- depending on list attributes.
|
|
|
|
|
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
|
2012-07-26 22:32:53 -07:00
|
|
|
|
orderedListMarkers (start, numstyle, numdelim) =
|
2007-11-03 23:27:58 +00:00
|
|
|
|
let singleton c = [c]
|
|
|
|
|
nums = case numstyle of
|
|
|
|
|
DefaultStyle -> map show [start..]
|
2010-07-11 22:47:52 -07:00
|
|
|
|
Example -> map show [start..]
|
2007-11-03 23:27:58 +00:00
|
|
|
|
Decimal -> map show [start..]
|
2012-07-26 22:32:53 -07:00
|
|
|
|
UpperAlpha -> drop (start - 1) $ cycle $
|
2007-11-03 23:27:58 +00:00
|
|
|
|
map singleton ['A'..'Z']
|
|
|
|
|
LowerAlpha -> drop (start - 1) $ cycle $
|
|
|
|
|
map singleton ['a'..'z']
|
|
|
|
|
UpperRoman -> map toRomanNumeral [start..]
|
|
|
|
|
LowerRoman -> map (map toLower . toRomanNumeral) [start..]
|
|
|
|
|
inDelim str = case numdelim of
|
|
|
|
|
DefaultDelim -> str ++ "."
|
|
|
|
|
Period -> str ++ "."
|
|
|
|
|
OneParen -> str ++ ")"
|
|
|
|
|
TwoParens -> "(" ++ str ++ ")"
|
|
|
|
|
in map inDelim nums
|
|
|
|
|
|
2014-06-16 20:45:54 +01:00
|
|
|
|
-- | Extract the leading and trailing spaces from inside an inline element
|
2015-12-11 15:58:11 -08:00
|
|
|
|
-- and place them outside the element. SoftBreaks count as Spaces for
|
|
|
|
|
-- these purposes.
|
2014-06-16 20:45:54 +01:00
|
|
|
|
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
|
2014-07-12 22:57:22 -07:00
|
|
|
|
extractSpaces f is =
|
2014-06-16 20:45:54 +01:00
|
|
|
|
let contents = B.unMany is
|
|
|
|
|
left = case viewl contents of
|
2015-12-11 15:58:11 -08:00
|
|
|
|
(Space :< _) -> B.space
|
|
|
|
|
(SoftBreak :< _) -> B.softbreak
|
|
|
|
|
_ -> mempty
|
2014-06-16 20:45:54 +01:00
|
|
|
|
right = case viewr contents of
|
2015-12-11 15:58:11 -08:00
|
|
|
|
(_ :> Space) -> B.space
|
|
|
|
|
(_ :> SoftBreak) -> B.softbreak
|
|
|
|
|
_ -> mempty in
|
2014-06-16 20:45:54 +01:00
|
|
|
|
(left <> f (B.trimInlines . B.Many $ contents) <> right)
|
|
|
|
|
|
2014-07-13 15:10:27 -07:00
|
|
|
|
-- | Extract inlines, removing formatting.
|
2014-07-13 14:56:20 -07:00
|
|
|
|
removeFormatting :: Walkable Inline a => a -> [Inline]
|
2017-10-08 21:55:57 -07:00
|
|
|
|
removeFormatting = query go . walk (deNote . deQuote)
|
2014-07-13 10:13:22 -07:00
|
|
|
|
where go :: Inline -> [Inline]
|
2017-10-27 20:28:29 -07:00
|
|
|
|
go (Str xs) = [Str xs]
|
|
|
|
|
go Space = [Space]
|
|
|
|
|
go SoftBreak = [SoftBreak]
|
|
|
|
|
go (Code _ x) = [Str x]
|
|
|
|
|
go (Math _ x) = [Str x]
|
|
|
|
|
go LineBreak = [Space]
|
|
|
|
|
go _ = []
|
2017-01-15 22:15:35 +01:00
|
|
|
|
|
|
|
|
|
deNote :: Inline -> Inline
|
|
|
|
|
deNote (Note _) = Str ""
|
|
|
|
|
deNote x = x
|
2014-07-13 10:13:22 -07:00
|
|
|
|
|
2017-10-08 21:55:57 -07:00
|
|
|
|
deQuote :: Inline -> Inline
|
|
|
|
|
deQuote (Quoted SingleQuote xs) =
|
|
|
|
|
Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"])
|
|
|
|
|
deQuote (Quoted DoubleQuote xs) =
|
|
|
|
|
Span ("",[],[]) (Str "\8220" : xs ++ [Str "\8221"])
|
|
|
|
|
deQuote x = x
|
|
|
|
|
|
2013-08-28 08:43:51 -07:00
|
|
|
|
-- | Convert pandoc structure to a string with formatting removed.
|
2013-08-16 13:22:27 -07:00
|
|
|
|
-- Footnotes are skipped (since we don't want their contents in link
|
|
|
|
|
-- labels).
|
2013-08-28 08:43:51 -07:00
|
|
|
|
stringify :: Walkable Inline a => a -> String
|
2017-10-08 21:55:57 -07:00
|
|
|
|
stringify = query go . walk (deNote . deQuote)
|
2010-11-27 07:08:06 -08:00
|
|
|
|
where go :: Inline -> [Char]
|
2017-10-27 20:28:29 -07:00
|
|
|
|
go Space = " "
|
|
|
|
|
go SoftBreak = " "
|
|
|
|
|
go (Str x) = x
|
|
|
|
|
go (Code _ x) = x
|
|
|
|
|
go (Math _ x) = x
|
2015-08-10 16:58:47 -07:00
|
|
|
|
go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
|
2017-10-27 20:28:29 -07:00
|
|
|
|
go LineBreak = " "
|
|
|
|
|
go _ = ""
|
2010-11-27 07:08:06 -08:00
|
|
|
|
|
2014-08-03 16:48:55 +04:00
|
|
|
|
-- | Bring all regular text in a pandoc structure to uppercase.
|
2014-08-08 20:10:58 +01:00
|
|
|
|
--
|
2014-08-03 16:48:55 +04:00
|
|
|
|
-- This function correctly handles cases where a lowercase character doesn't
|
|
|
|
|
-- match to a single uppercase character – e.g. “Straße” would be converted
|
|
|
|
|
-- to “STRASSE”, not “STRAßE”.
|
|
|
|
|
capitalize :: Walkable Inline a => a -> a
|
|
|
|
|
capitalize = walk go
|
|
|
|
|
where go :: Inline -> Inline
|
|
|
|
|
go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
|
|
|
|
|
go x = x
|
|
|
|
|
|
2012-09-27 17:22:17 -07:00
|
|
|
|
-- | Change final list item from @Para@ to @Plain@ if the list contains
|
|
|
|
|
-- no other @Para@ blocks. Like compactify, but operates on @Blocks@ rather
|
|
|
|
|
-- than @[Block]@.
|
2017-01-27 21:36:45 +01:00
|
|
|
|
compactify :: [Blocks] -- ^ List of list items (each a list of blocks)
|
2012-09-27 17:22:17 -07:00
|
|
|
|
-> [Blocks]
|
2017-01-27 21:36:45 +01:00
|
|
|
|
compactify [] = []
|
|
|
|
|
compactify items =
|
2012-09-27 17:22:17 -07:00
|
|
|
|
let (others, final) = (init items, last items)
|
|
|
|
|
in case reverse (B.toList final) of
|
|
|
|
|
(Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
|
|
|
|
|
-- if this is only Para, change to Plain
|
|
|
|
|
[_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
|
|
|
|
|
_ -> items
|
|
|
|
|
_ -> items
|
|
|
|
|
|
2017-01-27 21:36:45 +01:00
|
|
|
|
-- | Like @compactify@, but acts on items of definition lists.
|
|
|
|
|
compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
|
|
|
|
|
compactifyDL items =
|
2014-04-19 14:48:35 +02:00
|
|
|
|
let defs = concatMap snd items
|
2014-07-25 10:53:04 -07:00
|
|
|
|
in case reverse (concatMap B.toList defs) of
|
|
|
|
|
(Para x:xs)
|
|
|
|
|
| not (any isPara xs) ->
|
|
|
|
|
let (t,ds) = last items
|
|
|
|
|
lastDef = B.toList $ last ds
|
|
|
|
|
ds' = init ds ++
|
|
|
|
|
if null lastDef
|
|
|
|
|
then [B.fromList lastDef]
|
|
|
|
|
else [B.fromList $ init lastDef ++ [Plain x]]
|
|
|
|
|
in init items ++ [(t, ds')]
|
|
|
|
|
| otherwise -> items
|
|
|
|
|
_ -> items
|
2014-04-19 14:48:35 +02:00
|
|
|
|
|
2016-10-13 08:46:38 +02:00
|
|
|
|
-- | Combine a list of lines by adding hard linebreaks.
|
|
|
|
|
combineLines :: [[Inline]] -> [Inline]
|
|
|
|
|
combineLines = intercalate [LineBreak]
|
|
|
|
|
|
|
|
|
|
-- | Convert a list of lines into a paragraph with hard line breaks. This is
|
|
|
|
|
-- useful e.g. for rudimentary support of LineBlock elements in writers.
|
|
|
|
|
linesToPara :: [[Inline]] -> Block
|
|
|
|
|
linesToPara = Para . combineLines
|
|
|
|
|
|
2009-11-01 02:38:18 +00:00
|
|
|
|
isPara :: Block -> Bool
|
|
|
|
|
isPara (Para _) = True
|
|
|
|
|
isPara _ = False
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Data structure for defining hierarchical Pandoc documents
|
2012-07-26 22:32:53 -07:00
|
|
|
|
data Element = Blk Block
|
2013-02-12 20:13:23 -08:00
|
|
|
|
| Sec Int [Int] Attr [Inline] [Element]
|
|
|
|
|
-- lvl num attributes label contents
|
2009-04-25 00:29:58 +00:00
|
|
|
|
deriving (Eq, Read, Show, Typeable, Data)
|
|
|
|
|
|
2013-08-10 18:13:38 -07:00
|
|
|
|
instance Walkable Inline Element where
|
|
|
|
|
walk f (Blk x) = Blk (walk f x)
|
|
|
|
|
walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
|
|
|
|
|
walkM f (Blk x) = Blk `fmap` walkM f x
|
|
|
|
|
walkM f (Sec lev nums attr ils elts) = do
|
|
|
|
|
ils' <- walkM f ils
|
|
|
|
|
elts' <- walkM f elts
|
|
|
|
|
return $ Sec lev nums attr ils' elts'
|
2017-10-27 20:28:29 -07:00
|
|
|
|
query f (Blk x) = query f x
|
2013-08-10 18:13:38 -07:00
|
|
|
|
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
|
|
|
|
|
|
|
|
|
|
instance Walkable Block Element where
|
|
|
|
|
walk f (Blk x) = Blk (walk f x)
|
|
|
|
|
walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
|
|
|
|
|
walkM f (Blk x) = Blk `fmap` walkM f x
|
|
|
|
|
walkM f (Sec lev nums attr ils elts) = do
|
|
|
|
|
ils' <- walkM f ils
|
|
|
|
|
elts' <- walkM f elts
|
|
|
|
|
return $ Sec lev nums attr ils' elts'
|
2017-10-27 20:28:29 -07:00
|
|
|
|
query f (Blk x) = query f x
|
2013-08-10 18:13:38 -07:00
|
|
|
|
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
|
|
|
|
|
|
|
|
|
|
|
2010-03-28 22:29:31 -07:00
|
|
|
|
-- | Convert Pandoc inline list to plain text identifier. HTML
|
|
|
|
|
-- identifiers must start with a letter, and may contain only
|
2010-07-04 23:26:04 -07:00
|
|
|
|
-- letters, digits, and the characters _-.
|
2009-04-25 00:29:58 +00:00
|
|
|
|
inlineListToIdentifier :: [Inline] -> String
|
2010-03-28 22:29:31 -07:00
|
|
|
|
inlineListToIdentifier =
|
2010-12-19 10:13:36 -08:00
|
|
|
|
dropWhile (not . isAlpha) . intercalate "-" . words .
|
|
|
|
|
map (nbspToSp . toLower) .
|
2015-11-09 10:08:22 -08:00
|
|
|
|
filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
|
2010-12-19 10:13:36 -08:00
|
|
|
|
stringify
|
2017-10-27 20:28:29 -07:00
|
|
|
|
where nbspToSp '\160' = ' '
|
|
|
|
|
nbspToSp x = x
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
|
|
|
|
|
hierarchicalize :: [Block] -> [Element]
|
2014-07-21 20:47:18 -07:00
|
|
|
|
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
|
|
|
|
|
|
|
|
|
|
hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
|
|
|
|
|
hierarchicalizeWithIds [] = return []
|
2017-10-27 23:13:55 -07:00
|
|
|
|
hierarchicalizeWithIds (Header level attr@(_,classes,_) title':xs) = do
|
2014-07-21 20:47:18 -07:00
|
|
|
|
lastnum <- S.get
|
|
|
|
|
let lastnum' = take level lastnum
|
|
|
|
|
let newnum = case length lastnum' of
|
|
|
|
|
x | "unnumbered" `elem` classes -> []
|
|
|
|
|
| x >= level -> init lastnum' ++ [last lastnum' + 1]
|
|
|
|
|
| otherwise -> lastnum ++
|
|
|
|
|
replicate (level - length lastnum - 1) 0 ++ [1]
|
|
|
|
|
unless (null newnum) $ S.put newnum
|
2009-04-25 00:29:58 +00:00
|
|
|
|
let (sectionContents, rest) = break (headerLtEq level) xs
|
2014-07-21 20:47:18 -07:00
|
|
|
|
sectionContents' <- hierarchicalizeWithIds sectionContents
|
|
|
|
|
rest' <- hierarchicalizeWithIds rest
|
2013-02-12 20:13:23 -08:00
|
|
|
|
return $ Sec level newnum attr title' sectionContents' : rest'
|
2017-10-27 23:13:55 -07:00
|
|
|
|
hierarchicalizeWithIds (Div ("",["references"],[])
|
|
|
|
|
(Header level (ident,classes,kvs) title' : xs):ys) =
|
|
|
|
|
hierarchicalizeWithIds (Header level (ident,("references":classes),kvs)
|
|
|
|
|
title' : (xs ++ ys))
|
2014-07-21 20:47:18 -07:00
|
|
|
|
hierarchicalizeWithIds (x:rest) = do
|
|
|
|
|
rest' <- hierarchicalizeWithIds rest
|
2017-10-27 23:13:55 -07:00
|
|
|
|
return $ Blk x : rest'
|
2009-04-25 00:29:58 +00:00
|
|
|
|
|
|
|
|
|
headerLtEq :: Int -> Block -> Bool
|
2017-10-27 20:28:29 -07:00
|
|
|
|
headerLtEq level (Header l _ _) = l <= level
|
|
|
|
|
headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level
|
|
|
|
|
headerLtEq _ _ = False
|
2009-04-25 00:29:58 +00:00
|
|
|
|
|
2010-03-16 06:45:52 +00:00
|
|
|
|
-- | Generate a unique identifier from a list of inlines.
|
|
|
|
|
-- Second argument is a list of already used identifiers.
|
2016-01-22 10:16:47 -08:00
|
|
|
|
uniqueIdent :: [Inline] -> Set.Set String -> String
|
2015-07-23 09:06:14 +02:00
|
|
|
|
uniqueIdent title' usedIdents
|
|
|
|
|
= let baseIdent = case inlineListToIdentifier title' of
|
2017-10-27 20:28:29 -07:00
|
|
|
|
"" -> "section"
|
|
|
|
|
x -> x
|
2015-07-23 09:06:14 +02:00
|
|
|
|
numIdent n = baseIdent ++ "-" ++ show n
|
2016-01-22 10:16:47 -08:00
|
|
|
|
in if baseIdent `Set.member` usedIdents
|
|
|
|
|
then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of
|
2009-04-25 00:29:58 +00:00
|
|
|
|
Just x -> numIdent x
|
|
|
|
|
Nothing -> baseIdent -- if we have more than 60,000, allow repeats
|
2015-07-23 09:06:14 +02:00
|
|
|
|
else baseIdent
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | True if block is a Header block.
|
|
|
|
|
isHeaderBlock :: Block -> Bool
|
2017-11-01 14:20:03 +03:00
|
|
|
|
isHeaderBlock Header{} = True
|
|
|
|
|
isHeaderBlock _ = False
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
2010-07-11 20:03:55 -07:00
|
|
|
|
-- | Shift header levels up or down.
|
|
|
|
|
headerShift :: Int -> Pandoc -> Pandoc
|
2013-08-10 18:45:00 -07:00
|
|
|
|
headerShift n = walk shift
|
2010-07-11 20:03:55 -07:00
|
|
|
|
where shift :: Block -> Block
|
2012-10-29 22:45:52 -07:00
|
|
|
|
shift (Header level attr inner) = Header (level + n) attr inner
|
|
|
|
|
shift x = x
|
2010-07-11 20:03:55 -07:00
|
|
|
|
|
2013-01-07 20:12:05 -08:00
|
|
|
|
-- | Detect if a list is tight.
|
|
|
|
|
isTightList :: [[Block]] -> Bool
|
2013-12-19 17:06:27 -05:00
|
|
|
|
isTightList = all firstIsPlain
|
2013-01-07 20:12:05 -08:00
|
|
|
|
where firstIsPlain (Plain _ : _) = True
|
|
|
|
|
firstIsPlain _ = False
|
|
|
|
|
|
2013-05-10 22:53:35 -07:00
|
|
|
|
-- | Set a field of a 'Meta' object. If the field already has a value,
|
|
|
|
|
-- convert it into a list with the new value appended to the old value(s).
|
|
|
|
|
addMetaField :: ToMetaValue a
|
|
|
|
|
=> String
|
|
|
|
|
-> a
|
|
|
|
|
-> Meta
|
|
|
|
|
-> Meta
|
|
|
|
|
addMetaField key val (Meta meta) =
|
|
|
|
|
Meta $ M.insertWith combine key (toMetaValue val) meta
|
2014-05-12 13:05:42 -07:00
|
|
|
|
where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
|
2013-05-10 22:53:35 -07:00
|
|
|
|
combine newval x = MetaList [x, newval]
|
2017-10-27 20:28:29 -07:00
|
|
|
|
tolist (MetaList ys) = ys
|
|
|
|
|
tolist y = [y]
|
2013-05-10 22:53:35 -07:00
|
|
|
|
|
|
|
|
|
-- | Create 'Meta' from old-style title, authors, date. This is
|
|
|
|
|
-- provided to ease the transition from the old API.
|
|
|
|
|
makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
|
|
|
|
|
makeMeta title authors date =
|
|
|
|
|
addMetaField "title" (B.fromList title)
|
|
|
|
|
$ addMetaField "author" (map B.fromList authors)
|
2017-10-27 23:13:55 -07:00
|
|
|
|
$ addMetaField "date" (B.fromList date) nullMeta
|
2013-05-10 22:53:35 -07:00
|
|
|
|
|
2017-05-30 10:22:48 +02:00
|
|
|
|
-- | Remove soft breaks between East Asian characters.
|
|
|
|
|
eastAsianLineBreakFilter :: Pandoc -> Pandoc
|
|
|
|
|
eastAsianLineBreakFilter = bottomUp go
|
|
|
|
|
where go (x:SoftBreak:y:zs) =
|
|
|
|
|
case (stringify x, stringify y) of
|
2017-10-27 23:13:55 -07:00
|
|
|
|
(xs@(_:_), c:_)
|
2017-05-30 10:22:48 +02:00
|
|
|
|
| charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
|
|
|
|
|
_ -> x:SoftBreak:y:zs
|
|
|
|
|
go xs = xs
|
|
|
|
|
|
2017-10-27 18:45:00 -04:00
|
|
|
|
-- | Builder for underline.
|
|
|
|
|
-- This probably belongs in Builder.hs in pandoc-types.
|
|
|
|
|
-- Will be replaced once Underline is an element.
|
|
|
|
|
underlineSpan :: Inlines -> Inlines
|
|
|
|
|
underlineSpan = B.spanWith ("", ["underline"], [])
|
|
|
|
|
|
|
|
|
|
|
2012-08-15 09:42:16 -07:00
|
|
|
|
--
|
|
|
|
|
-- TagSoup HTML handling
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
-- | Render HTML tags.
|
|
|
|
|
renderTags' :: [Tag String] -> String
|
|
|
|
|
renderTags' = renderTagsOptions
|
2013-12-19 20:19:24 -05:00
|
|
|
|
renderOptions{ optMinimize = matchTags ["hr", "br", "img",
|
|
|
|
|
"meta", "link"]
|
|
|
|
|
, optRawTag = matchTags ["script", "style"] }
|
2017-11-01 14:20:03 +03:00
|
|
|
|
where matchTags tags = flip elem tags . map toLower
|
2012-08-15 09:42:16 -07:00
|
|
|
|
|
2008-08-02 17:22:55 +00:00
|
|
|
|
--
|
|
|
|
|
-- File handling
|
|
|
|
|
--
|
|
|
|
|
|
2008-09-04 02:51:28 +00:00
|
|
|
|
-- | Perform an IO action in a directory, returning to starting directory.
|
|
|
|
|
inDirectory :: FilePath -> IO a -> IO a
|
2014-10-08 23:25:01 +02:00
|
|
|
|
inDirectory path action = E.bracket
|
|
|
|
|
getCurrentDirectory
|
|
|
|
|
setCurrentDirectory
|
|
|
|
|
(const $ setCurrentDirectory path >> action)
|
2009-12-31 01:11:23 +00:00
|
|
|
|
|
2012-01-29 23:54:00 -08:00
|
|
|
|
--
|
|
|
|
|
-- Error reporting
|
|
|
|
|
--
|
|
|
|
|
|
2015-02-18 21:05:47 +00:00
|
|
|
|
mapLeft :: (a -> b) -> Either a c -> Either b c
|
2017-10-27 20:28:29 -07:00
|
|
|
|
mapLeft f (Left x) = Left (f x)
|
2015-02-18 21:05:47 +00:00
|
|
|
|
mapLeft _ (Right x) = Right x
|
|
|
|
|
|
2014-08-08 20:10:58 +01:00
|
|
|
|
-- | Remove intermediate "." and ".." directories from a path.
|
|
|
|
|
--
|
2014-08-13 13:59:18 -07:00
|
|
|
|
-- > collapseFilePath "./foo" == "foo"
|
|
|
|
|
-- > collapseFilePath "/bar/../baz" == "/baz"
|
|
|
|
|
-- > collapseFilePath "/../baz" == "/../baz"
|
|
|
|
|
-- > collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar"
|
|
|
|
|
-- > collapseFilePath "parent/foo/baz/../../bar" == "parent/bar"
|
|
|
|
|
-- > collapseFilePath "parent/foo/.." == "parent"
|
|
|
|
|
-- > collapseFilePath "/parent/foo/../../bar" == "/bar"
|
2014-08-08 20:10:58 +01:00
|
|
|
|
collapseFilePath :: FilePath -> FilePath
|
2015-09-26 22:40:58 -07:00
|
|
|
|
collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
|
2014-08-08 20:10:58 +01:00
|
|
|
|
where
|
|
|
|
|
go rs "." = rs
|
|
|
|
|
go r@(p:rs) ".." = case p of
|
2017-10-27 23:13:55 -07:00
|
|
|
|
".." -> "..":r
|
|
|
|
|
(checkPathSeperator -> Just True) -> "..":r
|
2017-10-27 20:28:29 -07:00
|
|
|
|
_ -> rs
|
2015-09-26 22:40:58 -07:00
|
|
|
|
go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
|
2014-08-08 20:10:58 +01:00
|
|
|
|
go rs x = x:rs
|
2017-10-27 20:28:29 -07:00
|
|
|
|
isSingleton [] = Nothing
|
2014-09-25 12:42:53 +01:00
|
|
|
|
isSingleton [x] = Just x
|
2017-10-27 20:28:29 -07:00
|
|
|
|
isSingleton _ = Nothing
|
2014-09-25 12:42:53 +01:00
|
|
|
|
checkPathSeperator = fmap isPathSeparator . isSingleton
|
2014-08-08 20:10:58 +01:00
|
|
|
|
|
2016-10-12 17:42:30 +02:00
|
|
|
|
--
|
|
|
|
|
-- File selection from the archive
|
|
|
|
|
--
|
|
|
|
|
filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)]
|
|
|
|
|
filteredFilesFromArchive zf f =
|
|
|
|
|
mapMaybe (fileAndBinary zf) (filter f (filesInArchive zf))
|
|
|
|
|
where
|
|
|
|
|
fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
|
2016-10-17 16:58:53 +02:00
|
|
|
|
fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
|
2016-10-12 17:42:30 +02:00
|
|
|
|
|
2017-05-23 09:48:11 +02:00
|
|
|
|
|
|
|
|
|
--
|
|
|
|
|
-- IANA URIs
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus
|
|
|
|
|
-- the unofficial schemes doi, javascript, isbn, pmid.
|
|
|
|
|
schemes :: Set.Set String
|
|
|
|
|
schemes = Set.fromList
|
|
|
|
|
-- Official IANA schemes
|
|
|
|
|
[ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs"
|
|
|
|
|
, "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin"
|
|
|
|
|
, "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension"
|
|
|
|
|
, "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs"
|
|
|
|
|
, "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle"
|
|
|
|
|
, "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed"
|
|
|
|
|
, "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg"
|
|
|
|
|
, "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham"
|
|
|
|
|
, "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon"
|
|
|
|
|
, "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6"
|
|
|
|
|
, "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs"
|
|
|
|
|
, "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap"
|
|
|
|
|
, "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market"
|
|
|
|
|
, "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access"
|
|
|
|
|
, "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel"
|
|
|
|
|
, "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath"
|
|
|
|
|
, "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint"
|
|
|
|
|
, "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller"
|
|
|
|
|
, "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode"
|
|
|
|
|
, "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular"
|
|
|
|
|
, "ms-settings-cloudstorage", "ms-settings-connectabledevices"
|
|
|
|
|
, "ms-settings-displays-topology", "ms-settings-emailandaccounts"
|
|
|
|
|
, "ms-settings-language", "ms-settings-location", "ms-settings-lock"
|
|
|
|
|
, "ms-settings-nfctransactions", "ms-settings-notifications"
|
|
|
|
|
, "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity"
|
|
|
|
|
, "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace"
|
|
|
|
|
, "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad"
|
|
|
|
|
, "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word"
|
|
|
|
|
, "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs"
|
|
|
|
|
, "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd"
|
|
|
|
|
, "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop"
|
|
|
|
|
, "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis"
|
|
|
|
|
, "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp"
|
|
|
|
|
, "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn"
|
|
|
|
|
, "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews"
|
|
|
|
|
, "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam"
|
|
|
|
|
, "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid"
|
|
|
|
|
, "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn"
|
|
|
|
|
, "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi"
|
|
|
|
|
, "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid"
|
|
|
|
|
, "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire"
|
|
|
|
|
, "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r"
|
|
|
|
|
, "z39.50s"
|
|
|
|
|
-- Inofficial schemes
|
|
|
|
|
, "doi", "isbn", "javascript", "pmid"
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
-- | Check if the string is a valid URL with a IANA or frequently used but
|
|
|
|
|
-- unofficial scheme (see @schemes@).
|
|
|
|
|
isURI :: String -> Bool
|
|
|
|
|
isURI = maybe False hasKnownScheme . parseURI
|
|
|
|
|
where
|
2017-05-23 09:49:56 +02:00
|
|
|
|
hasKnownScheme = (`Set.member` schemes) . map toLower .
|
|
|
|
|
filter (/= ':') . uriScheme
|
2017-05-23 09:48:11 +02:00
|
|
|
|
|
2016-06-22 13:04:25 -04:00
|
|
|
|
---
|
|
|
|
|
--- Squash blocks into inlines
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
blockToInlines :: Block -> [Inline]
|
|
|
|
|
blockToInlines (Plain ils) = ils
|
|
|
|
|
blockToInlines (Para ils) = ils
|
2016-10-13 08:46:38 +02:00
|
|
|
|
blockToInlines (LineBlock lns) = combineLines lns
|
2016-06-22 13:04:25 -04:00
|
|
|
|
blockToInlines (CodeBlock attr str) = [Code attr str]
|
|
|
|
|
blockToInlines (RawBlock fmt str) = [RawInline fmt str]
|
2016-06-22 13:41:53 -04:00
|
|
|
|
blockToInlines (BlockQuote blks) = blocksToInlines blks
|
2016-06-22 13:04:25 -04:00
|
|
|
|
blockToInlines (OrderedList _ blkslst) =
|
|
|
|
|
concatMap blocksToInlines blkslst
|
|
|
|
|
blockToInlines (BulletList blkslst) =
|
|
|
|
|
concatMap blocksToInlines blkslst
|
|
|
|
|
blockToInlines (DefinitionList pairslst) =
|
|
|
|
|
concatMap f pairslst
|
|
|
|
|
where
|
|
|
|
|
f (ils, blkslst) = ils ++
|
|
|
|
|
[Str ":", Space] ++
|
2017-10-27 23:13:55 -07:00
|
|
|
|
concatMap blocksToInlines blkslst
|
2016-06-22 13:04:25 -04:00
|
|
|
|
blockToInlines (Header _ _ ils) = ils
|
2017-10-27 23:13:55 -07:00
|
|
|
|
blockToInlines HorizontalRule = []
|
2016-06-22 13:04:25 -04:00
|
|
|
|
blockToInlines (Table _ _ _ headers rows) =
|
|
|
|
|
intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl
|
|
|
|
|
where
|
|
|
|
|
tbl = headers : rows
|
|
|
|
|
blockToInlines (Div _ blks) = blocksToInlines blks
|
|
|
|
|
blockToInlines Null = []
|
|
|
|
|
|
|
|
|
|
blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline]
|
|
|
|
|
blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks
|
|
|
|
|
|
|
|
|
|
blocksToInlines :: [Block] -> [Inline]
|
|
|
|
|
blocksToInlines = blocksToInlinesWithSep [Space, Str "¶", Space]
|
2016-07-20 14:12:57 +02:00
|
|
|
|
|
2016-06-22 13:04:25 -04:00
|
|
|
|
|
2012-08-09 07:52:39 -07:00
|
|
|
|
--
|
|
|
|
|
-- Safe read
|
|
|
|
|
--
|
|
|
|
|
|
2015-02-18 18:40:36 +00:00
|
|
|
|
safeRead :: (MonadPlus m, Read a) => String -> m a
|
2012-08-09 07:52:39 -07:00
|
|
|
|
safeRead s = case reads s of
|
2012-08-09 20:19:06 -07:00
|
|
|
|
(d,x):_
|
|
|
|
|
| all isSpace x -> return d
|
2015-02-18 18:40:36 +00:00
|
|
|
|
_ -> mzero
|
2014-07-30 12:29:04 -07:00
|
|
|
|
|
|
|
|
|
--
|
|
|
|
|
-- Temp directory
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
withTempDir :: String -> (FilePath -> IO a) -> IO a
|
|
|
|
|
withTempDir =
|
|
|
|
|
#ifdef _WINDOWS
|
|
|
|
|
withTempDirectory "."
|
|
|
|
|
#else
|
|
|
|
|
withSystemTempDirectory
|
|
|
|
|
#endif
|