7807564d44
Generalised and move the extractSpaces function from `HTML.hs` to `Shared.hs` so that the docx reader can also use it.
745 lines
28 KiB
Haskell
745 lines
28 KiB
Haskell
{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
|
|
FlexibleContexts, ScopedTypeVariables #-}
|
|
{-
|
|
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-}
|
|
|
|
{- |
|
|
Module : Text.Pandoc.Shared
|
|
Copyright : Copyright (C) 2006-2014 John MacFarlane
|
|
License : GNU GPL, version 2 or above
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
Stability : alpha
|
|
Portability : portable
|
|
|
|
Utility functions and definitions used by the various Pandoc modules.
|
|
-}
|
|
module Text.Pandoc.Shared (
|
|
-- * List processing
|
|
splitBy,
|
|
splitByIndices,
|
|
splitStringByIndices,
|
|
substitute,
|
|
ordNub,
|
|
-- * Text processing
|
|
backslashEscapes,
|
|
escapeStringUsing,
|
|
stripTrailingNewlines,
|
|
trim,
|
|
triml,
|
|
trimr,
|
|
stripFirstAndLast,
|
|
camelCaseToHyphenated,
|
|
toRomanNumeral,
|
|
escapeURI,
|
|
tabFilter,
|
|
-- * Date/time
|
|
normalizeDate,
|
|
-- * Pandoc block and inline list processing
|
|
orderedListMarkers,
|
|
normalizeSpaces,
|
|
extractSpaces,
|
|
normalize,
|
|
stringify,
|
|
compactify,
|
|
compactify',
|
|
compactify'DL,
|
|
Element (..),
|
|
hierarchicalize,
|
|
uniqueIdent,
|
|
isHeaderBlock,
|
|
headerShift,
|
|
isTightList,
|
|
addMetaField,
|
|
makeMeta,
|
|
-- * TagSoup HTML handling
|
|
renderTags',
|
|
-- * File handling
|
|
inDirectory,
|
|
readDataFile,
|
|
readDataFileUTF8,
|
|
fetchItem,
|
|
openURL,
|
|
-- * Error handling
|
|
err,
|
|
warn,
|
|
-- * Safe read
|
|
safeRead
|
|
) where
|
|
|
|
import Text.Pandoc.Definition
|
|
import Text.Pandoc.Walk
|
|
import Text.Pandoc.Generic
|
|
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
|
|
import qualified Text.Pandoc.Builder as B
|
|
import qualified Text.Pandoc.UTF8 as UTF8
|
|
import System.Environment (getProgName)
|
|
import System.Exit (exitWith, ExitCode(..))
|
|
import Data.Char ( toLower, isLower, isUpper, isAlpha,
|
|
isLetter, isDigit, isSpace )
|
|
import Data.List ( find, isPrefixOf, intercalate )
|
|
import qualified Data.Map as M
|
|
import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
|
|
unEscapeString, parseURIReference )
|
|
import qualified Data.Set as Set
|
|
import System.Directory
|
|
import Text.Pandoc.MIME (getMimeType)
|
|
import System.FilePath ( (</>), takeExtension, dropExtension )
|
|
import Data.Generics (Typeable, Data)
|
|
import qualified Control.Monad.State as S
|
|
import qualified Control.Exception as E
|
|
import Control.Monad (msum, unless)
|
|
import Text.Pandoc.Pretty (charWidth)
|
|
import System.Locale (defaultTimeLocale)
|
|
import Data.Time
|
|
import System.IO (stderr)
|
|
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
|
|
renderOptions)
|
|
import qualified Data.ByteString as BS
|
|
import qualified Data.ByteString.Char8 as B8
|
|
import Text.Pandoc.Compat.Monoid
|
|
import Data.ByteString.Base64 (decodeLenient)
|
|
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
|
|
|
|
#ifdef EMBED_DATA_FILES
|
|
import Text.Pandoc.Data (dataFiles)
|
|
import System.FilePath ( joinPath, splitDirectories )
|
|
#else
|
|
import Paths_pandoc (getDataFileName)
|
|
#endif
|
|
#ifdef HTTP_CLIENT
|
|
import Data.ByteString.Lazy (toChunks)
|
|
import Network.HTTP.Client (httpLbs, parseUrl, withManager,
|
|
responseBody, responseHeaders,
|
|
Request(port,host))
|
|
import Network.HTTP.Client.Internal (addProxy)
|
|
import Network.HTTP.Client.TLS (tlsManagerSettings)
|
|
import System.Environment (getEnv)
|
|
import Network.HTTP.Types.Header ( hContentType)
|
|
import Network (withSocketsDo)
|
|
#else
|
|
import Network.URI (parseURI)
|
|
import Network.HTTP (findHeader, rspBody,
|
|
RequestMethod(..), HeaderName(..), mkRequest)
|
|
import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
|
|
#endif
|
|
|
|
--
|
|
-- List processing
|
|
--
|
|
|
|
-- | Split list by groups of one or more sep.
|
|
splitBy :: (a -> Bool) -> [a] -> [[a]]
|
|
splitBy _ [] = []
|
|
splitBy isSep lst =
|
|
let (first, rest) = break isSep lst
|
|
rest' = dropWhile isSep rest
|
|
in first:(splitBy isSep rest')
|
|
|
|
splitByIndices :: [Int] -> [a] -> [[a]]
|
|
splitByIndices [] lst = [lst]
|
|
splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest)
|
|
where (first, rest) = splitAt x lst
|
|
|
|
-- | Split string into chunks divided at specified indices.
|
|
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
|
|
splitStringByIndices [] lst = [lst]
|
|
splitStringByIndices (x:xs) lst =
|
|
let (first, rest) = splitAt' x lst in
|
|
first : (splitStringByIndices (map (\y -> y - x) xs) rest)
|
|
|
|
splitAt' :: Int -> [Char] -> ([Char],[Char])
|
|
splitAt' _ [] = ([],[])
|
|
splitAt' n xs | n <= 0 = ([],xs)
|
|
splitAt' n (x:xs) = (x:ys,zs)
|
|
where (ys,zs) = splitAt' (n - charWidth x) xs
|
|
|
|
-- | Replace each occurrence of one sublist in a list with another.
|
|
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
|
|
substitute _ _ [] = []
|
|
substitute [] _ xs = xs
|
|
substitute target replacement lst@(x:xs) =
|
|
if target `isPrefixOf` lst
|
|
then replacement ++ substitute target replacement (drop (length target) lst)
|
|
else x : substitute target replacement xs
|
|
|
|
ordNub :: (Ord a) => [a] -> [a]
|
|
ordNub l = go Set.empty l
|
|
where
|
|
go _ [] = []
|
|
go s (x:xs) = if x `Set.member` s then go s xs
|
|
else x : go (Set.insert x s) xs
|
|
|
|
--
|
|
-- Text processing
|
|
--
|
|
|
|
-- | Returns an association list of backslash escapes for the
|
|
-- designated characters.
|
|
backslashEscapes :: [Char] -- ^ list of special characters to escape
|
|
-> [(Char, String)]
|
|
backslashEscapes = map (\ch -> (ch, ['\\',ch]))
|
|
|
|
-- | Escape a string of characters, using an association list of
|
|
-- characters and strings.
|
|
escapeStringUsing :: [(Char, String)] -> String -> String
|
|
escapeStringUsing _ [] = ""
|
|
escapeStringUsing escapeTable (x:xs) =
|
|
case (lookup x escapeTable) of
|
|
Just str -> str ++ rest
|
|
Nothing -> x:rest
|
|
where rest = escapeStringUsing escapeTable xs
|
|
|
|
-- | Strip trailing newlines from string.
|
|
stripTrailingNewlines :: String -> String
|
|
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
|
|
|
|
-- | Remove leading and trailing space (including newlines) from string.
|
|
trim :: String -> String
|
|
trim = triml . trimr
|
|
|
|
-- | Remove leading space (including newlines) from string.
|
|
triml :: String -> String
|
|
triml = dropWhile (`elem` " \r\n\t")
|
|
|
|
-- | Remove trailing space (including newlines) from string.
|
|
trimr :: String -> String
|
|
trimr = reverse . triml . reverse
|
|
|
|
-- | Strip leading and trailing characters from string
|
|
stripFirstAndLast :: String -> String
|
|
stripFirstAndLast str =
|
|
drop 1 $ take ((length str) - 1) str
|
|
|
|
-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
|
|
camelCaseToHyphenated :: String -> String
|
|
camelCaseToHyphenated [] = ""
|
|
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
|
|
a:'-':(toLower b):(camelCaseToHyphenated rest)
|
|
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
|
|
|
|
-- | Convert number < 4000 to uppercase roman numeral.
|
|
toRomanNumeral :: Int -> String
|
|
toRomanNumeral x =
|
|
if x >= 4000 || x < 0
|
|
then "?"
|
|
else case x of
|
|
_ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
|
|
_ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
|
|
_ | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
|
|
_ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
|
|
_ | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
|
|
_ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
|
|
_ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
|
|
_ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
|
|
_ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
|
|
_ | x == 9 -> "IX"
|
|
_ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
|
|
_ | x == 4 -> "IV"
|
|
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
|
|
_ -> ""
|
|
|
|
-- | Escape whitespace in URI.
|
|
escapeURI :: String -> String
|
|
escapeURI = escapeURIString (not . isSpace)
|
|
|
|
-- | Convert tabs to spaces and filter out DOS line endings.
|
|
-- Tabs will be preserved if tab stop is set to 0.
|
|
tabFilter :: Int -- ^ Tab stop
|
|
-> String -- ^ Input
|
|
-> String
|
|
tabFilter tabStop =
|
|
let go _ [] = ""
|
|
go _ ('\n':xs) = '\n' : go tabStop xs
|
|
go _ ('\r':'\n':xs) = '\n' : go tabStop xs
|
|
go _ ('\r':xs) = '\n' : go tabStop xs
|
|
go spsToNextStop ('\t':xs) =
|
|
if tabStop == 0
|
|
then '\t' : go tabStop xs
|
|
else replicate spsToNextStop ' ' ++ go tabStop xs
|
|
go 1 (x:xs) =
|
|
x : go tabStop xs
|
|
go spsToNextStop (x:xs) =
|
|
x : go (spsToNextStop - 1) xs
|
|
in go tabStop
|
|
|
|
--
|
|
-- Date/time
|
|
--
|
|
|
|
-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format.
|
|
normalizeDate :: String -> Maybe String
|
|
normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
|
|
(msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day)
|
|
where parsetimeWith = parseTime defaultTimeLocale
|
|
formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
|
|
"%d %B %Y", "%b. %d, %Y", "%B %d, %Y", "%Y"]
|
|
|
|
--
|
|
-- Pandoc block and inline list processing
|
|
--
|
|
|
|
-- | Generate infinite lazy list of markers for an ordered list,
|
|
-- depending on list attributes.
|
|
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
|
|
orderedListMarkers (start, numstyle, numdelim) =
|
|
let singleton c = [c]
|
|
nums = case numstyle of
|
|
DefaultStyle -> map show [start..]
|
|
Example -> map show [start..]
|
|
Decimal -> map show [start..]
|
|
UpperAlpha -> drop (start - 1) $ cycle $
|
|
map singleton ['A'..'Z']
|
|
LowerAlpha -> drop (start - 1) $ cycle $
|
|
map singleton ['a'..'z']
|
|
UpperRoman -> map toRomanNumeral [start..]
|
|
LowerRoman -> map (map toLower . toRomanNumeral) [start..]
|
|
inDelim str = case numdelim of
|
|
DefaultDelim -> str ++ "."
|
|
Period -> str ++ "."
|
|
OneParen -> str ++ ")"
|
|
TwoParens -> "(" ++ str ++ ")"
|
|
in map inDelim nums
|
|
|
|
-- | Normalize a list of inline elements: remove leading and trailing
|
|
-- @Space@ elements, collapse double @Space@s into singles, and
|
|
-- remove empty Str elements.
|
|
normalizeSpaces :: [Inline] -> [Inline]
|
|
normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
|
|
where cleanup [] = []
|
|
cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of
|
|
[] -> []
|
|
(x:xs) -> Space : x : cleanup xs
|
|
cleanup ((Str ""):rest) = cleanup rest
|
|
cleanup (x:rest) = x : cleanup rest
|
|
|
|
isSpaceOrEmpty :: Inline -> Bool
|
|
isSpaceOrEmpty Space = True
|
|
isSpaceOrEmpty (Str "") = True
|
|
isSpaceOrEmpty _ = False
|
|
|
|
-- | Extract the leading and trailing spaces from inside an inline element
|
|
-- and place them outside the element.
|
|
|
|
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
|
|
extractSpaces f is =
|
|
let contents = B.unMany is
|
|
left = case viewl contents of
|
|
(Space :< _) -> B.space
|
|
_ -> mempty
|
|
right = case viewr contents of
|
|
(_ :> Space) -> B.space
|
|
_ -> mempty in
|
|
(left <> f (B.trimInlines . B.Many $ contents) <> right)
|
|
|
|
-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
|
|
-- combining adjacent 'Str's and 'Emph's, remove 'Null's and
|
|
-- empty elements, etc.
|
|
normalize :: (Eq a, Data a) => a -> a
|
|
normalize = topDown removeEmptyBlocks .
|
|
topDown consolidateInlines .
|
|
bottomUp (removeEmptyInlines . removeTrailingInlineSpaces)
|
|
|
|
removeEmptyBlocks :: [Block] -> [Block]
|
|
removeEmptyBlocks (Null : xs) = removeEmptyBlocks xs
|
|
removeEmptyBlocks (BulletList [] : xs) = removeEmptyBlocks xs
|
|
removeEmptyBlocks (OrderedList _ [] : xs) = removeEmptyBlocks xs
|
|
removeEmptyBlocks (DefinitionList [] : xs) = removeEmptyBlocks xs
|
|
removeEmptyBlocks (RawBlock _ [] : xs) = removeEmptyBlocks xs
|
|
removeEmptyBlocks (x:xs) = x : removeEmptyBlocks xs
|
|
removeEmptyBlocks [] = []
|
|
|
|
removeEmptyInlines :: [Inline] -> [Inline]
|
|
removeEmptyInlines (Emph [] : zs) = removeEmptyInlines zs
|
|
removeEmptyInlines (Strong [] : zs) = removeEmptyInlines zs
|
|
removeEmptyInlines (Subscript [] : zs) = removeEmptyInlines zs
|
|
removeEmptyInlines (Superscript [] : zs) = removeEmptyInlines zs
|
|
removeEmptyInlines (SmallCaps [] : zs) = removeEmptyInlines zs
|
|
removeEmptyInlines (Strikeout [] : zs) = removeEmptyInlines zs
|
|
removeEmptyInlines (RawInline _ [] : zs) = removeEmptyInlines zs
|
|
removeEmptyInlines (Code _ [] : zs) = removeEmptyInlines zs
|
|
removeEmptyInlines (Str "" : zs) = removeEmptyInlines zs
|
|
removeEmptyInlines (x : xs) = x : removeEmptyInlines xs
|
|
removeEmptyInlines [] = []
|
|
|
|
removeTrailingInlineSpaces :: [Inline] -> [Inline]
|
|
removeTrailingInlineSpaces = reverse . removeLeadingInlineSpaces . reverse
|
|
|
|
removeLeadingInlineSpaces :: [Inline] -> [Inline]
|
|
removeLeadingInlineSpaces = dropWhile isSpaceOrEmpty
|
|
|
|
consolidateInlines :: [Inline] -> [Inline]
|
|
consolidateInlines (Str x : ys) =
|
|
case concat (x : map fromStr strs) of
|
|
"" -> consolidateInlines rest
|
|
n -> Str n : consolidateInlines rest
|
|
where
|
|
(strs, rest) = span isStr ys
|
|
isStr (Str _) = True
|
|
isStr _ = False
|
|
fromStr (Str z) = z
|
|
fromStr _ = error "consolidateInlines - fromStr - not a Str"
|
|
consolidateInlines (Space : ys) = Space : rest
|
|
where isSp Space = True
|
|
isSp _ = False
|
|
rest = consolidateInlines $ dropWhile isSp ys
|
|
consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
|
|
Emph (xs ++ ys) : zs
|
|
consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
|
|
Strong (xs ++ ys) : zs
|
|
consolidateInlines (Subscript xs : Subscript ys : zs) = consolidateInlines $
|
|
Subscript (xs ++ ys) : zs
|
|
consolidateInlines (Superscript xs : Superscript ys : zs) = consolidateInlines $
|
|
Superscript (xs ++ ys) : zs
|
|
consolidateInlines (SmallCaps xs : SmallCaps ys : zs) = consolidateInlines $
|
|
SmallCaps (xs ++ ys) : zs
|
|
consolidateInlines (Strikeout xs : Strikeout ys : zs) = consolidateInlines $
|
|
Strikeout (xs ++ ys) : zs
|
|
consolidateInlines (RawInline f x : RawInline f' y : zs) | f == f' =
|
|
consolidateInlines $ RawInline f (x ++ y) : zs
|
|
consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 =
|
|
consolidateInlines $ Code a1 (x ++ y) : zs
|
|
consolidateInlines (x : xs) = x : consolidateInlines xs
|
|
consolidateInlines [] = []
|
|
|
|
-- | Convert pandoc structure to a string with formatting removed.
|
|
-- Footnotes are skipped (since we don't want their contents in link
|
|
-- labels).
|
|
stringify :: Walkable Inline a => a -> String
|
|
stringify = query go . walk deNote
|
|
where go :: Inline -> [Char]
|
|
go Space = " "
|
|
go (Str x) = x
|
|
go (Code _ x) = x
|
|
go (Math _ x) = x
|
|
go LineBreak = " "
|
|
go _ = ""
|
|
deNote (Note _) = Str ""
|
|
deNote x = x
|
|
|
|
-- | Change final list item from @Para@ to @Plain@ if the list contains
|
|
-- no other @Para@ blocks.
|
|
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
|
|
-> [[Block]]
|
|
compactify [] = []
|
|
compactify items =
|
|
case (init items, last items) of
|
|
(_,[]) -> items
|
|
(others, final) ->
|
|
case last final of
|
|
Para a -> case (filter isPara $ concat items) of
|
|
-- if this is only Para, change to Plain
|
|
[_] -> others ++ [init final ++ [Plain a]]
|
|
_ -> items
|
|
_ -> items
|
|
|
|
-- | Change final list item from @Para@ to @Plain@ if the list contains
|
|
-- no other @Para@ blocks. Like compactify, but operates on @Blocks@ rather
|
|
-- than @[Block]@.
|
|
compactify' :: [Blocks] -- ^ List of list items (each a list of blocks)
|
|
-> [Blocks]
|
|
compactify' [] = []
|
|
compactify' items =
|
|
let (others, final) = (init items, last items)
|
|
in case reverse (B.toList final) of
|
|
(Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
|
|
-- if this is only Para, change to Plain
|
|
[_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
|
|
_ -> items
|
|
_ -> items
|
|
|
|
-- | Like @compactify'@, but akts on items of definition lists.
|
|
compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
|
|
compactify'DL items =
|
|
let defs = concatMap snd items
|
|
defBlocks = reverse $ concatMap B.toList defs
|
|
in case defBlocks of
|
|
(Para x:_) -> if not $ any isPara (drop 1 defBlocks)
|
|
then let (t,ds) = last items
|
|
lastDef = B.toList $ last ds
|
|
ds' = init ds ++
|
|
[B.fromList $ init lastDef ++ [Plain x]]
|
|
in init items ++ [(t, ds')]
|
|
else items
|
|
_ -> items
|
|
|
|
isPara :: Block -> Bool
|
|
isPara (Para _) = True
|
|
isPara _ = False
|
|
|
|
-- | Data structure for defining hierarchical Pandoc documents
|
|
data Element = Blk Block
|
|
| Sec Int [Int] Attr [Inline] [Element]
|
|
-- lvl num attributes label contents
|
|
deriving (Eq, Read, Show, Typeable, Data)
|
|
|
|
instance Walkable Inline Element where
|
|
walk f (Blk x) = Blk (walk f x)
|
|
walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
|
|
walkM f (Blk x) = Blk `fmap` walkM f x
|
|
walkM f (Sec lev nums attr ils elts) = do
|
|
ils' <- walkM f ils
|
|
elts' <- walkM f elts
|
|
return $ Sec lev nums attr ils' elts'
|
|
query f (Blk x) = query f x
|
|
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
|
|
|
|
instance Walkable Block Element where
|
|
walk f (Blk x) = Blk (walk f x)
|
|
walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
|
|
walkM f (Blk x) = Blk `fmap` walkM f x
|
|
walkM f (Sec lev nums attr ils elts) = do
|
|
ils' <- walkM f ils
|
|
elts' <- walkM f elts
|
|
return $ Sec lev nums attr ils' elts'
|
|
query f (Blk x) = query f x
|
|
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
|
|
|
|
|
|
-- | Convert Pandoc inline list to plain text identifier. HTML
|
|
-- identifiers must start with a letter, and may contain only
|
|
-- letters, digits, and the characters _-.
|
|
inlineListToIdentifier :: [Inline] -> String
|
|
inlineListToIdentifier =
|
|
dropWhile (not . isAlpha) . intercalate "-" . words .
|
|
map (nbspToSp . toLower) .
|
|
filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
|
|
stringify
|
|
where nbspToSp '\160' = ' '
|
|
nbspToSp x = x
|
|
|
|
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
|
|
hierarchicalize :: [Block] -> [Element]
|
|
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
|
|
|
|
hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
|
|
hierarchicalizeWithIds [] = return []
|
|
hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
|
|
lastnum <- S.get
|
|
let lastnum' = take level lastnum
|
|
let newnum = case length lastnum' of
|
|
x | "unnumbered" `elem` classes -> []
|
|
| x >= level -> init lastnum' ++ [last lastnum' + 1]
|
|
| otherwise -> lastnum ++
|
|
replicate (level - length lastnum - 1) 0 ++ [1]
|
|
unless (null newnum) $ S.put newnum
|
|
let (sectionContents, rest) = break (headerLtEq level) xs
|
|
sectionContents' <- hierarchicalizeWithIds sectionContents
|
|
rest' <- hierarchicalizeWithIds rest
|
|
return $ Sec level newnum attr title' sectionContents' : rest'
|
|
hierarchicalizeWithIds (x:rest) = do
|
|
rest' <- hierarchicalizeWithIds rest
|
|
return $ (Blk x) : rest'
|
|
|
|
headerLtEq :: Int -> Block -> Bool
|
|
headerLtEq level (Header l _ _) = l <= level
|
|
headerLtEq _ _ = False
|
|
|
|
-- | Generate a unique identifier from a list of inlines.
|
|
-- Second argument is a list of already used identifiers.
|
|
uniqueIdent :: [Inline] -> [String] -> String
|
|
uniqueIdent title' usedIdents =
|
|
let baseIdent = case inlineListToIdentifier title' of
|
|
"" -> "section"
|
|
x -> x
|
|
numIdent n = baseIdent ++ "-" ++ show n
|
|
in if baseIdent `elem` usedIdents
|
|
then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
|
|
Just x -> numIdent x
|
|
Nothing -> baseIdent -- if we have more than 60,000, allow repeats
|
|
else baseIdent
|
|
|
|
-- | True if block is a Header block.
|
|
isHeaderBlock :: Block -> Bool
|
|
isHeaderBlock (Header _ _ _) = True
|
|
isHeaderBlock _ = False
|
|
|
|
-- | Shift header levels up or down.
|
|
headerShift :: Int -> Pandoc -> Pandoc
|
|
headerShift n = walk shift
|
|
where shift :: Block -> Block
|
|
shift (Header level attr inner) = Header (level + n) attr inner
|
|
shift x = x
|
|
|
|
-- | Detect if a list is tight.
|
|
isTightList :: [[Block]] -> Bool
|
|
isTightList = all firstIsPlain
|
|
where firstIsPlain (Plain _ : _) = True
|
|
firstIsPlain _ = False
|
|
|
|
-- | Set a field of a 'Meta' object. If the field already has a value,
|
|
-- convert it into a list with the new value appended to the old value(s).
|
|
addMetaField :: ToMetaValue a
|
|
=> String
|
|
-> a
|
|
-> Meta
|
|
-> Meta
|
|
addMetaField key val (Meta meta) =
|
|
Meta $ M.insertWith combine key (toMetaValue val) meta
|
|
where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
|
|
combine newval x = MetaList [x, newval]
|
|
tolist (MetaList ys) = ys
|
|
tolist y = [y]
|
|
|
|
-- | Create 'Meta' from old-style title, authors, date. This is
|
|
-- provided to ease the transition from the old API.
|
|
makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
|
|
makeMeta title authors date =
|
|
addMetaField "title" (B.fromList title)
|
|
$ addMetaField "author" (map B.fromList authors)
|
|
$ addMetaField "date" (B.fromList date)
|
|
$ nullMeta
|
|
|
|
--
|
|
-- TagSoup HTML handling
|
|
--
|
|
|
|
-- | Render HTML tags.
|
|
renderTags' :: [Tag String] -> String
|
|
renderTags' = renderTagsOptions
|
|
renderOptions{ optMinimize = matchTags ["hr", "br", "img",
|
|
"meta", "link"]
|
|
, optRawTag = matchTags ["script", "style"] }
|
|
where matchTags = \tags -> flip elem tags . map toLower
|
|
|
|
--
|
|
-- File handling
|
|
--
|
|
|
|
-- | Perform an IO action in a directory, returning to starting directory.
|
|
inDirectory :: FilePath -> IO a -> IO a
|
|
inDirectory path action = do
|
|
oldDir <- getCurrentDirectory
|
|
setCurrentDirectory path
|
|
result <- action
|
|
setCurrentDirectory oldDir
|
|
return result
|
|
|
|
readDefaultDataFile :: FilePath -> IO BS.ByteString
|
|
readDefaultDataFile fname =
|
|
#ifdef EMBED_DATA_FILES
|
|
case lookup (makeCanonical fname) dataFiles of
|
|
Nothing -> err 97 $ "Could not find data file " ++ fname
|
|
Just contents -> return contents
|
|
where makeCanonical = joinPath . transformPathParts . splitDirectories
|
|
transformPathParts = reverse . foldl go []
|
|
go as "." = as
|
|
go (_:as) ".." = as
|
|
go as x = x : as
|
|
#else
|
|
getDataFileName ("data" </> fname) >>= checkExistence >>= BS.readFile
|
|
where checkExistence fn = do
|
|
exists <- doesFileExist fn
|
|
if exists
|
|
then return fn
|
|
else err 97 ("Could not find data file " ++ fname)
|
|
#endif
|
|
|
|
-- | Read file from specified user data directory or, if not found there, from
|
|
-- Cabal data directory.
|
|
readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
|
|
readDataFile Nothing fname = readDefaultDataFile fname
|
|
readDataFile (Just userDir) fname = do
|
|
exists <- doesFileExist (userDir </> fname)
|
|
if exists
|
|
then BS.readFile (userDir </> fname)
|
|
else readDefaultDataFile fname
|
|
|
|
-- | Same as 'readDataFile' but returns a String instead of a ByteString.
|
|
readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
|
|
readDataFileUTF8 userDir fname =
|
|
UTF8.toString `fmap` readDataFile userDir fname
|
|
|
|
-- | Fetch an image or other item from the local filesystem or the net.
|
|
-- Returns raw content and maybe mime type.
|
|
fetchItem :: Maybe String -> String
|
|
-> IO (Either E.SomeException (BS.ByteString, Maybe String))
|
|
fetchItem sourceURL s
|
|
| isURI s = openURL s
|
|
| otherwise =
|
|
case sourceURL >>= parseURIReference of
|
|
Just u -> case parseURIReference s of
|
|
Just s' -> openURL $ show $
|
|
s' `nonStrictRelativeTo` u
|
|
Nothing -> openURL $ show u ++ "/" ++ s
|
|
Nothing -> E.try readLocalFile
|
|
where readLocalFile = do
|
|
let mime = case takeExtension s of
|
|
".gz" -> getMimeType $ dropExtension s
|
|
x -> getMimeType x
|
|
cont <- BS.readFile s
|
|
return (cont, mime)
|
|
|
|
-- | Read from a URL and return raw data and maybe mime type.
|
|
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
|
|
openURL u
|
|
| "data:" `isPrefixOf` u =
|
|
let mime = takeWhile (/=',') $ drop 5 u
|
|
contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
|
|
in return $ Right (decodeLenient contents, Just mime)
|
|
#ifdef HTTP_CLIENT
|
|
| otherwise = withSocketsDo $ E.try $ do
|
|
req <- parseUrl u
|
|
(proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
|
|
let req' = case proxy of
|
|
Left _ -> req
|
|
Right pr -> case parseUrl pr of
|
|
Just r -> addProxy (host r) (port r) req
|
|
Nothing -> req
|
|
resp <- withManager tlsManagerSettings $ httpLbs req'
|
|
return (BS.concat $ toChunks $ responseBody resp,
|
|
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
|
|
#else
|
|
| otherwise = E.try $ getBodyAndMimeType `fmap` browse
|
|
(do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
|
|
setOutHandler $ const (return ())
|
|
setAllowRedirects True
|
|
request (getRequest' u'))
|
|
where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
|
|
getRequest' uriString = case parseURI uriString of
|
|
Nothing -> error ("Not a valid URL: " ++
|
|
uriString)
|
|
Just v -> mkRequest GET v
|
|
u' = escapeURIString (/= '|') u -- pipes are rejected by Network.URI
|
|
#endif
|
|
|
|
--
|
|
-- Error reporting
|
|
--
|
|
|
|
err :: Int -> String -> IO a
|
|
err exitCode msg = do
|
|
name <- getProgName
|
|
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
|
|
exitWith $ ExitFailure exitCode
|
|
return undefined
|
|
|
|
warn :: String -> IO ()
|
|
warn msg = do
|
|
name <- getProgName
|
|
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
|
|
|
|
--
|
|
-- Safe read
|
|
--
|
|
|
|
safeRead :: (Monad m, Read a) => String -> m a
|
|
safeRead s = case reads s of
|
|
(d,x):_
|
|
| all isSpace x -> return d
|
|
_ -> fail $ "Could not read `" ++ s ++ "'"
|