2013-08-28 08:43:51 -07:00
|
|
|
|
{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
|
2014-08-03 19:41:33 +04:00
|
|
|
|
FlexibleContexts, ScopedTypeVariables, PatternGuards #-}
|
2007-11-03 23:27:58 +00:00
|
|
|
|
{-
|
2014-05-08 21:50:20 +02:00
|
|
|
|
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
|
-}
|
|
|
|
|
|
|
|
|
|
{- |
|
|
|
|
|
Module : Text.Pandoc.Shared
|
2014-05-08 21:50:20 +02:00
|
|
|
|
Copyright : Copyright (C) 2006-2014 John MacFarlane
|
2012-07-26 22:32:53 -07:00
|
|
|
|
License : GNU GPL, version 2 or above
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
|
|
|
Stability : alpha
|
|
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
|
|
Utility functions and definitions used by the various Pandoc modules.
|
|
|
|
|
-}
|
2010-03-23 15:05:33 -07:00
|
|
|
|
module Text.Pandoc.Shared (
|
2007-11-03 23:27:58 +00:00
|
|
|
|
-- * List processing
|
|
|
|
|
splitBy,
|
|
|
|
|
splitByIndices,
|
2012-01-27 00:37:46 -08:00
|
|
|
|
splitStringByIndices,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
substitute,
|
2014-06-03 11:00:54 -07:00
|
|
|
|
ordNub,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
-- * Text processing
|
|
|
|
|
backslashEscapes,
|
|
|
|
|
escapeStringUsing,
|
|
|
|
|
stripTrailingNewlines,
|
2012-09-29 17:09:34 -04:00
|
|
|
|
trim,
|
|
|
|
|
triml,
|
|
|
|
|
trimr,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
stripFirstAndLast,
|
|
|
|
|
camelCaseToHyphenated,
|
|
|
|
|
toRomanNumeral,
|
2010-03-23 15:05:33 -07:00
|
|
|
|
escapeURI,
|
2010-07-06 23:17:06 -07:00
|
|
|
|
tabFilter,
|
2012-01-28 15:54:05 -08:00
|
|
|
|
-- * Date/time
|
|
|
|
|
normalizeDate,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
-- * Pandoc block and inline list processing
|
|
|
|
|
orderedListMarkers,
|
|
|
|
|
normalizeSpaces,
|
2014-06-16 20:45:54 +01:00
|
|
|
|
extractSpaces,
|
2010-12-14 20:04:37 -08:00
|
|
|
|
normalize,
|
2014-06-29 23:03:12 -07:00
|
|
|
|
normalizeInlines,
|
|
|
|
|
normalizeBlocks,
|
2014-07-13 10:13:22 -07:00
|
|
|
|
removeFormatting,
|
2010-11-27 07:08:06 -08:00
|
|
|
|
stringify,
|
2014-08-03 16:48:55 +04:00
|
|
|
|
capitalize,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
compactify,
|
2012-09-27 17:22:17 -07:00
|
|
|
|
compactify',
|
2014-04-19 14:48:35 +02:00
|
|
|
|
compactify'DL,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
Element (..),
|
|
|
|
|
hierarchicalize,
|
2010-03-16 06:45:52 +00:00
|
|
|
|
uniqueIdent,
|
2007-11-03 23:27:58 +00:00
|
|
|
|
isHeaderBlock,
|
2010-07-11 20:03:55 -07:00
|
|
|
|
headerShift,
|
2013-01-07 20:12:05 -08:00
|
|
|
|
isTightList,
|
2013-05-10 22:53:35 -07:00
|
|
|
|
addMetaField,
|
|
|
|
|
makeMeta,
|
2012-08-15 09:42:16 -07:00
|
|
|
|
-- * TagSoup HTML handling
|
|
|
|
|
renderTags',
|
2008-07-31 23:16:02 +00:00
|
|
|
|
-- * File handling
|
2009-12-31 01:11:23 +00:00
|
|
|
|
inDirectory,
|
2011-07-17 19:33:52 -07:00
|
|
|
|
readDataFile,
|
2012-12-29 17:44:02 -08:00
|
|
|
|
readDataFileUTF8,
|
2013-01-11 16:19:06 -08:00
|
|
|
|
fetchItem,
|
2014-07-30 13:47:07 -07:00
|
|
|
|
fetchItem',
|
2013-01-11 16:19:06 -08:00
|
|
|
|
openURL,
|
2014-08-08 20:10:58 +01:00
|
|
|
|
collapseFilePath,
|
2012-01-29 23:54:00 -08:00
|
|
|
|
-- * Error handling
|
|
|
|
|
err,
|
2012-08-09 07:52:39 -07:00
|
|
|
|
warn,
|
|
|
|
|
-- * Safe read
|
2014-07-30 12:29:04 -07:00
|
|
|
|
safeRead,
|
|
|
|
|
-- * Temp directory
|
|
|
|
|
withTempDir
|
2007-11-03 23:27:58 +00:00
|
|
|
|
) where
|
|
|
|
|
|
|
|
|
|
import Text.Pandoc.Definition
|
2013-08-10 18:13:38 -07:00
|
|
|
|
import Text.Pandoc.Walk
|
2014-07-31 12:00:21 -07:00
|
|
|
|
import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
|
2014-04-19 14:48:35 +02:00
|
|
|
|
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
|
2012-09-27 17:22:17 -07:00
|
|
|
|
import qualified Text.Pandoc.Builder as B
|
2012-01-29 23:54:00 -08:00
|
|
|
|
import qualified Text.Pandoc.UTF8 as UTF8
|
|
|
|
|
import System.Environment (getProgName)
|
|
|
|
|
import System.Exit (exitWith, ExitCode(..))
|
2011-12-02 19:39:30 -08:00
|
|
|
|
import Data.Char ( toLower, isLower, isUpper, isAlpha,
|
|
|
|
|
isLetter, isDigit, isSpace )
|
2014-08-03 14:44:39 +04:00
|
|
|
|
import Data.List ( find, stripPrefix, intercalate )
|
2013-05-10 22:53:35 -07:00
|
|
|
|
import qualified Data.Map as M
|
2014-01-08 12:04:08 -08:00
|
|
|
|
import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
|
2014-08-02 16:09:17 -07:00
|
|
|
|
unEscapeString, parseURIReference, isAllowedInURI )
|
2014-06-03 11:00:54 -07:00
|
|
|
|
import qualified Data.Set as Set
|
2008-07-31 23:16:02 +00:00
|
|
|
|
import System.Directory
|
2014-08-08 20:10:58 +01:00
|
|
|
|
import System.FilePath (joinPath, splitDirectories)
|
2014-08-17 20:42:30 +04:00
|
|
|
|
import Text.Pandoc.MIME (MimeType, getMimeType)
|
2014-07-31 12:00:21 -07:00
|
|
|
|
import System.FilePath ( (</>), takeExtension, dropExtension)
|
2010-12-24 13:39:27 -08:00
|
|
|
|
import Data.Generics (Typeable, Data)
|
2009-04-25 00:29:58 +00:00
|
|
|
|
import qualified Control.Monad.State as S
|
2013-07-18 20:58:14 -07:00
|
|
|
|
import qualified Control.Exception as E
|
2014-07-31 12:00:21 -07:00
|
|
|
|
import Control.Monad (msum, unless)
|
2012-01-27 00:37:46 -08:00
|
|
|
|
import Text.Pandoc.Pretty (charWidth)
|
2012-01-28 15:54:05 -08:00
|
|
|
|
import System.Locale (defaultTimeLocale)
|
|
|
|
|
import Data.Time
|
2012-09-23 22:53:34 -07:00
|
|
|
|
import System.IO (stderr)
|
2014-07-30 12:29:04 -07:00
|
|
|
|
import System.IO.Temp
|
2012-08-15 09:42:16 -07:00
|
|
|
|
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
|
|
|
|
|
renderOptions)
|
2013-07-04 22:40:23 -07:00
|
|
|
|
import qualified Data.ByteString as BS
|
2013-05-28 12:48:17 -07:00
|
|
|
|
import qualified Data.ByteString.Char8 as B8
|
2013-08-10 18:13:38 -07:00
|
|
|
|
import Text.Pandoc.Compat.Monoid
|
2013-11-19 13:15:24 -08:00
|
|
|
|
import Data.ByteString.Base64 (decodeLenient)
|
2014-06-16 20:45:54 +01:00
|
|
|
|
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
|
2014-08-03 16:48:55 +04:00
|
|
|
|
import qualified Data.Text as T (toUpper, pack, unpack)
|
2014-08-31 11:17:53 -07:00
|
|
|
|
import Data.ByteString.Lazy (toChunks)
|
2013-05-10 22:53:35 -07:00
|
|
|
|
|
2012-12-29 17:44:02 -08:00
|
|
|
|
#ifdef EMBED_DATA_FILES
|
2013-01-23 08:14:23 -08:00
|
|
|
|
import Text.Pandoc.Data (dataFiles)
|
2012-12-29 17:44:02 -08:00
|
|
|
|
#else
|
|
|
|
|
import Paths_pandoc (getDataFileName)
|
|
|
|
|
#endif
|
2014-05-18 22:04:39 -07:00
|
|
|
|
#ifdef HTTP_CLIENT
|
|
|
|
|
import Network.HTTP.Client (httpLbs, parseUrl, withManager,
|
|
|
|
|
responseBody, responseHeaders,
|
|
|
|
|
Request(port,host))
|
|
|
|
|
import Network.HTTP.Client.Internal (addProxy)
|
|
|
|
|
import Network.HTTP.Client.TLS (tlsManagerSettings)
|
2014-04-05 10:58:32 -07:00
|
|
|
|
import System.Environment (getEnv)
|
2013-07-04 22:40:23 -07:00
|
|
|
|
import Network.HTTP.Types.Header ( hContentType)
|
2013-12-09 22:35:22 -08:00
|
|
|
|
import Network (withSocketsDo)
|
2013-07-04 22:40:23 -07:00
|
|
|
|
#else
|
|
|
|
|
import Network.URI (parseURI)
|
|
|
|
|
import Network.HTTP (findHeader, rspBody,
|
|
|
|
|
RequestMethod(..), HeaderName(..), mkRequest)
|
|
|
|
|
import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
|
|
|
|
|
#endif
|
2010-03-22 19:29:37 -07:00
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
|
--
|
|
|
|
|
-- List processing
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
-- | Split list by groups of one or more sep.
|
2010-12-21 08:41:24 -08:00
|
|
|
|
splitBy :: (a -> Bool) -> [a] -> [[a]]
|
2007-11-03 23:27:58 +00:00
|
|
|
|
splitBy _ [] = []
|
2010-12-21 08:41:24 -08:00
|
|
|
|
splitBy isSep lst =
|
|
|
|
|
let (first, rest) = break isSep lst
|
|
|
|
|
rest' = dropWhile isSep rest
|
|
|
|
|
in first:(splitBy isSep rest')
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
splitByIndices :: [Int] -> [a] -> [[a]]
|
|
|
|
|
splitByIndices [] lst = [lst]
|
2012-01-27 00:37:46 -08:00
|
|
|
|
splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest)
|
|
|
|
|
where (first, rest) = splitAt x lst
|
|
|
|
|
|
|
|
|
|
-- | Split string into chunks divided at specified indices.
|
|
|
|
|
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
|
|
|
|
|
splitStringByIndices [] lst = [lst]
|
|
|
|
|
splitStringByIndices (x:xs) lst =
|
|
|
|
|
let (first, rest) = splitAt' x lst in
|
|
|
|
|
first : (splitStringByIndices (map (\y -> y - x) xs) rest)
|
|
|
|
|
|
|
|
|
|
splitAt' :: Int -> [Char] -> ([Char],[Char])
|
|
|
|
|
splitAt' _ [] = ([],[])
|
|
|
|
|
splitAt' n xs | n <= 0 = ([],xs)
|
|
|
|
|
splitAt' n (x:xs) = (x:ys,zs)
|
|
|
|
|
where (ys,zs) = splitAt' (n - charWidth x) xs
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Replace each occurrence of one sublist in a list with another.
|
|
|
|
|
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
|
|
|
|
|
substitute _ _ [] = []
|
2010-07-11 12:22:18 -07:00
|
|
|
|
substitute [] _ xs = xs
|
|
|
|
|
substitute target replacement lst@(x:xs) =
|
2014-08-03 14:44:39 +04:00
|
|
|
|
case stripPrefix target lst of
|
|
|
|
|
Just lst' -> replacement ++ substitute target replacement lst'
|
|
|
|
|
Nothing -> x : substitute target replacement xs
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
2014-06-03 11:00:54 -07:00
|
|
|
|
ordNub :: (Ord a) => [a] -> [a]
|
|
|
|
|
ordNub l = go Set.empty l
|
|
|
|
|
where
|
|
|
|
|
go _ [] = []
|
|
|
|
|
go s (x:xs) = if x `Set.member` s then go s xs
|
|
|
|
|
else x : go (Set.insert x s) xs
|
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
|
--
|
|
|
|
|
-- Text processing
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
-- | Returns an association list of backslash escapes for the
|
|
|
|
|
-- designated characters.
|
|
|
|
|
backslashEscapes :: [Char] -- ^ list of special characters to escape
|
|
|
|
|
-> [(Char, String)]
|
|
|
|
|
backslashEscapes = map (\ch -> (ch, ['\\',ch]))
|
|
|
|
|
|
|
|
|
|
-- | Escape a string of characters, using an association list of
|
|
|
|
|
-- characters and strings.
|
|
|
|
|
escapeStringUsing :: [(Char, String)] -> String -> String
|
|
|
|
|
escapeStringUsing _ [] = ""
|
2012-07-26 22:32:53 -07:00
|
|
|
|
escapeStringUsing escapeTable (x:xs) =
|
2007-11-03 23:27:58 +00:00
|
|
|
|
case (lookup x escapeTable) of
|
|
|
|
|
Just str -> str ++ rest
|
|
|
|
|
Nothing -> x:rest
|
|
|
|
|
where rest = escapeStringUsing escapeTable xs
|
|
|
|
|
|
|
|
|
|
-- | Strip trailing newlines from string.
|
|
|
|
|
stripTrailingNewlines :: String -> String
|
|
|
|
|
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
|
|
|
|
|
|
|
|
|
|
-- | Remove leading and trailing space (including newlines) from string.
|
2012-09-29 17:09:34 -04:00
|
|
|
|
trim :: String -> String
|
|
|
|
|
trim = triml . trimr
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Remove leading space (including newlines) from string.
|
2012-09-29 17:09:34 -04:00
|
|
|
|
triml :: String -> String
|
|
|
|
|
triml = dropWhile (`elem` " \r\n\t")
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Remove trailing space (including newlines) from string.
|
2012-09-29 17:09:34 -04:00
|
|
|
|
trimr :: String -> String
|
|
|
|
|
trimr = reverse . triml . reverse
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Strip leading and trailing characters from string
|
|
|
|
|
stripFirstAndLast :: String -> String
|
|
|
|
|
stripFirstAndLast str =
|
|
|
|
|
drop 1 $ take ((length str) - 1) str
|
|
|
|
|
|
2012-07-26 22:32:53 -07:00
|
|
|
|
-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
|
2007-11-03 23:27:58 +00:00
|
|
|
|
camelCaseToHyphenated :: String -> String
|
|
|
|
|
camelCaseToHyphenated [] = ""
|
|
|
|
|
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
|
|
|
|
|
a:'-':(toLower b):(camelCaseToHyphenated rest)
|
|
|
|
|
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
|
|
|
|
|
|
|
|
|
|
-- | Convert number < 4000 to uppercase roman numeral.
|
|
|
|
|
toRomanNumeral :: Int -> String
|
|
|
|
|
toRomanNumeral x =
|
|
|
|
|
if x >= 4000 || x < 0
|
|
|
|
|
then "?"
|
|
|
|
|
else case x of
|
|
|
|
|
_ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
|
|
|
|
|
_ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
|
|
|
|
|
_ | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
|
|
|
|
|
_ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
|
|
|
|
|
_ | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
|
|
|
|
|
_ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
|
|
|
|
|
_ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
|
|
|
|
|
_ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
|
|
|
|
|
_ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
|
2014-04-15 19:53:11 -07:00
|
|
|
|
_ | x == 9 -> "IX"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
_ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
|
2014-04-15 19:53:11 -07:00
|
|
|
|
_ | x == 4 -> "IV"
|
2007-11-03 23:27:58 +00:00
|
|
|
|
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
|
|
|
|
|
_ -> ""
|
|
|
|
|
|
2011-12-02 19:39:30 -08:00
|
|
|
|
-- | Escape whitespace in URI.
|
2010-03-23 15:05:33 -07:00
|
|
|
|
escapeURI :: String -> String
|
2011-12-02 19:39:30 -08:00
|
|
|
|
escapeURI = escapeURIString (not . isSpace)
|
2010-03-23 15:34:53 -07:00
|
|
|
|
|
2010-07-06 23:17:06 -07:00
|
|
|
|
-- | Convert tabs to spaces and filter out DOS line endings.
|
|
|
|
|
-- Tabs will be preserved if tab stop is set to 0.
|
|
|
|
|
tabFilter :: Int -- ^ Tab stop
|
|
|
|
|
-> String -- ^ Input
|
|
|
|
|
-> String
|
|
|
|
|
tabFilter tabStop =
|
|
|
|
|
let go _ [] = ""
|
|
|
|
|
go _ ('\n':xs) = '\n' : go tabStop xs
|
|
|
|
|
go _ ('\r':'\n':xs) = '\n' : go tabStop xs
|
|
|
|
|
go _ ('\r':xs) = '\n' : go tabStop xs
|
|
|
|
|
go spsToNextStop ('\t':xs) =
|
|
|
|
|
if tabStop == 0
|
|
|
|
|
then '\t' : go tabStop xs
|
|
|
|
|
else replicate spsToNextStop ' ' ++ go tabStop xs
|
|
|
|
|
go 1 (x:xs) =
|
|
|
|
|
x : go tabStop xs
|
|
|
|
|
go spsToNextStop (x:xs) =
|
|
|
|
|
x : go (spsToNextStop - 1) xs
|
|
|
|
|
in go tabStop
|
|
|
|
|
|
2012-01-28 15:54:05 -08:00
|
|
|
|
--
|
|
|
|
|
-- Date/time
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format.
|
|
|
|
|
normalizeDate :: String -> Maybe String
|
|
|
|
|
normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
|
|
|
|
|
(msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day)
|
|
|
|
|
where parsetimeWith = parseTime defaultTimeLocale
|
|
|
|
|
formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
|
2013-12-01 10:19:08 +02:00
|
|
|
|
"%d %B %Y", "%b. %d, %Y", "%B %d, %Y", "%Y"]
|
2012-01-28 15:54:05 -08:00
|
|
|
|
|
2007-11-03 23:27:58 +00:00
|
|
|
|
--
|
|
|
|
|
-- Pandoc block and inline list processing
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
-- | Generate infinite lazy list of markers for an ordered list,
|
|
|
|
|
-- depending on list attributes.
|
|
|
|
|
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
|
2012-07-26 22:32:53 -07:00
|
|
|
|
orderedListMarkers (start, numstyle, numdelim) =
|
2007-11-03 23:27:58 +00:00
|
|
|
|
let singleton c = [c]
|
|
|
|
|
nums = case numstyle of
|
|
|
|
|
DefaultStyle -> map show [start..]
|
2010-07-11 22:47:52 -07:00
|
|
|
|
Example -> map show [start..]
|
2007-11-03 23:27:58 +00:00
|
|
|
|
Decimal -> map show [start..]
|
2012-07-26 22:32:53 -07:00
|
|
|
|
UpperAlpha -> drop (start - 1) $ cycle $
|
2007-11-03 23:27:58 +00:00
|
|
|
|
map singleton ['A'..'Z']
|
|
|
|
|
LowerAlpha -> drop (start - 1) $ cycle $
|
|
|
|
|
map singleton ['a'..'z']
|
|
|
|
|
UpperRoman -> map toRomanNumeral [start..]
|
|
|
|
|
LowerRoman -> map (map toLower . toRomanNumeral) [start..]
|
|
|
|
|
inDelim str = case numdelim of
|
|
|
|
|
DefaultDelim -> str ++ "."
|
|
|
|
|
Period -> str ++ "."
|
|
|
|
|
OneParen -> str ++ ")"
|
|
|
|
|
TwoParens -> "(" ++ str ++ ")"
|
|
|
|
|
in map inDelim nums
|
|
|
|
|
|
|
|
|
|
-- | Normalize a list of inline elements: remove leading and trailing
|
|
|
|
|
-- @Space@ elements, collapse double @Space@s into singles, and
|
|
|
|
|
-- remove empty Str elements.
|
|
|
|
|
normalizeSpaces :: [Inline] -> [Inline]
|
2010-12-07 20:10:21 -08:00
|
|
|
|
normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
|
2012-07-24 22:12:18 -07:00
|
|
|
|
where cleanup [] = []
|
|
|
|
|
cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of
|
|
|
|
|
[] -> []
|
|
|
|
|
(x:xs) -> Space : x : cleanup xs
|
2010-12-07 20:10:21 -08:00
|
|
|
|
cleanup ((Str ""):rest) = cleanup rest
|
2012-07-24 22:12:18 -07:00
|
|
|
|
cleanup (x:rest) = x : cleanup rest
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
2011-02-04 13:22:31 -08:00
|
|
|
|
isSpaceOrEmpty :: Inline -> Bool
|
|
|
|
|
isSpaceOrEmpty Space = True
|
|
|
|
|
isSpaceOrEmpty (Str "") = True
|
|
|
|
|
isSpaceOrEmpty _ = False
|
|
|
|
|
|
2014-06-16 20:45:54 +01:00
|
|
|
|
-- | Extract the leading and trailing spaces from inside an inline element
|
2014-07-12 22:57:22 -07:00
|
|
|
|
-- and place them outside the element.
|
2014-06-16 20:45:54 +01:00
|
|
|
|
|
|
|
|
|
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
|
2014-07-12 22:57:22 -07:00
|
|
|
|
extractSpaces f is =
|
2014-06-16 20:45:54 +01:00
|
|
|
|
let contents = B.unMany is
|
|
|
|
|
left = case viewl contents of
|
|
|
|
|
(Space :< _) -> B.space
|
|
|
|
|
_ -> mempty
|
|
|
|
|
right = case viewr contents of
|
|
|
|
|
(_ :> Space) -> B.space
|
|
|
|
|
_ -> mempty in
|
|
|
|
|
(left <> f (B.trimInlines . B.Many $ contents) <> right)
|
|
|
|
|
|
2010-12-14 20:04:37 -08:00
|
|
|
|
-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
|
|
|
|
|
-- combining adjacent 'Str's and 'Emph's, remove 'Null's and
|
|
|
|
|
-- empty elements, etc.
|
2014-06-29 23:03:12 -07:00
|
|
|
|
normalize :: Pandoc -> Pandoc
|
|
|
|
|
normalize (Pandoc (Meta meta) blocks) =
|
|
|
|
|
Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks)
|
|
|
|
|
where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs
|
|
|
|
|
go (MetaBlocks xs) = MetaBlocks $ normalizeBlocks xs
|
|
|
|
|
go (MetaList ms) = MetaList $ map go ms
|
|
|
|
|
go (MetaMap m) = MetaMap $ M.map go m
|
|
|
|
|
go x = x
|
|
|
|
|
|
|
|
|
|
normalizeBlocks :: [Block] -> [Block]
|
|
|
|
|
normalizeBlocks (Null : xs) = normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (Div attr bs : xs) =
|
|
|
|
|
Div attr (normalizeBlocks bs) : normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (BlockQuote bs : xs) =
|
|
|
|
|
case normalizeBlocks bs of
|
|
|
|
|
[] -> normalizeBlocks xs
|
|
|
|
|
bs' -> BlockQuote bs' : normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (BulletList items : xs) =
|
|
|
|
|
BulletList (map normalizeBlocks items) : normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (OrderedList attr items : xs) =
|
|
|
|
|
OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (DefinitionList items : xs) =
|
|
|
|
|
DefinitionList (map go items) : normalizeBlocks xs
|
|
|
|
|
where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs)
|
|
|
|
|
normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs
|
2014-07-07 15:39:59 -06:00
|
|
|
|
normalizeBlocks (RawBlock f x : xs) =
|
|
|
|
|
case normalizeBlocks xs of
|
|
|
|
|
(RawBlock f' x' : rest) | f' == f ->
|
|
|
|
|
RawBlock f (x ++ ('\n':x')) : rest
|
|
|
|
|
rest -> RawBlock f x : rest
|
2014-06-29 23:03:12 -07:00
|
|
|
|
normalizeBlocks (Para ils : xs) =
|
|
|
|
|
case normalizeInlines ils of
|
|
|
|
|
[] -> normalizeBlocks xs
|
|
|
|
|
ils' -> Para ils' : normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (Plain ils : xs) =
|
|
|
|
|
case normalizeInlines ils of
|
|
|
|
|
[] -> normalizeBlocks xs
|
|
|
|
|
ils' -> Plain ils' : normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (Header lev attr ils : xs) =
|
|
|
|
|
Header lev attr (normalizeInlines ils) : normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (Table capt aligns widths hdrs rows : xs) =
|
|
|
|
|
Table (normalizeInlines capt) aligns widths
|
|
|
|
|
(map normalizeBlocks hdrs) (map (map normalizeBlocks) rows)
|
|
|
|
|
: normalizeBlocks xs
|
|
|
|
|
normalizeBlocks (x:xs) = x : normalizeBlocks xs
|
|
|
|
|
normalizeBlocks [] = []
|
|
|
|
|
|
|
|
|
|
normalizeInlines :: [Inline] -> [Inline]
|
|
|
|
|
normalizeInlines (Str x : ys) =
|
2010-12-14 20:04:37 -08:00
|
|
|
|
case concat (x : map fromStr strs) of
|
2014-06-29 23:03:12 -07:00
|
|
|
|
"" -> rest
|
|
|
|
|
n -> Str n : rest
|
2010-12-14 20:04:37 -08:00
|
|
|
|
where
|
2014-06-29 23:03:12 -07:00
|
|
|
|
(strs, rest) = span isStr $ normalizeInlines ys
|
2010-12-14 20:04:37 -08:00
|
|
|
|
isStr (Str _) = True
|
|
|
|
|
isStr _ = False
|
|
|
|
|
fromStr (Str z) = z
|
2014-06-29 23:03:12 -07:00
|
|
|
|
fromStr _ = error "normalizeInlines - fromStr - not a Str"
|
|
|
|
|
normalizeInlines (Space : ys) =
|
|
|
|
|
if null rest
|
|
|
|
|
then []
|
|
|
|
|
else Space : rest
|
2011-12-02 19:39:30 -08:00
|
|
|
|
where isSp Space = True
|
|
|
|
|
isSp _ = False
|
2014-06-29 23:03:12 -07:00
|
|
|
|
rest = dropWhile isSp $ normalizeInlines ys
|
|
|
|
|
normalizeInlines (Emph xs : zs) =
|
|
|
|
|
case normalizeInlines zs of
|
|
|
|
|
(Emph ys : rest) -> normalizeInlines $
|
|
|
|
|
Emph (normalizeInlines $ xs ++ ys) : rest
|
|
|
|
|
rest -> case normalizeInlines xs of
|
|
|
|
|
[] -> rest
|
|
|
|
|
xs' -> Emph xs' : rest
|
|
|
|
|
normalizeInlines (Strong xs : zs) =
|
|
|
|
|
case normalizeInlines zs of
|
|
|
|
|
(Strong ys : rest) -> normalizeInlines $
|
|
|
|
|
Strong (normalizeInlines $ xs ++ ys) : rest
|
|
|
|
|
rest -> case normalizeInlines xs of
|
|
|
|
|
[] -> rest
|
|
|
|
|
xs' -> Strong xs' : rest
|
|
|
|
|
normalizeInlines (Subscript xs : zs) =
|
|
|
|
|
case normalizeInlines zs of
|
|
|
|
|
(Subscript ys : rest) -> normalizeInlines $
|
|
|
|
|
Subscript (normalizeInlines $ xs ++ ys) : rest
|
|
|
|
|
rest -> case normalizeInlines xs of
|
|
|
|
|
[] -> rest
|
|
|
|
|
xs' -> Subscript xs' : rest
|
|
|
|
|
normalizeInlines (Superscript xs : zs) =
|
|
|
|
|
case normalizeInlines zs of
|
|
|
|
|
(Superscript ys : rest) -> normalizeInlines $
|
|
|
|
|
Superscript (normalizeInlines $ xs ++ ys) : rest
|
|
|
|
|
rest -> case normalizeInlines xs of
|
|
|
|
|
[] -> rest
|
|
|
|
|
xs' -> Superscript xs' : rest
|
|
|
|
|
normalizeInlines (SmallCaps xs : zs) =
|
|
|
|
|
case normalizeInlines zs of
|
|
|
|
|
(SmallCaps ys : rest) -> normalizeInlines $
|
|
|
|
|
SmallCaps (normalizeInlines $ xs ++ ys) : rest
|
|
|
|
|
rest -> case normalizeInlines xs of
|
|
|
|
|
[] -> rest
|
|
|
|
|
xs' -> SmallCaps xs' : rest
|
|
|
|
|
normalizeInlines (Strikeout xs : zs) =
|
|
|
|
|
case normalizeInlines zs of
|
|
|
|
|
(Strikeout ys : rest) -> normalizeInlines $
|
|
|
|
|
Strikeout (normalizeInlines $ xs ++ ys) : rest
|
|
|
|
|
rest -> case normalizeInlines xs of
|
|
|
|
|
[] -> rest
|
|
|
|
|
xs' -> Strikeout xs' : rest
|
|
|
|
|
normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys
|
|
|
|
|
normalizeInlines (RawInline f xs : zs) =
|
|
|
|
|
case normalizeInlines zs of
|
|
|
|
|
(RawInline f' ys : rest) | f == f' -> normalizeInlines $
|
|
|
|
|
RawInline f (xs ++ ys) : rest
|
|
|
|
|
rest -> RawInline f xs : rest
|
|
|
|
|
normalizeInlines (Code _ "" : ys) = normalizeInlines ys
|
|
|
|
|
normalizeInlines (Code attr xs : zs) =
|
|
|
|
|
case normalizeInlines zs of
|
|
|
|
|
(Code attr' ys : rest) | attr == attr' -> normalizeInlines $
|
|
|
|
|
Code attr (xs ++ ys) : rest
|
|
|
|
|
rest -> Code attr xs : rest
|
|
|
|
|
-- allow empty spans, they may carry identifiers etc.
|
|
|
|
|
-- normalizeInlines (Span _ [] : ys) = normalizeInlines ys
|
|
|
|
|
normalizeInlines (Span attr xs : zs) =
|
|
|
|
|
case normalizeInlines zs of
|
|
|
|
|
(Span attr' ys : rest) | attr == attr' -> normalizeInlines $
|
|
|
|
|
Span attr (normalizeInlines $ xs ++ ys) : rest
|
|
|
|
|
rest -> Span attr (normalizeInlines xs) : rest
|
|
|
|
|
normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) :
|
|
|
|
|
normalizeInlines ys
|
|
|
|
|
normalizeInlines (Quoted qt ils : ys) =
|
|
|
|
|
Quoted qt (normalizeInlines ils) : normalizeInlines ys
|
|
|
|
|
normalizeInlines (Link ils t : ys) =
|
|
|
|
|
Link (normalizeInlines ils) t : normalizeInlines ys
|
|
|
|
|
normalizeInlines (Image ils t : ys) =
|
|
|
|
|
Image (normalizeInlines ils) t : normalizeInlines ys
|
|
|
|
|
normalizeInlines (Cite cs ils : ys) =
|
|
|
|
|
Cite cs (normalizeInlines ils) : normalizeInlines ys
|
|
|
|
|
normalizeInlines (x : xs) = x : normalizeInlines xs
|
|
|
|
|
normalizeInlines [] = []
|
2010-12-14 20:04:37 -08:00
|
|
|
|
|
2014-07-13 15:10:27 -07:00
|
|
|
|
-- | Extract inlines, removing formatting.
|
2014-07-13 14:56:20 -07:00
|
|
|
|
removeFormatting :: Walkable Inline a => a -> [Inline]
|
2014-07-13 10:13:22 -07:00
|
|
|
|
removeFormatting = query go . walk deNote
|
|
|
|
|
where go :: Inline -> [Inline]
|
|
|
|
|
go (Str xs) = [Str xs]
|
|
|
|
|
go Space = [Space]
|
|
|
|
|
go (Code _ x) = [Str x]
|
|
|
|
|
go (Math _ x) = [Str x]
|
|
|
|
|
go LineBreak = [Space]
|
|
|
|
|
go _ = []
|
|
|
|
|
deNote (Note _) = Str ""
|
|
|
|
|
deNote x = x
|
|
|
|
|
|
2013-08-28 08:43:51 -07:00
|
|
|
|
-- | Convert pandoc structure to a string with formatting removed.
|
2013-08-16 13:22:27 -07:00
|
|
|
|
-- Footnotes are skipped (since we don't want their contents in link
|
|
|
|
|
-- labels).
|
2013-08-28 08:43:51 -07:00
|
|
|
|
stringify :: Walkable Inline a => a -> String
|
2013-08-16 13:22:27 -07:00
|
|
|
|
stringify = query go . walk deNote
|
2010-11-27 07:08:06 -08:00
|
|
|
|
where go :: Inline -> [Char]
|
|
|
|
|
go Space = " "
|
|
|
|
|
go (Str x) = x
|
2011-01-26 20:44:25 -08:00
|
|
|
|
go (Code _ x) = x
|
2010-12-12 20:09:14 -08:00
|
|
|
|
go (Math _ x) = x
|
2010-12-19 10:13:36 -08:00
|
|
|
|
go LineBreak = " "
|
2010-11-27 07:08:06 -08:00
|
|
|
|
go _ = ""
|
2013-08-16 13:22:27 -07:00
|
|
|
|
deNote (Note _) = Str ""
|
|
|
|
|
deNote x = x
|
2010-11-27 07:08:06 -08:00
|
|
|
|
|
2014-08-03 16:48:55 +04:00
|
|
|
|
-- | Bring all regular text in a pandoc structure to uppercase.
|
2014-08-08 20:10:58 +01:00
|
|
|
|
--
|
2014-08-03 16:48:55 +04:00
|
|
|
|
-- This function correctly handles cases where a lowercase character doesn't
|
|
|
|
|
-- match to a single uppercase character – e.g. “Straße” would be converted
|
|
|
|
|
-- to “STRASSE”, not “STRAßE”.
|
|
|
|
|
capitalize :: Walkable Inline a => a -> a
|
|
|
|
|
capitalize = walk go
|
|
|
|
|
where go :: Inline -> Inline
|
|
|
|
|
go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
|
|
|
|
|
go x = x
|
|
|
|
|
|
2009-11-01 02:38:18 +00:00
|
|
|
|
-- | Change final list item from @Para@ to @Plain@ if the list contains
|
|
|
|
|
-- no other @Para@ blocks.
|
2007-11-03 23:27:58 +00:00
|
|
|
|
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
|
|
|
|
|
-> [[Block]]
|
|
|
|
|
compactify [] = []
|
|
|
|
|
compactify items =
|
2009-11-01 02:38:18 +00:00
|
|
|
|
case (init items, last items) of
|
|
|
|
|
(_,[]) -> items
|
|
|
|
|
(others, final) ->
|
|
|
|
|
case last final of
|
|
|
|
|
Para a -> case (filter isPara $ concat items) of
|
|
|
|
|
-- if this is only Para, change to Plain
|
|
|
|
|
[_] -> others ++ [init final ++ [Plain a]]
|
|
|
|
|
_ -> items
|
|
|
|
|
_ -> items
|
|
|
|
|
|
2012-09-27 17:22:17 -07:00
|
|
|
|
-- | Change final list item from @Para@ to @Plain@ if the list contains
|
|
|
|
|
-- no other @Para@ blocks. Like compactify, but operates on @Blocks@ rather
|
|
|
|
|
-- than @[Block]@.
|
|
|
|
|
compactify' :: [Blocks] -- ^ List of list items (each a list of blocks)
|
|
|
|
|
-> [Blocks]
|
|
|
|
|
compactify' [] = []
|
|
|
|
|
compactify' items =
|
|
|
|
|
let (others, final) = (init items, last items)
|
|
|
|
|
in case reverse (B.toList final) of
|
|
|
|
|
(Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
|
|
|
|
|
-- if this is only Para, change to Plain
|
|
|
|
|
[_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
|
|
|
|
|
_ -> items
|
|
|
|
|
_ -> items
|
|
|
|
|
|
2014-07-25 10:53:04 -07:00
|
|
|
|
-- | Like @compactify'@, but acts on items of definition lists.
|
2014-04-19 14:48:35 +02:00
|
|
|
|
compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
|
|
|
|
|
compactify'DL items =
|
|
|
|
|
let defs = concatMap snd items
|
2014-07-25 10:53:04 -07:00
|
|
|
|
in case reverse (concatMap B.toList defs) of
|
|
|
|
|
(Para x:xs)
|
|
|
|
|
| not (any isPara xs) ->
|
|
|
|
|
let (t,ds) = last items
|
|
|
|
|
lastDef = B.toList $ last ds
|
|
|
|
|
ds' = init ds ++
|
|
|
|
|
if null lastDef
|
|
|
|
|
then [B.fromList lastDef]
|
|
|
|
|
else [B.fromList $ init lastDef ++ [Plain x]]
|
|
|
|
|
in init items ++ [(t, ds')]
|
|
|
|
|
| otherwise -> items
|
|
|
|
|
_ -> items
|
2014-04-19 14:48:35 +02:00
|
|
|
|
|
2009-11-01 02:38:18 +00:00
|
|
|
|
isPara :: Block -> Bool
|
|
|
|
|
isPara (Para _) = True
|
|
|
|
|
isPara _ = False
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Data structure for defining hierarchical Pandoc documents
|
2012-07-26 22:32:53 -07:00
|
|
|
|
data Element = Blk Block
|
2013-02-12 20:13:23 -08:00
|
|
|
|
| Sec Int [Int] Attr [Inline] [Element]
|
|
|
|
|
-- lvl num attributes label contents
|
2009-04-25 00:29:58 +00:00
|
|
|
|
deriving (Eq, Read, Show, Typeable, Data)
|
|
|
|
|
|
2013-08-10 18:13:38 -07:00
|
|
|
|
instance Walkable Inline Element where
|
|
|
|
|
walk f (Blk x) = Blk (walk f x)
|
|
|
|
|
walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
|
|
|
|
|
walkM f (Blk x) = Blk `fmap` walkM f x
|
|
|
|
|
walkM f (Sec lev nums attr ils elts) = do
|
|
|
|
|
ils' <- walkM f ils
|
|
|
|
|
elts' <- walkM f elts
|
|
|
|
|
return $ Sec lev nums attr ils' elts'
|
|
|
|
|
query f (Blk x) = query f x
|
|
|
|
|
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
|
|
|
|
|
|
|
|
|
|
instance Walkable Block Element where
|
|
|
|
|
walk f (Blk x) = Blk (walk f x)
|
|
|
|
|
walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
|
|
|
|
|
walkM f (Blk x) = Blk `fmap` walkM f x
|
|
|
|
|
walkM f (Sec lev nums attr ils elts) = do
|
|
|
|
|
ils' <- walkM f ils
|
|
|
|
|
elts' <- walkM f elts
|
|
|
|
|
return $ Sec lev nums attr ils' elts'
|
|
|
|
|
query f (Blk x) = query f x
|
|
|
|
|
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
|
|
|
|
|
|
|
|
|
|
|
2010-03-28 22:29:31 -07:00
|
|
|
|
-- | Convert Pandoc inline list to plain text identifier. HTML
|
|
|
|
|
-- identifiers must start with a letter, and may contain only
|
2010-07-04 23:26:04 -07:00
|
|
|
|
-- letters, digits, and the characters _-.
|
2009-04-25 00:29:58 +00:00
|
|
|
|
inlineListToIdentifier :: [Inline] -> String
|
2010-03-28 22:29:31 -07:00
|
|
|
|
inlineListToIdentifier =
|
2010-12-19 10:13:36 -08:00
|
|
|
|
dropWhile (not . isAlpha) . intercalate "-" . words .
|
|
|
|
|
map (nbspToSp . toLower) .
|
|
|
|
|
filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
|
|
|
|
|
stringify
|
|
|
|
|
where nbspToSp '\160' = ' '
|
|
|
|
|
nbspToSp x = x
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
|
|
|
|
|
hierarchicalize :: [Block] -> [Element]
|
2014-07-21 20:47:18 -07:00
|
|
|
|
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
|
|
|
|
|
|
|
|
|
|
hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
|
|
|
|
|
hierarchicalizeWithIds [] = return []
|
|
|
|
|
hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
|
|
|
|
|
lastnum <- S.get
|
|
|
|
|
let lastnum' = take level lastnum
|
|
|
|
|
let newnum = case length lastnum' of
|
|
|
|
|
x | "unnumbered" `elem` classes -> []
|
|
|
|
|
| x >= level -> init lastnum' ++ [last lastnum' + 1]
|
|
|
|
|
| otherwise -> lastnum ++
|
|
|
|
|
replicate (level - length lastnum - 1) 0 ++ [1]
|
|
|
|
|
unless (null newnum) $ S.put newnum
|
2009-04-25 00:29:58 +00:00
|
|
|
|
let (sectionContents, rest) = break (headerLtEq level) xs
|
2014-07-21 20:47:18 -07:00
|
|
|
|
sectionContents' <- hierarchicalizeWithIds sectionContents
|
|
|
|
|
rest' <- hierarchicalizeWithIds rest
|
2013-02-12 20:13:23 -08:00
|
|
|
|
return $ Sec level newnum attr title' sectionContents' : rest'
|
2014-07-21 20:47:18 -07:00
|
|
|
|
hierarchicalizeWithIds (x:rest) = do
|
|
|
|
|
rest' <- hierarchicalizeWithIds rest
|
2009-04-25 00:29:58 +00:00
|
|
|
|
return $ (Blk x) : rest'
|
|
|
|
|
|
|
|
|
|
headerLtEq :: Int -> Block -> Bool
|
2012-10-29 22:45:52 -07:00
|
|
|
|
headerLtEq level (Header l _ _) = l <= level
|
2009-04-25 00:29:58 +00:00
|
|
|
|
headerLtEq _ _ = False
|
|
|
|
|
|
2010-03-16 06:45:52 +00:00
|
|
|
|
-- | Generate a unique identifier from a list of inlines.
|
|
|
|
|
-- Second argument is a list of already used identifiers.
|
2009-04-25 00:29:58 +00:00
|
|
|
|
uniqueIdent :: [Inline] -> [String] -> String
|
|
|
|
|
uniqueIdent title' usedIdents =
|
2010-03-28 22:29:31 -07:00
|
|
|
|
let baseIdent = case inlineListToIdentifier title' of
|
|
|
|
|
"" -> "section"
|
|
|
|
|
x -> x
|
2009-04-25 00:29:58 +00:00
|
|
|
|
numIdent n = baseIdent ++ "-" ++ show n
|
|
|
|
|
in if baseIdent `elem` usedIdents
|
|
|
|
|
then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
|
|
|
|
|
Just x -> numIdent x
|
|
|
|
|
Nothing -> baseIdent -- if we have more than 60,000, allow repeats
|
|
|
|
|
else baseIdent
|
2007-11-03 23:27:58 +00:00
|
|
|
|
|
|
|
|
|
-- | True if block is a Header block.
|
|
|
|
|
isHeaderBlock :: Block -> Bool
|
2012-10-29 22:45:52 -07:00
|
|
|
|
isHeaderBlock (Header _ _ _) = True
|
2007-11-03 23:27:58 +00:00
|
|
|
|
isHeaderBlock _ = False
|
|
|
|
|
|
2010-07-11 20:03:55 -07:00
|
|
|
|
-- | Shift header levels up or down.
|
|
|
|
|
headerShift :: Int -> Pandoc -> Pandoc
|
2013-08-10 18:45:00 -07:00
|
|
|
|
headerShift n = walk shift
|
2010-07-11 20:03:55 -07:00
|
|
|
|
where shift :: Block -> Block
|
2012-10-29 22:45:52 -07:00
|
|
|
|
shift (Header level attr inner) = Header (level + n) attr inner
|
|
|
|
|
shift x = x
|
2010-07-11 20:03:55 -07:00
|
|
|
|
|
2013-01-07 20:12:05 -08:00
|
|
|
|
-- | Detect if a list is tight.
|
|
|
|
|
isTightList :: [[Block]] -> Bool
|
2013-12-19 17:06:27 -05:00
|
|
|
|
isTightList = all firstIsPlain
|
2013-01-07 20:12:05 -08:00
|
|
|
|
where firstIsPlain (Plain _ : _) = True
|
|
|
|
|
firstIsPlain _ = False
|
|
|
|
|
|
2013-05-10 22:53:35 -07:00
|
|
|
|
-- | Set a field of a 'Meta' object. If the field already has a value,
|
|
|
|
|
-- convert it into a list with the new value appended to the old value(s).
|
|
|
|
|
addMetaField :: ToMetaValue a
|
|
|
|
|
=> String
|
|
|
|
|
-> a
|
|
|
|
|
-> Meta
|
|
|
|
|
-> Meta
|
|
|
|
|
addMetaField key val (Meta meta) =
|
|
|
|
|
Meta $ M.insertWith combine key (toMetaValue val) meta
|
2014-05-12 13:05:42 -07:00
|
|
|
|
where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
|
2013-05-10 22:53:35 -07:00
|
|
|
|
combine newval x = MetaList [x, newval]
|
2014-05-12 13:05:42 -07:00
|
|
|
|
tolist (MetaList ys) = ys
|
|
|
|
|
tolist y = [y]
|
2013-05-10 22:53:35 -07:00
|
|
|
|
|
|
|
|
|
-- | Create 'Meta' from old-style title, authors, date. This is
|
|
|
|
|
-- provided to ease the transition from the old API.
|
|
|
|
|
makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
|
|
|
|
|
makeMeta title authors date =
|
|
|
|
|
addMetaField "title" (B.fromList title)
|
|
|
|
|
$ addMetaField "author" (map B.fromList authors)
|
|
|
|
|
$ addMetaField "date" (B.fromList date)
|
|
|
|
|
$ nullMeta
|
|
|
|
|
|
2012-08-15 09:42:16 -07:00
|
|
|
|
--
|
|
|
|
|
-- TagSoup HTML handling
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
-- | Render HTML tags.
|
|
|
|
|
renderTags' :: [Tag String] -> String
|
|
|
|
|
renderTags' = renderTagsOptions
|
2013-12-19 20:19:24 -05:00
|
|
|
|
renderOptions{ optMinimize = matchTags ["hr", "br", "img",
|
|
|
|
|
"meta", "link"]
|
|
|
|
|
, optRawTag = matchTags ["script", "style"] }
|
|
|
|
|
where matchTags = \tags -> flip elem tags . map toLower
|
2012-08-15 09:42:16 -07:00
|
|
|
|
|
2008-08-02 17:22:55 +00:00
|
|
|
|
--
|
|
|
|
|
-- File handling
|
|
|
|
|
--
|
|
|
|
|
|
2008-09-04 02:51:28 +00:00
|
|
|
|
-- | Perform an IO action in a directory, returning to starting directory.
|
|
|
|
|
inDirectory :: FilePath -> IO a -> IO a
|
|
|
|
|
inDirectory path action = do
|
|
|
|
|
oldDir <- getCurrentDirectory
|
|
|
|
|
setCurrentDirectory path
|
|
|
|
|
result <- action
|
|
|
|
|
setCurrentDirectory oldDir
|
|
|
|
|
return result
|
2009-12-31 01:11:23 +00:00
|
|
|
|
|
2013-07-04 22:40:23 -07:00
|
|
|
|
readDefaultDataFile :: FilePath -> IO BS.ByteString
|
2012-12-29 17:44:02 -08:00
|
|
|
|
readDefaultDataFile fname =
|
|
|
|
|
#ifdef EMBED_DATA_FILES
|
2013-04-19 23:02:38 -07:00
|
|
|
|
case lookup (makeCanonical fname) dataFiles of
|
2013-08-15 17:21:56 -07:00
|
|
|
|
Nothing -> err 97 $ "Could not find data file " ++ fname
|
2012-12-29 18:45:20 -08:00
|
|
|
|
Just contents -> return contents
|
2013-04-20 13:07:50 -07:00
|
|
|
|
where makeCanonical = joinPath . transformPathParts . splitDirectories
|
2013-04-19 23:02:38 -07:00
|
|
|
|
transformPathParts = reverse . foldl go []
|
|
|
|
|
go as "." = as
|
|
|
|
|
go (_:as) ".." = as
|
|
|
|
|
go as x = x : as
|
2012-12-29 17:44:02 -08:00
|
|
|
|
#else
|
2013-08-15 17:21:56 -07:00
|
|
|
|
getDataFileName ("data" </> fname) >>= checkExistence >>= BS.readFile
|
|
|
|
|
where checkExistence fn = do
|
|
|
|
|
exists <- doesFileExist fn
|
|
|
|
|
if exists
|
|
|
|
|
then return fn
|
|
|
|
|
else err 97 ("Could not find data file " ++ fname)
|
2012-12-29 17:44:02 -08:00
|
|
|
|
#endif
|
2010-11-19 22:13:30 -08:00
|
|
|
|
|
2010-01-14 05:54:38 +00:00
|
|
|
|
-- | Read file from specified user data directory or, if not found there, from
|
|
|
|
|
-- Cabal data directory.
|
2013-07-04 22:40:23 -07:00
|
|
|
|
readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
|
2012-12-29 17:44:02 -08:00
|
|
|
|
readDataFile Nothing fname = readDefaultDataFile fname
|
|
|
|
|
readDataFile (Just userDir) fname = do
|
|
|
|
|
exists <- doesFileExist (userDir </> fname)
|
|
|
|
|
if exists
|
2013-07-04 22:40:23 -07:00
|
|
|
|
then BS.readFile (userDir </> fname)
|
2012-12-29 17:44:02 -08:00
|
|
|
|
else readDefaultDataFile fname
|
|
|
|
|
|
|
|
|
|
-- | Same as 'readDataFile' but returns a String instead of a ByteString.
|
|
|
|
|
readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
|
|
|
|
|
readDataFileUTF8 userDir fname =
|
|
|
|
|
UTF8.toString `fmap` readDataFile userDir fname
|
2012-01-29 23:54:00 -08:00
|
|
|
|
|
2013-01-11 16:19:06 -08:00
|
|
|
|
-- | Fetch an image or other item from the local filesystem or the net.
|
|
|
|
|
-- Returns raw content and maybe mime type.
|
Options: Changed `writerSourceDir` to `writerSourceURL` (now a Maybe).
Previously we used to store the directory of the first input file,
even if it was local, and used this as a base directory for
finding images in ODT, EPUB, Docx, and PDF.
This has been confusing to many users. It seems better to look for
images relative to the current working directory, even if the first
file argument is in another directory.
writerSourceURL is set to 'Just url' when the first command-line
argument is an absolute URL. (So, relative links will be resolved
in relation to the first page.) Otherwise, 'Nothing'.
The ODT, EPUB, Docx, and PDF writers have been modified accordingly.
Note that this change may break some existing workflows. If you
have been assuming that relative links will be interpreted relative
to the directory of the first file argument, you'll need to
make that the current directory before running pandoc.
Closes #942.
2013-08-11 15:58:09 -07:00
|
|
|
|
fetchItem :: Maybe String -> String
|
2014-08-17 20:42:30 +04:00
|
|
|
|
-> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
|
2014-08-02 16:09:17 -07:00
|
|
|
|
fetchItem sourceURL s =
|
|
|
|
|
case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of
|
|
|
|
|
(_, s') | isURI s' -> openURL s'
|
|
|
|
|
(Just u, s') -> -- try fetching from relative path at source
|
|
|
|
|
case parseURIReference s' of
|
|
|
|
|
Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
|
|
|
|
|
Nothing -> openURL s' -- will throw error
|
|
|
|
|
(Nothing, _) -> E.try readLocalFile -- get from local file system
|
Options: Changed `writerSourceDir` to `writerSourceURL` (now a Maybe).
Previously we used to store the directory of the first input file,
even if it was local, and used this as a base directory for
finding images in ODT, EPUB, Docx, and PDF.
This has been confusing to many users. It seems better to look for
images relative to the current working directory, even if the first
file argument is in another directory.
writerSourceURL is set to 'Just url' when the first command-line
argument is an absolute URL. (So, relative links will be resolved
in relation to the first page.) Otherwise, 'Nothing'.
The ODT, EPUB, Docx, and PDF writers have been modified accordingly.
Note that this change may break some existing workflows. If you
have been assuming that relative links will be interpreted relative
to the directory of the first file argument, you'll need to
make that the current directory before running pandoc.
Closes #942.
2013-08-11 15:58:09 -07:00
|
|
|
|
where readLocalFile = do
|
2014-08-02 16:32:11 -07:00
|
|
|
|
cont <- BS.readFile fp
|
2013-01-11 16:19:06 -08:00
|
|
|
|
return (cont, mime)
|
2014-08-02 16:09:17 -07:00
|
|
|
|
dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
|
2014-08-02 16:32:11 -07:00
|
|
|
|
fp = unEscapeString $ dropFragmentAndQuery s
|
|
|
|
|
mime = case takeExtension fp of
|
|
|
|
|
".gz" -> getMimeType $ dropExtension fp
|
|
|
|
|
x -> getMimeType x
|
2014-08-22 23:21:57 -07:00
|
|
|
|
ensureEscaped x@(_:':':'\\':_) = x -- likely windows path
|
|
|
|
|
ensureEscaped x = escapeURIString isAllowedInURI x
|
2013-01-11 11:30:31 -08:00
|
|
|
|
|
2014-07-30 13:47:07 -07:00
|
|
|
|
-- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
|
2014-07-30 14:07:31 -07:00
|
|
|
|
fetchItem' :: MediaBag -> Maybe String -> String
|
2014-08-17 20:42:30 +04:00
|
|
|
|
-> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
|
2014-07-30 14:07:31 -07:00
|
|
|
|
fetchItem' media sourceURL s = do
|
2014-07-31 11:04:40 -07:00
|
|
|
|
case lookupMedia s media of
|
2014-07-30 13:47:07 -07:00
|
|
|
|
Nothing -> fetchItem sourceURL s
|
2014-07-31 11:04:40 -07:00
|
|
|
|
Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
|
2014-07-30 13:47:07 -07:00
|
|
|
|
|
2013-01-11 16:19:06 -08:00
|
|
|
|
-- | Read from a URL and return raw data and maybe mime type.
|
2014-08-17 20:42:30 +04:00
|
|
|
|
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
|
2013-05-28 12:48:17 -07:00
|
|
|
|
openURL u
|
2014-08-03 14:44:39 +04:00
|
|
|
|
| Just u' <- stripPrefix "data:" u =
|
|
|
|
|
let mime = takeWhile (/=',') u'
|
|
|
|
|
contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u'
|
2013-11-19 13:15:24 -08:00
|
|
|
|
in return $ Right (decodeLenient contents, Just mime)
|
2014-05-18 22:04:39 -07:00
|
|
|
|
#ifdef HTTP_CLIENT
|
2013-12-09 22:35:22 -08:00
|
|
|
|
| otherwise = withSocketsDo $ E.try $ do
|
2013-07-04 22:40:23 -07:00
|
|
|
|
req <- parseUrl u
|
2014-04-05 10:58:32 -07:00
|
|
|
|
(proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
|
|
|
|
|
let req' = case proxy of
|
|
|
|
|
Left _ -> req
|
|
|
|
|
Right pr -> case parseUrl pr of
|
|
|
|
|
Just r -> addProxy (host r) (port r) req
|
|
|
|
|
Nothing -> req
|
2014-05-18 22:04:39 -07:00
|
|
|
|
resp <- withManager tlsManagerSettings $ httpLbs req'
|
2013-07-04 22:40:23 -07:00
|
|
|
|
return (BS.concat $ toChunks $ responseBody resp,
|
|
|
|
|
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
|
|
|
|
|
#else
|
2013-07-18 20:58:14 -07:00
|
|
|
|
| otherwise = E.try $ getBodyAndMimeType `fmap` browse
|
2013-07-30 08:38:13 -07:00
|
|
|
|
(do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
|
2013-05-04 21:53:06 -07:00
|
|
|
|
setOutHandler $ const (return ())
|
2013-05-01 10:55:06 -07:00
|
|
|
|
setAllowRedirects True
|
|
|
|
|
request (getRequest' u'))
|
2013-03-26 08:31:45 -07:00
|
|
|
|
where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
|
|
|
|
|
getRequest' uriString = case parseURI uriString of
|
|
|
|
|
Nothing -> error ("Not a valid URL: " ++
|
|
|
|
|
uriString)
|
|
|
|
|
Just v -> mkRequest GET v
|
2013-04-28 22:57:17 -07:00
|
|
|
|
u' = escapeURIString (/= '|') u -- pipes are rejected by Network.URI
|
2013-07-04 22:40:23 -07:00
|
|
|
|
#endif
|
2013-01-11 11:30:31 -08:00
|
|
|
|
|
2012-01-29 23:54:00 -08:00
|
|
|
|
--
|
|
|
|
|
-- Error reporting
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
err :: Int -> String -> IO a
|
|
|
|
|
err exitCode msg = do
|
|
|
|
|
name <- getProgName
|
2012-09-23 22:53:34 -07:00
|
|
|
|
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
|
2012-01-29 23:54:00 -08:00
|
|
|
|
exitWith $ ExitFailure exitCode
|
|
|
|
|
return undefined
|
|
|
|
|
|
|
|
|
|
warn :: String -> IO ()
|
|
|
|
|
warn msg = do
|
|
|
|
|
name <- getProgName
|
2012-09-23 22:53:34 -07:00
|
|
|
|
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
|
2012-08-09 07:52:39 -07:00
|
|
|
|
|
2014-08-08 20:10:58 +01:00
|
|
|
|
-- | Remove intermediate "." and ".." directories from a path.
|
|
|
|
|
--
|
2014-08-13 13:59:18 -07:00
|
|
|
|
-- > collapseFilePath "./foo" == "foo"
|
|
|
|
|
-- > collapseFilePath "/bar/../baz" == "/baz"
|
|
|
|
|
-- > collapseFilePath "/../baz" == "/../baz"
|
|
|
|
|
-- > collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar"
|
|
|
|
|
-- > collapseFilePath "parent/foo/baz/../../bar" == "parent/bar"
|
|
|
|
|
-- > collapseFilePath "parent/foo/.." == "parent"
|
|
|
|
|
-- > collapseFilePath "/parent/foo/../../bar" == "/bar"
|
2014-08-08 20:10:58 +01:00
|
|
|
|
collapseFilePath :: FilePath -> FilePath
|
|
|
|
|
collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories
|
|
|
|
|
where
|
|
|
|
|
go rs "." = rs
|
|
|
|
|
go r@(p:rs) ".." = case p of
|
|
|
|
|
".." -> ("..":r)
|
|
|
|
|
"/" -> ("..":r)
|
|
|
|
|
_ -> rs
|
|
|
|
|
go _ "/" = ["/"]
|
|
|
|
|
go rs x = x:rs
|
|
|
|
|
|
|
|
|
|
|
2012-08-09 07:52:39 -07:00
|
|
|
|
--
|
|
|
|
|
-- Safe read
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
safeRead :: (Monad m, Read a) => String -> m a
|
|
|
|
|
safeRead s = case reads s of
|
2012-08-09 20:19:06 -07:00
|
|
|
|
(d,x):_
|
|
|
|
|
| all isSpace x -> return d
|
|
|
|
|
_ -> fail $ "Could not read `" ++ s ++ "'"
|
2014-07-30 12:29:04 -07:00
|
|
|
|
|
|
|
|
|
--
|
|
|
|
|
-- Temp directory
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
withTempDir :: String -> (FilePath -> IO a) -> IO a
|
|
|
|
|
withTempDir =
|
|
|
|
|
#ifdef _WINDOWS
|
|
|
|
|
withTempDirectory "."
|
|
|
|
|
#else
|
|
|
|
|
withSystemTempDirectory
|
|
|
|
|
#endif
|