{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
    FlexibleContexts, ScopedTypeVariables, PatternGuards,
    ViewPatterns #-}
{-
Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}

{- |
   Module      : Text.Pandoc.Shared
   Copyright   : Copyright (C) 2006-2015 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Utility functions and definitions used by the various Pandoc modules.
-}
module Text.Pandoc.Shared (
                     -- * List processing
                     splitBy,
                     splitByIndices,
                     splitStringByIndices,
                     substitute,
                     ordNub,
                     -- * Text processing
                     backslashEscapes,
                     escapeStringUsing,
                     stripTrailingNewlines,
                     trim,
                     triml,
                     trimr,
                     stripFirstAndLast,
                     camelCaseToHyphenated,
                     toRomanNumeral,
                     escapeURI,
                     tabFilter,
                     -- * Date/time
                     normalizeDate,
                     -- * Pandoc block and inline list processing
                     orderedListMarkers,
                     normalizeSpaces,
                     extractSpaces,
                     normalize,
                     normalizeInlines,
                     normalizeBlocks,
                     removeFormatting,
                     stringify,
                     capitalize,
                     compactify,
                     compactify',
                     compactify'DL,
                     Element (..),
                     hierarchicalize,
                     uniqueIdent,
                     isHeaderBlock,
                     headerShift,
                     isTightList,
                     addMetaField,
                     makeMeta,
                     -- * TagSoup HTML handling
                     renderTags',
                     -- * File handling
                     inDirectory,
                     getDefaultReferenceDocx,
                     getDefaultReferenceODT,
                     readDataFile,
                     readDataFileUTF8,
                     fetchItem,
                     fetchItem',
                     openURL,
                     collapseFilePath,
                     -- * Error handling
                     err,
                     warn,
                     mapLeft,
                     hush,
                     -- * Safe read
                     safeRead,
                     -- * Temp directory
                     withTempDir,
                     -- * Version
                     pandocVersion
                    ) where

import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
import System.Environment (getProgName)
import System.Exit (exitWith, ExitCode(..))
import Data.Char ( toLower, isLower, isUpper, isAlpha,
                   isLetter, isDigit, isSpace )
import Data.List ( find, stripPrefix, intercalate )
import Data.Version ( showVersion )
import qualified Data.Map as M
import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
                     unEscapeString, parseURIReference, isAllowedInURI )
import qualified Data.Set as Set
import System.Directory
import System.FilePath (splitDirectories, isPathSeparator)
import qualified System.FilePath.Posix as Posix
import Text.Pandoc.MIME (MimeType, getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension)
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
import Control.Applicative ((<$>))
import Control.Monad (msum, unless, MonadPlus(..))
import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.Compat.Locale (defaultTimeLocale)
import Data.Time
import Data.Time.Clock.POSIX
import System.IO (stderr)
import System.IO.Temp
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
                          renderOptions)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import Text.Pandoc.Compat.Monoid
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
import qualified Data.Text as T (toUpper, pack, unpack)
import Data.ByteString.Lazy (toChunks, fromChunks)
import qualified Data.ByteString.Lazy as BL
import Paths_pandoc (version)

import Codec.Archive.Zip

#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
#else
import Paths_pandoc (getDataFileName)
#endif
#ifdef HTTP_CLIENT
import Network.HTTP.Client (httpLbs, parseUrl,
                            responseBody, responseHeaders,
                            Request(port,host))
#if MIN_VERSION_http_client(0,4,18)
import Network.HTTP.Client (newManager)
#else
import Network.HTTP.Client (withManager)
#endif
import Network.HTTP.Client.Internal (addProxy)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import System.Environment (getEnv)
import Network.HTTP.Types.Header ( hContentType)
import Network (withSocketsDo)
#else
import Network.URI (parseURI)
import Network.HTTP (findHeader, rspBody,
                     RequestMethod(..), HeaderName(..), mkRequest)
import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
#endif

-- | Version number of pandoc library.
pandocVersion :: String
pandocVersion = showVersion version

--
-- List processing
--

-- | Split list by groups of one or more sep.
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = []
splitBy isSep lst =
  let (first, rest) = break isSep lst
      rest'         = dropWhile isSep rest
  in  first:(splitBy isSep rest')
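-- For example, runs of separators never produce empty groups:
--
-- > splitBy (== ',') "a,,b,c" == ["a","b","c"]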

splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest)
  where (first, rest) = splitAt x lst

-- | Split string into chunks divided at specified indices.
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
splitStringByIndices [] lst = [lst]
splitStringByIndices (x:xs) lst =
  let (first, rest) = splitAt' x lst in
  first : (splitStringByIndices (map (\y -> y - x) xs) rest)
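-- The indices are interpreted in terms of character width (via splitAt'
-- below), so for plain ASCII input this behaves like repeated 'splitAt';
-- for example:
--
-- > splitStringByIndices [2,4] "abcdef" == ["ab","cd","ef"]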

splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' _ []          = ([],[])
splitAt' n xs | n <= 0 = ([],xs)
splitAt' n (x:xs)      = (x:ys,zs)
  where (ys,zs) = splitAt' (n - charWidth x) xs

-- | Replace each occurrence of one sublist in a list with another.
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ xs = xs
substitute target replacement lst@(x:xs) =
    case stripPrefix target lst of
      Just lst' -> replacement ++ substitute target replacement lst'
      Nothing   -> x : substitute target replacement xs
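-- For example:
--
-- > substitute "ab" "x" "cabd" == "cxd"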

ordNub :: (Ord a) => [a] -> [a]
ordNub l = go Set.empty l
  where
    go _ [] = []
    go s (x:xs) = if x `Set.member` s then go s xs
                                      else x : go (Set.insert x s) xs
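-- Like 'Data.List.nub', but O(n log n) thanks to the 'Ord' constraint;
-- the first occurrence of each element is kept:
--
-- > ordNub [3,1,3,2,1] == [3,1,2]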

--
-- Text processing
--

-- | Returns an association list of backslash escapes for the
-- designated characters.
backslashEscapes :: [Char]    -- ^ list of special characters to escape
                 -> [(Char, String)]
backslashEscapes = map (\ch -> (ch, ['\\',ch]))

-- | Escape a string of characters, using an association list of
-- characters and strings.
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing _ [] = ""
escapeStringUsing escapeTable (x:xs) =
  case (lookup x escapeTable) of
       Just str  -> str ++ rest
       Nothing   -> x:rest
  where rest = escapeStringUsing escapeTable xs
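-- The two functions are typically used together; for example:
--
-- > escapeStringUsing (backslashEscapes "*_") "a*b" == "a\\*b"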

-- | Strip trailing newlines from string.
stripTrailingNewlines :: String -> String
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse

-- | Remove leading and trailing space (including newlines) from string.
trim :: String -> String
trim = triml . trimr

-- | Remove leading space (including newlines) from string.
triml :: String -> String
triml = dropWhile (`elem` " \r\n\t")

-- | Remove trailing space (including newlines) from string.
trimr :: String -> String
trimr = reverse . triml . reverse

-- | Strip leading and trailing characters from string.
stripFirstAndLast :: String -> String
stripFirstAndLast str =
  drop 1 $ take ((length str) - 1) str

-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated [] = ""
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
  a:'-':(toLower b):(camelCaseToHyphenated rest)
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
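-- For example:
--
-- > camelCaseToHyphenated "camelCase" == "camel-case"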

-- | Convert number < 4000 to uppercase roman numeral.
toRomanNumeral :: Int -> String
toRomanNumeral x =
  if x >= 4000 || x < 0
     then "?"
     else case x of
              _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
              _ | x >= 900  -> "CM" ++ toRomanNumeral (x - 900)
              _ | x >= 500  -> "D" ++ toRomanNumeral (x - 500)
              _ | x >= 400  -> "CD" ++ toRomanNumeral (x - 400)
              _ | x >= 100  -> "C" ++ toRomanNumeral (x - 100)
              _ | x >= 90   -> "XC" ++ toRomanNumeral (x - 90)
              _ | x >= 50   -> "L"  ++ toRomanNumeral (x - 50)
              _ | x >= 40   -> "XL" ++ toRomanNumeral (x - 40)
              _ | x >= 10   -> "X" ++ toRomanNumeral (x - 10)
              _ | x == 9    -> "IX"
              _ | x >= 5    -> "V" ++ toRomanNumeral (x - 5)
              _ | x == 4    -> "IV"
              _ | x >= 1    -> "I" ++ toRomanNumeral (x - 1)
              _             -> ""
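-- For example:
--
-- > toRomanNumeral 2015 == "MMXV"
-- > toRomanNumeral 4000 == "?"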

-- | Escape whitespace in URI.
escapeURI :: String -> String
escapeURI = escapeURIString (not . isSpace)

-- | Convert tabs to spaces and filter out DOS line endings.
-- Tabs will be preserved if tab stop is set to 0.
tabFilter :: Int       -- ^ Tab stop
          -> String    -- ^ Input
          -> String
tabFilter tabStop =
  let go _ [] = ""
      go _ ('\n':xs) = '\n' : go tabStop xs
      go _ ('\r':'\n':xs) = '\n' : go tabStop xs
      go _ ('\r':xs) = '\n' : go tabStop xs
      go spsToNextStop ('\t':xs) =
        if tabStop == 0
           then '\t' : go tabStop xs
           else replicate spsToNextStop ' ' ++ go tabStop xs
      go 1 (x:xs) =
        x : go tabStop xs
      go spsToNextStop (x:xs) =
        x : go (spsToNextStop - 1) xs
  in  go tabStop
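-- Each tab expands to the next multiple of the tab stop; for example,
-- with a tab stop of 4:
--
-- > tabFilter 4 "a\tb" == "a   b"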

--
-- Date/time
--

-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format.
normalizeDate :: String -> Maybe String
normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
  (msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day)
  where parsetimeWith = parseTime defaultTimeLocale
        formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
                    "%d %B %Y", "%b. %d, %Y", "%B %d, %Y", "%Y"]

--
-- Pandoc block and inline list processing
--

-- | Generate infinite lazy list of markers for an ordered list,
-- depending on list attributes.
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
orderedListMarkers (start, numstyle, numdelim) =
  let singleton c = [c]
      nums = case numstyle of
                     DefaultStyle -> map show [start..]
                     Example      -> map show [start..]
                     Decimal      -> map show [start..]
                     UpperAlpha   -> drop (start - 1) $ cycle $
                                     map singleton ['A'..'Z']
                     LowerAlpha   -> drop (start - 1) $ cycle $
                                     map singleton ['a'..'z']
                     UpperRoman   -> map toRomanNumeral [start..]
                     LowerRoman   -> map (map toLower . toRomanNumeral) [start..]
      inDelim str = case numdelim of
                            DefaultDelim -> str ++ "."
                            Period       -> str ++ "."
                            OneParen     -> str ++ ")"
                            TwoParens    -> "(" ++ str ++ ")"
  in  map inDelim nums
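-- For example:
--
-- > take 3 (orderedListMarkers (3, LowerAlpha, OneParen)) == ["c)","d)","e)"]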

-- | Normalize a list of inline elements: remove leading and trailing
-- @Space@ elements, collapse double @Space@s into singles, and
-- remove empty Str elements.
normalizeSpaces :: [Inline] -> [Inline]
normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
  where  cleanup []              = []
         cleanup (Space:rest)    = case dropWhile isSpaceOrEmpty rest of
                                        []            -> []
                                        (x:xs)        -> Space : x : cleanup xs
         cleanup ((Str ""):rest) = cleanup rest
         cleanup (x:rest)        = x : cleanup rest
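-- For example:
--
-- > normalizeSpaces [Space, Str "a", Space, Space, Str "", Str "b", Space]
-- >   == [Str "a", Space, Str "b"]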

isSpaceOrEmpty :: Inline -> Bool
isSpaceOrEmpty Space = True
isSpaceOrEmpty (Str "") = True
isSpaceOrEmpty _ = False

-- | Extract the leading and trailing spaces from inside an inline element
-- and place them outside the element.
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
extractSpaces f is =
  let contents = B.unMany is
      left  = case viewl contents of
                    (Space :< _) -> B.space
                    _            -> mempty
      right = case viewr contents of
                    (_ :> Space) -> B.space
                    _            -> mempty in
  (left <> f (B.trimInlines . B.Many $ contents) <> right)

-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
-- combining adjacent 'Str's and 'Emph's, removing 'Null's and
-- empty elements, etc.
normalize :: Pandoc -> Pandoc
normalize (Pandoc (Meta meta) blocks) =
  Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks)
  where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs
        go (MetaBlocks xs)  = MetaBlocks $ normalizeBlocks xs
        go (MetaList ms)    = MetaList $ map go ms
        go (MetaMap m)      = MetaMap $ M.map go m
        go x                = x

normalizeBlocks :: [Block] -> [Block]
normalizeBlocks (Null : xs) = normalizeBlocks xs
normalizeBlocks (Div attr bs : xs) =
  Div attr (normalizeBlocks bs) : normalizeBlocks xs
normalizeBlocks (BlockQuote bs : xs) =
  case normalizeBlocks bs of
       []    -> normalizeBlocks xs
       bs'   -> BlockQuote bs' : normalizeBlocks xs
normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs
normalizeBlocks (BulletList items : xs) =
  BulletList (map normalizeBlocks items) : normalizeBlocks xs
normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs
normalizeBlocks (OrderedList attr items : xs) =
  OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs
normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs
normalizeBlocks (DefinitionList items : xs) =
  DefinitionList (map go items) : normalizeBlocks xs
  where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs)
normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs
normalizeBlocks (RawBlock f x : xs) =
  case normalizeBlocks xs of
       (RawBlock f' x' : rest) | f' == f ->
         RawBlock f (x ++ ('\n':x')) : rest
       rest -> RawBlock f x : rest
normalizeBlocks (Para ils : xs) =
  case normalizeInlines ils of
       []   -> normalizeBlocks xs
       ils' -> Para ils' : normalizeBlocks xs
normalizeBlocks (Plain ils : xs) =
  case normalizeInlines ils of
       []   -> normalizeBlocks xs
       ils' -> Plain ils' : normalizeBlocks xs
normalizeBlocks (Header lev attr ils : xs) =
  Header lev attr (normalizeInlines ils) : normalizeBlocks xs
normalizeBlocks (Table capt aligns widths hdrs rows : xs) =
  Table (normalizeInlines capt) aligns widths
    (map normalizeBlocks hdrs) (map (map normalizeBlocks) rows)
    : normalizeBlocks xs
normalizeBlocks (x:xs) = x : normalizeBlocks xs
normalizeBlocks [] = []

normalizeInlines :: [Inline] -> [Inline]
normalizeInlines (Str x : ys) =
  case concat (x : map fromStr strs) of
        ""     -> rest
        n      -> Str n : rest
   where
    (strs, rest)  = span isStr $ normalizeInlines ys
    isStr (Str _) = True
    isStr _       = False
    fromStr (Str z) = z
    fromStr _       = error "normalizeInlines - fromStr - not a Str"
normalizeInlines (Space : ys) =
  if null rest
     then []
     else Space : rest
   where isSp Space = True
         isSp _     = False
         rest       = dropWhile isSp $ normalizeInlines ys
normalizeInlines (Emph xs : zs) =
  case normalizeInlines zs of
       (Emph ys : rest) -> normalizeInlines $
         Emph (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> Emph xs' : rest
normalizeInlines (Strong xs : zs) =
  case normalizeInlines zs of
       (Strong ys : rest) -> normalizeInlines $
         Strong (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> Strong xs' : rest
normalizeInlines (Subscript xs : zs) =
  case normalizeInlines zs of
       (Subscript ys : rest) -> normalizeInlines $
         Subscript (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> Subscript xs' : rest
normalizeInlines (Superscript xs : zs) =
  case normalizeInlines zs of
       (Superscript ys : rest) -> normalizeInlines $
         Superscript (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> Superscript xs' : rest
normalizeInlines (SmallCaps xs : zs) =
  case normalizeInlines zs of
       (SmallCaps ys : rest) -> normalizeInlines $
         SmallCaps (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> SmallCaps xs' : rest
normalizeInlines (Strikeout xs : zs) =
  case normalizeInlines zs of
       (Strikeout ys : rest) -> normalizeInlines $
         Strikeout (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> Strikeout xs' : rest
normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys
normalizeInlines (RawInline f xs : zs) =
  case normalizeInlines zs of
       (RawInline f' ys : rest) | f == f' -> normalizeInlines $
         RawInline f (xs ++ ys) : rest
       rest -> RawInline f xs : rest
normalizeInlines (Code _ "" : ys) = normalizeInlines ys
normalizeInlines (Code attr xs : zs) =
  case normalizeInlines zs of
       (Code attr' ys : rest) | attr == attr' -> normalizeInlines $
         Code attr (xs ++ ys) : rest
       rest -> Code attr xs : rest
-- allow empty spans, they may carry identifiers etc.
-- normalizeInlines (Span _ [] : ys) = normalizeInlines ys
normalizeInlines (Span attr xs : zs) =
  case normalizeInlines zs of
       (Span attr' ys : rest) | attr == attr' -> normalizeInlines $
         Span attr (normalizeInlines $ xs ++ ys) : rest
       rest -> Span attr (normalizeInlines xs) : rest
normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) :
  normalizeInlines ys
normalizeInlines (Quoted qt ils : ys) =
  Quoted qt (normalizeInlines ils) : normalizeInlines ys
normalizeInlines (Link ils t : ys) =
  Link (normalizeInlines ils) t : normalizeInlines ys
normalizeInlines (Image ils t : ys) =
  Image (normalizeInlines ils) t : normalizeInlines ys
normalizeInlines (Cite cs ils : ys) =
  Cite cs (normalizeInlines ils) : normalizeInlines ys
normalizeInlines (x : xs) = x : normalizeInlines xs
normalizeInlines [] = []

-- | Extract inlines, removing formatting.
removeFormatting :: Walkable Inline a => a -> [Inline]
removeFormatting = query go . walk deNote
  where go :: Inline -> [Inline]
        go (Str xs)   = [Str xs]
        go Space      = [Space]
        go (Code _ x) = [Str x]
        go (Math _ x) = [Str x]
        go LineBreak  = [Space]
        go _          = []
        deNote (Note _) = Str ""
        deNote x        = x
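-- For example, emphasis is dropped but code content is kept as plain text:
--
-- > removeFormatting [Emph [Str "hi"], Space, Code ("",[],[]) "x*y"]
-- >   == [Str "hi", Space, Str "x*y"]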

-- | Convert pandoc structure to a string with formatting removed.
-- Footnotes are skipped (since we don't want their contents in link
-- labels).
stringify :: Walkable Inline a => a -> String
stringify = query go . walk deNote
  where go :: Inline -> [Char]
        go Space = " "
        go (Str x) = x
        go (Code _ x) = x
        go (Math _ x) = x
        go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
        go LineBreak = " "
        go _ = ""
        deNote (Note _) = Str ""
        deNote x = x
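-- For example:
--
-- > stringify [Emph [Str "hi", Space, Str "there"]] == "hi there"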

-- | Bring all regular text in a pandoc structure to uppercase.
--
-- This function correctly handles cases where a lowercase character doesn't
-- match to a single uppercase character – e.g. “Straße” would be converted
-- to “STRASSE”, not “STRAßE”.
capitalize :: Walkable Inline a => a -> a
capitalize = walk go
  where go :: Inline -> Inline
        go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
        go x       = x

-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.
compactify :: [[Block]]  -- ^ List of list items (each a list of blocks)
           -> [[Block]]
compactify [] = []
compactify items =
  case (init items, last items) of
       (_,[])          -> items
       (others, final) ->
            case last final of
                 Para a -> case (filter isPara $ concat items) of
                                -- if this is only Para, change to Plain
                                [_] -> others ++ [init final ++ [Plain a]]
                                _   -> items
                 _      -> items
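-- For example, a two-item list whose only Para is the final block becomes
-- fully "tight":
--
-- > compactify [[Plain [Str "a"]], [Para [Str "b"]]]
-- >   == [[Plain [Str "a"]], [Plain [Str "b"]]]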

-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.  Like compactify, but operates on @Blocks@ rather
-- than @[Block]@.
compactify' :: [Blocks]  -- ^ List of list items (each a list of blocks)
            -> [Blocks]
compactify' [] = []
compactify' items =
  let (others, final) = (init items, last items)
  in  case reverse (B.toList final) of
           (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
                            -- if this is only Para, change to Plain
                            [_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
                            _   -> items
           _      -> items

-- | Like @compactify'@, but acts on items of definition lists.
compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
compactify'DL items =
  let defs = concatMap snd items
  in  case reverse (concatMap B.toList defs) of
           (Para x:xs)
             | not (any isPara xs) ->
                   let (t,ds) = last items
                       lastDef = B.toList $ last ds
                       ds' = init ds ++
                             if null lastDef
                                then [B.fromList lastDef]
                                else [B.fromList $ init lastDef ++ [Plain x]]
                    in init items ++ [(t, ds')]
             | otherwise           -> items
           _          -> items

isPara :: Block -> Bool
isPara (Para _) = True
isPara _        = False

-- | Data structure for defining hierarchical Pandoc documents
data Element = Blk Block
             | Sec Int [Int] Attr [Inline] [Element]
             --    lvl  num attributes label    contents
             deriving (Eq, Read, Show, Typeable, Data)

instance Walkable Inline Element where
  walk f (Blk x) = Blk (walk f x)
  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
  walkM f (Blk x) = Blk `fmap` walkM f x
  walkM f (Sec lev nums attr ils elts) = do
    ils' <- walkM f ils
    elts' <- walkM f elts
    return $ Sec lev nums attr ils' elts'
  query f (Blk x) = query f x
  query f (Sec _ _ _ ils elts) = query f ils <> query f elts

instance Walkable Block Element where
  walk f (Blk x) = Blk (walk f x)
  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
  walkM f (Blk x) = Blk `fmap` walkM f x
  walkM f (Sec lev nums attr ils elts) = do
    ils' <- walkM f ils
    elts' <- walkM f elts
    return $ Sec lev nums attr ils' elts'
  query f (Blk x) = query f x
  query f (Sec _ _ _ ils elts) = query f ils <> query f elts

-- | Convert Pandoc inline list to plain text identifier.  HTML
-- identifiers must start with a letter, and may contain only
-- letters, digits, and the characters _-.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier =
  dropWhile (not . isAlpha) . intercalate "-" . words .
    map (nbspToSp . toLower) .
    filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
    stringify
  where nbspToSp '\160' = ' '
        nbspToSp x      = x

-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []

hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
hierarchicalizeWithIds [] = return []
hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
  lastnum <- S.get
  let lastnum' = take level lastnum
  let newnum = case length lastnum' of
                    x | "unnumbered" `elem` classes -> []
                      | x >= level -> init lastnum' ++ [last lastnum' + 1]
                      | otherwise -> lastnum ++
                           replicate (level - length lastnum - 1) 0 ++ [1]
  unless (null newnum) $ S.put newnum
  let (sectionContents, rest) = break (headerLtEq level) xs
  sectionContents' <- hierarchicalizeWithIds sectionContents
  rest' <- hierarchicalizeWithIds rest
  return $ Sec level newnum attr title' sectionContents' : rest'
hierarchicalizeWithIds ((Div ("",["references"],[])
                         (Header level (ident,classes,kvs) title' : xs)):ys) =
  hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs)
                           title') : (xs ++ ys))
hierarchicalizeWithIds (x:rest) = do
  rest' <- hierarchicalizeWithIds rest
  return $ (Blk x) : rest'

headerLtEq :: Int -> Block -> Bool
headerLtEq level (Header l _ _) = l <= level
headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level
headerLtEq _ _ = False

-- | Generate a unique identifier from a list of inlines.
-- Second argument is a list of already used identifiers.
uniqueIdent :: [Inline] -> [String] -> String
uniqueIdent title' usedIdents
  =  let baseIdent = case inlineListToIdentifier title' of
                        ""   -> "section"
                        x    -> x
         numIdent n = baseIdent ++ "-" ++ show n
     in  if baseIdent `elem` usedIdents
           then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
                  Just x  -> numIdent x
                  Nothing -> baseIdent   -- if we have more than 60,000, allow repeats
           else baseIdent
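-- For example:
--
-- > uniqueIdent [Str "Hello", Space, Str "World!"] [] == "hello-world"
-- > uniqueIdent [Str "Hello", Space, Str "World!"] ["hello-world"]
-- >   == "hello-world-1"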

-- | True if block is a Header block.
isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _ _) = True
isHeaderBlock _ = False

-- | Shift header levels up or down.
headerShift :: Int -> Pandoc -> Pandoc
headerShift n = walk shift
  where shift :: Block -> Block
        shift (Header level attr inner) = Header (level + n) attr inner
        shift x                         = x

-- | Detect if a list is tight.
isTightList :: [[Block]] -> Bool
isTightList = all firstIsPlain
  where firstIsPlain (Plain _ : _) = True
        firstIsPlain _             = False

-- | Set a field of a 'Meta' object.  If the field already has a value,
-- convert it into a list with the new value appended to the old value(s).
addMetaField :: ToMetaValue a
             => String
             -> a
             -> Meta
             -> Meta
addMetaField key val (Meta meta) =
  Meta $ M.insertWith combine key (toMetaValue val) meta
  where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
        combine newval x             = MetaList [x, newval]
        tolist (MetaList ys) = ys
        tolist y             = [y]
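-- For example, adding "author" twice produces a MetaList with both values
-- in insertion order:
--
-- > addMetaField "author" (B.str "B") (addMetaField "author" (B.str "A") nullMeta)
--
-- should yield a Meta whose "author" field is
-- MetaList [MetaInlines [Str "A"], MetaInlines [Str "B"]].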

-- | Create 'Meta' from old-style title, authors, date.  This is
-- provided to ease the transition from the old API.
makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
makeMeta title authors date =
      addMetaField "title" (B.fromList title)
    $ addMetaField "author" (map B.fromList authors)
    $ addMetaField "date" (B.fromList date)
    $ nullMeta

--
-- TagSoup HTML handling
--

-- | Render HTML tags.
renderTags' :: [Tag String] -> String
renderTags' = renderTagsOptions
               renderOptions{ optMinimize = matchTags ["hr", "br", "img",
                                                       "meta", "link"]
                            , optRawTag   = matchTags ["script", "style"] }
              where matchTags = \tags -> flip elem tags . map toLower

--
-- File handling
--

-- | Perform an IO action in a directory, returning to starting directory.
inDirectory :: FilePath -> IO a -> IO a
inDirectory path action = E.bracket
                             getCurrentDirectory
                             setCurrentDirectory
                             (const $ setCurrentDirectory path >> action)

getDefaultReferenceDocx :: Maybe FilePath -> IO Archive
getDefaultReferenceDocx datadir = do
  let paths = ["[Content_Types].xml",
               "_rels/.rels",
               "docProps/app.xml",
               "docProps/core.xml",
               "word/document.xml",
               "word/fontTable.xml",
               "word/footnotes.xml",
               "word/numbering.xml",
               "word/settings.xml",
               "word/webSettings.xml",
               "word/styles.xml",
               "word/_rels/document.xml.rels",
               "word/_rels/footnotes.xml.rels",
               "word/theme/theme1.xml"]
  let toLazy = fromChunks . (:[])
  let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$>
                                          getCurrentTime
                            contents <- toLazy <$> readDataFile datadir
                                                       ("docx/" ++ path)
                            return $ toEntry path epochtime contents
  mbArchive <- case datadir of
                    Nothing   -> return Nothing
                    Just d    -> do
                       exists <- doesFileExist (d </> "reference.docx")
                       if exists
                          then return (Just (d </> "reference.docx"))
                          else return Nothing
  case mbArchive of
     Just arch -> toArchive <$> BL.readFile arch
     Nothing   -> foldr addEntryToArchive emptyArchive <$>
                     mapM pathToEntry paths

getDefaultReferenceODT :: Maybe FilePath -> IO Archive
getDefaultReferenceODT datadir = do
  let paths = ["mimetype",
               "manifest.rdf",
               "styles.xml",
               "content.xml",
               "meta.xml",
               "settings.xml",
               "Configurations2/accelerator/current.xml",
               "Thumbnails/thumbnail.png",
               "META-INF/manifest.xml"]
  let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
                            contents <- (fromChunks . (:[])) `fmap`
                                          readDataFile datadir ("odt/" ++ path)
                            return $ toEntry path epochtime contents
  mbArchive <- case datadir of
                    Nothing   -> return Nothing
                    Just d    -> do
                       exists <- doesFileExist (d </> "reference.odt")
                       if exists
                          then return (Just (d </> "reference.odt"))
                          else return Nothing
  case mbArchive of
     Just arch -> toArchive <$> BL.readFile arch
     Nothing   -> foldr addEntryToArchive emptyArchive <$>
                     mapM pathToEntry paths

readDefaultDataFile :: FilePath -> IO BS.ByteString
readDefaultDataFile "reference.docx" =
  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing
readDefaultDataFile "reference.odt" =
  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing
readDefaultDataFile fname =
#ifdef EMBED_DATA_FILES
  case lookup (makeCanonical fname) dataFiles of
    Nothing       -> err 97 $ "Could not find data file " ++ fname
    Just contents -> return contents
  where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
        transformPathParts = reverse . foldl go []
        go as     "."  = as
        go (_:as) ".." = as
        go as     x    = x : as
#else
  getDataFileName fname' >>= checkExistence >>= BS.readFile
    where fname' = if fname == "README" then fname else "data" </> fname
#endif

checkExistence :: FilePath -> IO FilePath
checkExistence fn = do
  exists <- doesFileExist fn
  if exists
     then return fn
     else err 97 ("Could not find data file " ++ fn)

-- | Read file from specified user data directory or, if not found there, from
-- Cabal data directory.
readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
readDataFile Nothing fname = readDefaultDataFile fname
readDataFile (Just userDir) fname = do
  exists <- doesFileExist (userDir </> fname)
  if exists
     then BS.readFile (userDir </> fname)
     else readDefaultDataFile fname

-- | Same as 'readDataFile' but returns a String instead of a ByteString.
readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
readDataFileUTF8 userDir fname =
  UTF8.toString `fmap` readDataFile userDir fname

-- | Fetch an image or other item from the local filesystem or the net.
-- Returns raw content and maybe mime type.
fetchItem :: Maybe String -> String
          -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
fetchItem sourceURL s =
  case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of
    (_, s') | isURI s' -> openURL s'
    (Just u, s') -> -- try fetching from relative path at source
       case parseURIReference s' of
            Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
            Nothing -> openURL s' -- will throw error
    (Nothing, _) -> E.try readLocalFile -- get from local file system
  where readLocalFile = do
          cont <- BS.readFile fp
          return (cont, mime)
        dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
        fp = unEscapeString $ dropFragmentAndQuery s
        mime = case takeExtension fp of
                    ".gz"   -> getMimeType $ dropExtension fp
                    ".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
                    x       -> getMimeType x
        ensureEscaped x@(_:':':'\\':_) = x -- likely windows path
        ensureEscaped x = escapeURIString isAllowedInURI x

-- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
fetchItem' :: MediaBag -> Maybe String -> String
           -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
fetchItem' media sourceURL s = do
  case lookupMedia s media of
       Nothing -> fetchItem sourceURL s
       Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)

-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
openURL u
  | Just u' <- stripPrefix "data:" u =
    let mime     = takeWhile (/=',') u'
        contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u'
    in  return $ Right (decodeLenient contents, Just mime)
#ifdef HTTP_CLIENT
  | otherwise = withSocketsDo $ E.try $ do
     req <- parseUrl u
     (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
     let req' = case proxy of
                     Left _   -> req
                     Right pr -> case parseUrl pr of
                                      Just r  -> addProxy (host r) (port r) req
                                      Nothing -> req
#if MIN_VERSION_http_client(0,4,18)
     resp <- newManager tlsManagerSettings >>= httpLbs req'
#else
     resp <- withManager tlsManagerSettings $ httpLbs req'
#endif
     return (BS.concat $ toChunks $ responseBody resp,
             UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
#else
  | otherwise = E.try $ getBodyAndMimeType `fmap` browse
              (do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
                  setOutHandler $ const (return ())
                  setAllowRedirects True
                  request (getRequest' u'))
  where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
        getRequest' uriString = case parseURI uriString of
                                   Nothing -> error ("Not a valid URL: " ++
                                                        uriString)
                                   Just v  -> mkRequest GET v
        u' = escapeURIString (/= '|') u  -- pipes are rejected by Network.URI
#endif

--
-- Error reporting
--

err :: Int -> String -> IO a
err exitCode msg = do
  name <- getProgName
  UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
  exitWith $ ExitFailure exitCode
  return undefined

warn :: String -> IO ()
warn msg = do
  name <- getProgName
  UTF8.hPutStrLn stderr $ name ++ ": " ++ msg

mapLeft :: (a -> b) -> Either a c -> Either b c
mapLeft f (Left x) = Left (f x)
mapLeft _ (Right x) = Right x

hush :: Either a b -> Maybe b
hush (Left _) = Nothing
hush (Right x) = Just x

-- | Remove intermediate "." and ".." directories from a path.
--
-- > collapseFilePath "./foo" == "foo"
-- > collapseFilePath "/bar/../baz" == "/baz"
-- > collapseFilePath "/../baz" == "/../baz"
-- > collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar"
-- > collapseFilePath "parent/foo/baz/../../bar" == "parent/bar"
-- > collapseFilePath "parent/foo/.." == "parent"
-- > collapseFilePath "/parent/foo/../../bar" == "/bar"
collapseFilePath :: FilePath -> FilePath
collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
  where
    go rs "." = rs
    go r@(p:rs) ".." = case p of
                            ".."                              -> ("..":r)
                            (checkPathSeperator -> Just True) -> ("..":r)
                            _                                 -> rs
    go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
    go rs x = x:rs
    isSingleton [] = Nothing
    isSingleton [x] = Just x
    isSingleton _ = Nothing
    checkPathSeperator = fmap isPathSeparator . isSingleton

--
-- Safe read
--

safeRead :: (MonadPlus m, Read a) => String -> m a
safeRead s = case reads s of
                  (d,x):_
                    | all isSpace x -> return d
                  _                 -> mzero
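-- For example, in the Maybe monad:
--
-- > (safeRead "42 " :: Maybe Int) == Just 42
-- > (safeRead "42x" :: Maybe Int) == Nothing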

--
-- Temp directory
--

withTempDir :: String -> (FilePath -> IO a) -> IO a
withTempDir =
#ifdef _WINDOWS
  withTempDirectory "."
#else
  withSystemTempDirectory
#endif