pandoc/src/Text/Pandoc/Shared.hs

{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
    FlexibleContexts, ScopedTypeVariables, PatternGuards,
    ViewPatterns #-}
{-
Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.Shared
   Copyright   : Copyright (C) 2006-2016 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Utility functions and definitions used by the various Pandoc modules.
-}
module Text.Pandoc.Shared (
                     -- * List processing
                     splitBy,
                     splitByIndices,
                     splitStringByIndices,
                     substitute,
                     ordNub,
                     -- * Text processing
                     backslashEscapes,
                     escapeStringUsing,
                     stripTrailingNewlines,
                     trim,
                     triml,
                     trimr,
                     stripFirstAndLast,
                     camelCaseToHyphenated,
                     toRomanNumeral,
                     escapeURI,
                     tabFilter,
                     -- * Date/time
                     normalizeDate,
                     -- * Pandoc block and inline list processing
                     orderedListMarkers,
                     normalizeSpaces,
                     extractSpaces,
                     removeFormatting,
                     deNote,
                     stringify,
                     capitalize,
                     compactify',
                     compactify'DL,
                     linesToPara,
                     Element (..),
                     hierarchicalize,
                     uniqueIdent,
                     inlineListToIdentifier,
                     isHeaderBlock,
                     headerShift,
                     isTightList,
                     addMetaField,
                     makeMeta,
                     -- * TagSoup HTML handling
                     renderTags',
                     -- * File handling
                     inDirectory,
                     getDefaultReferenceDocx,
                     getDefaultReferenceODT,
                     readDataFile,
                     readDataFileUTF8,
                     openURL,
                     collapseFilePath,
                     filteredFilesFromArchive,
                     -- * Error handling
                     err,
                     warn,
                     mapLeft,
                     -- * for squashing blocks
                     blocksToInlines,
                     -- * Safe read
                     safeRead,
                     -- * Temp directory
                     withTempDir,
                     -- * Version
                     pandocVersion
                    ) where

import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
import System.Exit (exitWith, ExitCode(..))
import Data.Char ( toLower, isLower, isUpper, isAlpha,
                   isLetter, isDigit, isSpace )
import Data.List ( find, stripPrefix, intercalate )
import Data.Maybe (mapMaybe)
import Data.Version ( showVersion )
import qualified Data.Map as M
import Network.URI ( escapeURIString, unEscapeString )
import qualified Data.Set as Set
import System.Directory
import System.FilePath (splitDirectories, isPathSeparator)
import qualified System.FilePath.Posix as Posix
import Text.Pandoc.MIME (MimeType)
import System.FilePath ( (</>) )
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import Control.Monad.Trans (MonadIO (..))
import qualified Control.Exception as E
import Control.Monad (msum, unless, MonadPlus(..))
import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.Compat.Time
import Data.Time.Clock.POSIX
import System.IO (stderr)
import System.IO.Temp
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
         renderOptions)
import Data.Monoid ((<>))
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
import qualified Data.Text as T (toUpper, pack, unpack)
import Data.ByteString.Lazy (toChunks, fromChunks)
import qualified Data.ByteString.Lazy as BL
import Paths_pandoc (version)

import Codec.Archive.Zip

#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
#else
import Paths_pandoc (getDataFileName)
#endif
#ifdef HTTP_CLIENT
import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
                            Request(port,host))
import Network.HTTP.Client (parseRequest)
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.Internal (addProxy)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import System.Environment (getEnv)
import Network.HTTP.Types.Header ( hContentType)
import Network (withSocketsDo)
#else
import Network.URI (parseURI)
import Network.HTTP (findHeader, rspBody,
                     RequestMethod(..), HeaderName(..), mkRequest)
import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
#endif

-- | Version number of pandoc library.
pandocVersion :: String
pandocVersion = showVersion version

--
-- List processing
--

-- | Split list by groups of one or more sep.
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = []
splitBy isSep lst =
  let (first, rest) = break isSep lst
      rest'         = dropWhile isSep rest
  in  first:(splitBy isSep rest')

splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x)  xs) rest)
  where (first, rest) = splitAt x lst

-- | Split string into chunks divided at specified indices.
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
splitStringByIndices [] lst = [lst]
splitStringByIndices (x:xs) lst =
  let (first, rest) = splitAt' x lst in
  first : (splitStringByIndices (map (\y -> y - x) xs) rest)

splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' _ []          = ([],[])
splitAt' n xs | n <= 0 = ([],xs)
splitAt' n (x:xs)      = (x:ys,zs)
  where (ys,zs) = splitAt' (n - charWidth x) xs

-- | Replace each occurrence of one sublist in a list with another.
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ xs = xs
substitute target replacement lst@(x:xs) =
    case stripPrefix target lst of
      Just lst' -> replacement ++ substitute target replacement lst'
      Nothing   -> x : substitute target replacement xs

ordNub :: (Ord a) => [a] -> [a]
ordNub l = go Set.empty l
  where
    go _ [] = []
    go s (x:xs) = if x `Set.member` s then go s xs
                                      else x : go (Set.insert x s) xs

--
-- Text processing
--

-- | Returns an association list of backslash escapes for the
-- designated characters.
backslashEscapes :: [Char]    -- ^ list of special characters to escape
                 -> [(Char, String)]
backslashEscapes = map (\ch -> (ch, ['\\',ch]))

-- | Escape a string of characters, using an association list of
-- characters and strings.
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing _ [] = ""
escapeStringUsing escapeTable (x:xs) =
  case (lookup x escapeTable) of
       Just str  -> str ++ rest
       Nothing   -> x:rest
  where rest = escapeStringUsing escapeTable xs

-- | Strip trailing newlines from string.
stripTrailingNewlines :: String -> String
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse

-- | Remove leading and trailing space (including newlines) from string.
trim :: String -> String
trim = triml . trimr

-- | Remove leading space (including newlines) from string.
triml :: String -> String
triml = dropWhile (`elem` " \r\n\t")

-- | Remove trailing space (including newlines) from string.
trimr :: String -> String
trimr = reverse . triml . reverse

-- | Strip leading and trailing characters from string
stripFirstAndLast :: String -> String
stripFirstAndLast str =
  drop 1 $ take ((length str) - 1) str

-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated [] = ""
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
  a:'-':(toLower b):(camelCaseToHyphenated rest)
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)

-- | Convert number < 4000 to uppercase roman numeral.
toRomanNumeral :: Int -> String
toRomanNumeral x =
  if x >= 4000 || x < 0
     then "?"
     else case x of
              _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
              _ | x >= 900  -> "CM" ++ toRomanNumeral (x - 900)
              _ | x >= 500  -> "D" ++ toRomanNumeral (x - 500)
              _ | x >= 400  -> "CD" ++ toRomanNumeral (x - 400)
              _ | x >= 100  -> "C" ++ toRomanNumeral (x - 100)
              _ | x >= 90   -> "XC" ++ toRomanNumeral (x - 90)
              _ | x >= 50   -> "L"  ++ toRomanNumeral (x - 50)
              _ | x >= 40   -> "XL" ++ toRomanNumeral (x - 40)
              _ | x >= 10   -> "X" ++ toRomanNumeral (x - 10)
              _ | x == 9    -> "IX"
              _ | x >= 5    -> "V" ++ toRomanNumeral (x - 5)
              _ | x == 4    -> "IV"
              _ | x >= 1    -> "I" ++ toRomanNumeral (x - 1)
              _             -> ""

-- | Escape whitespace and some punctuation characters in URI.
escapeURI :: String -> String
escapeURI = escapeURIString (not . needsEscaping)
  where needsEscaping c = isSpace c || c `elem`
                           ['<','>','|','"','{','}','[',']','^', '`']


-- | Convert tabs to spaces and filter out DOS line endings.
-- Tabs will be preserved if tab stop is set to 0.
tabFilter :: Int       -- ^ Tab stop
          -> String    -- ^ Input
          -> String
tabFilter tabStop =
  let go _ [] = ""
      go _ ('\n':xs) = '\n' : go tabStop xs
      go _ ('\r':'\n':xs) = '\n' : go tabStop xs
      go _ ('\r':xs) = '\n' : go tabStop xs
      go spsToNextStop ('\t':xs) =
        if tabStop == 0
           then '\t' : go tabStop xs
           else replicate spsToNextStop ' ' ++ go tabStop xs
      go 1 (x:xs) =
        x : go tabStop xs
      go spsToNextStop (x:xs) =
        x : go (spsToNextStop - 1) xs
  in  go tabStop

--
-- Date/time
--

-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
-- limit years to the range 1601-9999 (ISO 8601 accepts greater than
-- or equal to 1583, but MS Word only accepts dates starting 1601).
normalizeDate :: String -> Maybe String
normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
  (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
  where rejectBadYear day = case toGregorian day of
          (y, _, _) | y >= 1601 && y <= 9999 -> Just day
          _ -> Nothing
        parsetimeWith =
#if MIN_VERSION_time(1,5,0)
             parseTimeM True defaultTimeLocale
#else
             parseTime defaultTimeLocale
#endif
        formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
                    "%d %B %Y", "%b. %d, %Y", "%B %d, %Y",
                    "%Y%m%d", "%Y%m", "%Y"]

--
-- Pandoc block and inline list processing
--

-- | Generate infinite lazy list of markers for an ordered list,
-- depending on list attributes.
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
orderedListMarkers (start, numstyle, numdelim) =
  let singleton c = [c]
      nums = case numstyle of
                     DefaultStyle -> map show [start..]
                     Example      -> map show [start..]
                     Decimal      -> map show [start..]
                     UpperAlpha   -> drop (start - 1) $ cycle $
                                     map singleton ['A'..'Z']
                     LowerAlpha   -> drop (start - 1) $ cycle $
                                     map singleton ['a'..'z']
                     UpperRoman   -> map toRomanNumeral [start..]
                     LowerRoman   -> map (map toLower . toRomanNumeral) [start..]
      inDelim str = case numdelim of
                            DefaultDelim -> str ++ "."
                            Period       -> str ++ "."
                            OneParen     -> str ++ ")"
                            TwoParens    -> "(" ++ str ++ ")"
  in  map inDelim nums

-- | Normalize a list of inline elements: remove leading and trailing
-- @Space@ elements, collapse double @Space@s into singles, and
-- remove empty Str elements.
normalizeSpaces :: [Inline] -> [Inline]
normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
 where  cleanup []              = []
        cleanup (Space:rest)    = case dropWhile isSpaceOrEmpty rest of
                                        []     -> []
                                        (x:xs) -> Space : x : cleanup xs
        cleanup ((Str ""):rest) = cleanup rest
        cleanup (x:rest)        = x : cleanup rest

isSpaceOrEmpty :: Inline -> Bool
isSpaceOrEmpty Space = True
isSpaceOrEmpty (Str "") = True
isSpaceOrEmpty _ = False

-- | Extract the leading and trailing spaces from inside an inline element
-- and place them outside the element.  SoftBreaks count as Spaces for
-- these purposes.
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
extractSpaces f is =
  let contents = B.unMany is
      left  = case viewl contents of
                    (Space :< _)     -> B.space
                    (SoftBreak :< _) -> B.softbreak
                    _                -> mempty
      right = case viewr contents of
                    (_ :> Space)     -> B.space
                    (_ :> SoftBreak) -> B.softbreak
                    _                -> mempty in
  (left <> f (B.trimInlines . B.Many $ contents) <> right)

-- | Extract inlines, removing formatting.
removeFormatting :: Walkable Inline a => a -> [Inline]
removeFormatting = query go . walk deNote
  where go :: Inline -> [Inline]
        go (Str xs)     = [Str xs]
        go Space        = [Space]
        go SoftBreak    = [SoftBreak]
        go (Code _ x)   = [Str x]
        go (Math _ x)   = [Str x]
        go LineBreak    = [Space]
        go _            = []

deNote :: Inline -> Inline
deNote (Note _) = Str ""
deNote x        = x

-- | Convert pandoc structure to a string with formatting removed.
-- Footnotes are skipped (since we don't want their contents in link
-- labels).
stringify :: Walkable Inline a => a -> String
stringify = query go . walk deNote
  where go :: Inline -> [Char]
        go Space = " "
        go SoftBreak = " "
        go (Str x) = x
        go (Code _ x) = x
        go (Math _ x) = x
        go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
        go LineBreak = " "
        go _ = ""

-- | Bring all regular text in a pandoc structure to uppercase.
--
-- This function correctly handles cases where a lowercase character doesn't
-- match to a single uppercase character – e.g. “Straße” would be converted
-- to “STRASSE”, not “STRAßE”.
capitalize :: Walkable Inline a => a -> a
capitalize = walk go
  where go :: Inline -> Inline
        go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
        go x       = x

-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.  Like compactify, but operates on @Blocks@ rather
-- than @[Block]@.
compactify' :: [Blocks]  -- ^ List of list items (each a list of blocks)
           -> [Blocks]
compactify' [] = []
compactify' items =
  let (others, final) = (init items, last items)
  in  case reverse (B.toList final) of
           (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
                            -- if this is only Para, change to Plain
                            [_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
                            _   -> items
           _      -> items

-- | Like @compactify'@, but acts on items of definition lists.
compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
compactify'DL items =
  let defs = concatMap snd items
  in  case reverse (concatMap B.toList defs) of
           (Para x:xs)
             | not (any isPara xs) ->
                   let (t,ds) = last items
                       lastDef = B.toList $ last ds
                       ds' = init ds ++
                             if null lastDef
                                then [B.fromList lastDef]
                                else [B.fromList $ init lastDef ++ [Plain x]]
                    in init items ++ [(t, ds')]
             | otherwise           -> items
           _                       -> items

-- | Combine a list of lines by adding hard linebreaks.
combineLines :: [[Inline]] -> [Inline]
combineLines = intercalate [LineBreak]

-- | Convert a list of lines into a paragraph with hard line breaks. This is
--   useful e.g. for rudimentary support of LineBlock elements in writers.
linesToPara :: [[Inline]] -> Block
linesToPara = Para . combineLines

isPara :: Block -> Bool
isPara (Para _) = True
isPara _        = False

-- | Data structure for defining hierarchical Pandoc documents
data Element = Blk Block
             | Sec Int [Int] Attr [Inline] [Element]
             --    lvl  num attributes label    contents
             deriving (Eq, Read, Show, Typeable, Data)

instance Walkable Inline Element where
  walk f (Blk x) = Blk (walk f x)
  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
  walkM f (Blk x) = Blk `fmap` walkM f x
  walkM f (Sec lev nums attr ils elts) = do
    ils' <- walkM f ils
    elts' <- walkM f elts
    return $ Sec lev nums attr ils' elts'
  query f (Blk x) = query f x
  query f (Sec _ _ _ ils elts) = query f ils <> query f elts

instance Walkable Block Element where
  walk f (Blk x) = Blk (walk f x)
  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
  walkM f (Blk x) = Blk `fmap` walkM f x
  walkM f (Sec lev nums attr ils elts) = do
    ils' <- walkM f ils
    elts' <- walkM f elts
    return $ Sec lev nums attr ils' elts'
  query f (Blk x) = query f x
  query f (Sec _ _ _ ils elts) = query f ils <> query f elts


-- | Convert Pandoc inline list to plain text identifier.  HTML
-- identifiers must start with a letter, and may contain only
-- letters, digits, and the characters _-.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier =
  dropWhile (not . isAlpha) . intercalate "-" . words .
    map (nbspToSp . toLower) .
    filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
    stringify
 where nbspToSp '\160'     =  ' '
       nbspToSp x          =  x

-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []

hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
hierarchicalizeWithIds [] = return []
hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
  lastnum <- S.get
  let lastnum' = take level lastnum
  let newnum = case length lastnum' of
                    x | "unnumbered" `elem` classes -> []
                      | x >= level -> init lastnum' ++ [last lastnum' + 1]
                      | otherwise -> lastnum ++
                           replicate (level - length lastnum - 1) 0 ++ [1]
  unless (null newnum) $ S.put newnum
  let (sectionContents, rest) = break (headerLtEq level) xs
  sectionContents' <- hierarchicalizeWithIds sectionContents
  rest' <- hierarchicalizeWithIds rest
  return $ Sec level newnum attr title' sectionContents' : rest'
hierarchicalizeWithIds ((Div ("",["references"],[])
                         (Header level (ident,classes,kvs) title' : xs)):ys) =
  hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs)
                           title') : (xs ++ ys))
hierarchicalizeWithIds (x:rest) = do
  rest' <- hierarchicalizeWithIds rest
  return $ (Blk x) : rest'

headerLtEq :: Int -> Block -> Bool
headerLtEq level (Header l _ _) = l <= level
headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _))  = l <= level
headerLtEq _ _ = False

-- | Generate a unique identifier from a list of inlines.
-- Second argument is a list of already used identifiers.
uniqueIdent :: [Inline] -> Set.Set String -> String
uniqueIdent title' usedIdents
  =  let baseIdent = case inlineListToIdentifier title' of
                        ""   -> "section"
                        x    -> x
         numIdent n = baseIdent ++ "-" ++ show n
     in  if baseIdent `Set.member` usedIdents
           then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of
                  Just x  -> numIdent x
                  Nothing -> baseIdent   -- if we have more than 60,000, allow repeats
           else baseIdent

-- | True if block is a Header block.
isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _ _) = True
isHeaderBlock _ = False

-- | Shift header levels up or down.
headerShift :: Int -> Pandoc -> Pandoc
headerShift n = walk shift
  where shift :: Block -> Block
        shift (Header level attr inner) = Header (level + n) attr inner
        shift x                         = x

-- | Detect if a list is tight.
isTightList :: [[Block]] -> Bool
isTightList = all firstIsPlain
  where firstIsPlain (Plain _ : _) = True
        firstIsPlain _             = False

-- | Set a field of a 'Meta' object.  If the field already has a value,
-- convert it into a list with the new value appended to the old value(s).
addMetaField :: ToMetaValue a
             => String
             -> a
             -> Meta
             -> Meta
addMetaField key val (Meta meta) =
  Meta $ M.insertWith combine key (toMetaValue val) meta
  where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
        combine newval x             = MetaList [x, newval]
        tolist (MetaList ys)         = ys
        tolist y                     = [y]

-- | Create 'Meta' from old-style title, authors, date.  This is
-- provided to ease the transition from the old API.
makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
makeMeta title authors date =
      addMetaField "title" (B.fromList title)
    $ addMetaField "author" (map B.fromList authors)
    $ addMetaField "date" (B.fromList date)
    $ nullMeta

--
-- TagSoup HTML handling
--

-- | Render HTML tags.
renderTags' :: [Tag String] -> String
renderTags' = renderTagsOptions
               renderOptions{ optMinimize = matchTags ["hr", "br", "img",
                                                       "meta", "link"]
                            , optRawTag   = matchTags ["script", "style"] }
              where matchTags = \tags -> flip elem tags . map toLower

--
-- File handling
--

-- | Perform an IO action in a directory, returning to starting directory.
inDirectory :: FilePath -> IO a -> IO a
inDirectory path action = E.bracket
                             getCurrentDirectory
                             setCurrentDirectory
                             (const $ setCurrentDirectory path >> action)

getDefaultReferenceDocx :: Maybe FilePath -> IO Archive
getDefaultReferenceDocx datadir = do
  let paths = ["[Content_Types].xml",
               "_rels/.rels",
               "docProps/app.xml",
               "docProps/core.xml",
               "word/document.xml",
               "word/fontTable.xml",
               "word/footnotes.xml",
               "word/numbering.xml",
               "word/settings.xml",
               "word/webSettings.xml",
               "word/styles.xml",
               "word/_rels/document.xml.rels",
               "word/_rels/footnotes.xml.rels",
               "word/theme/theme1.xml"]
  let toLazy = fromChunks . (:[])
  let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$>
                                          getCurrentTime
                            contents <- toLazy <$> readDataFile datadir
                                                       ("docx/" ++ path)
                            return $ toEntry path epochtime contents
  mbArchive <- case datadir of
                    Nothing   -> return Nothing
                    Just d    -> do
                       exists <- doesFileExist (d </> "reference.docx")
                       if exists
                          then return (Just (d </> "reference.docx"))
                          else return Nothing
  case mbArchive of
     Just arch -> toArchive <$> BL.readFile arch
     Nothing   -> foldr addEntryToArchive emptyArchive <$>
                     mapM pathToEntry paths

getDefaultReferenceODT :: Maybe FilePath -> IO Archive
getDefaultReferenceODT datadir = do
  let paths = ["mimetype",
               "manifest.rdf",
               "styles.xml",
               "content.xml",
               "meta.xml",
               "settings.xml",
               "Configurations2/accelerator/current.xml",
               "Thumbnails/thumbnail.png",
               "META-INF/manifest.xml"]
  let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
                            contents <- (fromChunks . (:[])) `fmap`
                                          readDataFile datadir ("odt/" ++ path)
                            return $ toEntry path epochtime contents
  mbArchive <- case datadir of
                    Nothing   -> return Nothing
                    Just d    -> do
                       exists <- doesFileExist (d </> "reference.odt")
                       if exists
                          then return (Just (d </> "reference.odt"))
                          else return Nothing
  case mbArchive of
     Just arch -> toArchive <$> BL.readFile arch
     Nothing   -> foldr addEntryToArchive emptyArchive <$>
                     mapM pathToEntry paths


readDefaultDataFile :: FilePath -> IO BS.ByteString
readDefaultDataFile "reference.docx" =
  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing
readDefaultDataFile "reference.odt" =
  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing
readDefaultDataFile fname =
#ifdef EMBED_DATA_FILES
  case lookup (makeCanonical fname) dataFiles of
    Nothing       -> err 97 $ "Could not find data file " ++ fname
    Just contents -> return contents
  where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
        transformPathParts = reverse . foldl go []
        go as     "."  = as
        go (_:as) ".." = as
        go as     x    = x : as
#else
  getDataFileName fname' >>= checkExistence >>= BS.readFile
    where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname

checkExistence :: FilePath -> IO FilePath
checkExistence fn = do
  exists <- doesFileExist fn
  if exists
     then return fn
     else err 97 ("Could not find data file " ++ fn)
#endif

-- | Read file from specified user data directory or, if not found there, from
-- Cabal data directory.
readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
readDataFile Nothing fname = readDefaultDataFile fname
readDataFile (Just userDir) fname = do
  exists <- doesFileExist (userDir </> fname)
  if exists
     then BS.readFile (userDir </> fname)
     else readDefaultDataFile fname

-- | Same as 'readDataFile' but returns a String instead of a ByteString.
readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
readDataFileUTF8 userDir fname =
  UTF8.toString `fmap` readDataFile userDir fname

-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
openURL u
  | Just u'' <- stripPrefix "data:" u =
    let mime     = takeWhile (/=',') u''
        contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u''
    in  return $ Right (decodeLenient contents, Just mime)
#ifdef HTTP_CLIENT
  | otherwise = withSocketsDo $ E.try $ do
     let parseReq = parseRequest
     (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
     req <- parseReq u
     req' <- case proxy of
                     Left _   -> return req
                     Right pr -> (parseReq pr >>= \r ->
                                  return $ addProxy (host r) (port r) req)
                                  `mplus` return req
     resp <- newManager tlsManagerSettings >>= httpLbs req'
     return (BS.concat $ toChunks $ responseBody resp,
             UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
#else
  | otherwise = E.try $ getBodyAndMimeType `fmap` browse
              (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
                  setOutHandler $ const (return ())
                  setAllowRedirects True
                  request (getRequest' u'))
  where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
        getRequest' uriString = case parseURI uriString of
                                   Nothing -> error ("Not a valid URL: " ++
                                                        uriString)
                                   Just v  -> mkRequest GET v
        u' = escapeURIString (/= '|') u  -- pipes are rejected by Network.URI
#endif

--
-- Error reporting
--

err :: MonadIO m => Int -> String -> m a
err exitCode msg = liftIO $ do
  UTF8.hPutStrLn stderr msg
  exitWith $ ExitFailure exitCode
  return undefined

warn :: MonadIO m => String -> m ()
warn msg = liftIO $ do
  UTF8.hPutStrLn stderr $ "[warning] " ++ msg

mapLeft :: (a -> b) -> Either a c -> Either b c
mapLeft f (Left x) = Left (f x)
mapLeft _ (Right x) = Right x

-- | Remove intermediate "." and ".." directories from a path.
--
-- > collapseFilePath "./foo" == "foo"
-- > collapseFilePath "/bar/../baz" == "/baz"
-- > collapseFilePath "/../baz" == "/../baz"
-- > collapseFilePath "parent/foo/baz/../bar" ==  "parent/foo/bar"
-- > collapseFilePath "parent/foo/baz/../../bar" ==  "parent/bar"
-- > collapseFilePath "parent/foo/.." ==  "parent"
-- > collapseFilePath "/parent/foo/../../bar" ==  "/bar"
collapseFilePath :: FilePath -> FilePath
collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
  where
    go rs "." = rs
    go r@(p:rs) ".." = case p of
                            ".." -> ("..":r)
                            (checkPathSeperator -> Just True) -> ("..":r)
                            _ -> rs
    go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
    go rs x = x:rs
    isSingleton [] = Nothing
    isSingleton [x] = Just x
    isSingleton _ = Nothing
    checkPathSeperator = fmap isPathSeparator . isSingleton

--
-- File selection from the archive
--
filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)]
filteredFilesFromArchive zf f =
  mapMaybe (fileAndBinary zf) (filter f (filesInArchive zf))
  where
    fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
    fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)

---
--- Squash blocks into inlines
---

blockToInlines :: Block -> [Inline]
blockToInlines (Plain ils) = ils
blockToInlines (Para ils) = ils
blockToInlines (LineBlock lns) = combineLines lns
blockToInlines (CodeBlock attr str) = [Code attr str]
blockToInlines (RawBlock fmt str) = [RawInline fmt str]
blockToInlines (BlockQuote blks) = blocksToInlines blks
blockToInlines (OrderedList _ blkslst) =
  concatMap blocksToInlines blkslst
blockToInlines (BulletList blkslst) =
  concatMap blocksToInlines blkslst
blockToInlines (DefinitionList pairslst) =
  concatMap f pairslst
  where
    f (ils, blkslst) = ils ++
      [Str ":", Space] ++
      (concatMap blocksToInlines blkslst)
blockToInlines (Header _ _  ils) = ils
blockToInlines (HorizontalRule) = []
blockToInlines (Table _ _ _ headers rows) =
  intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl
  where
    tbl = headers : rows
blockToInlines (Div _ blks) = blocksToInlines blks
blockToInlines Null = []

blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline]
blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks

blocksToInlines :: [Block] -> [Inline]
blocksToInlines = blocksToInlinesWithSep [Space, Str "¶", Space]


--
-- Safe read
--

safeRead :: (MonadPlus m, Read a) => String -> m a
safeRead s = case reads s of
                  (d,x):_
                    | all isSpace x -> return d
                  _                 -> mzero

--
-- Temp directory
--

withTempDir :: String -> (FilePath -> IO a) -> IO a
withTempDir =
#ifdef _WINDOWS
  withTempDirectory "."
#else
  withSystemTempDirectory
#endif
-												Generalized type of stringify.

											
										
										
											2013-08-28 08:43:51 -07:00
+								{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
-												Shared: Make collapseFilePath OS-agnostic

											
										
										
											2014-09-25 12:42:53 +01:00
+								    FlexibleContexts, ScopedTypeVariables, PatternGuards,
 								    ViewPatterns #-}
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								{-
-												Updated copyright dates to include 2016.

											
										
										
											2016-03-22 17:20:39 -07:00
+								Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								This program is free software; you can redistribute it and/or modify
 								it under the terms of the GNU General Public License as published by
 								the Free Software Foundation; either version 2 of the License, or
 								(at your option) any later version.
 								This program is distributed in the hope that it will be useful,
 								but WITHOUT ANY WARRANTY; without even the implied warranty of
 								MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 								GNU General Public License for more details.
 								You should have received a copy of the GNU General Public License
 								along with this program; if not, write to the Free Software
 								Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 								-}
 								{- |
 								   Module      : Text.Pandoc.Shared
-												Updated copyright dates to include 2016.

											
										
										
											2016-03-22 17:20:39 -07:00
+								   Copyright   : Copyright (C) 2006-2016 John MacFarlane
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								   License     : GNU GPL, version 2 or above
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								   Maintainer  : John MacFarlane <jgm@berkeley.edu>
 								   Stability   : alpha
 								   Portability : portable
 								Utility functions and definitions used by the various Pandoc modules.
 								-}
-												Shared: enamed stringToURI -> escapeURI.

											
										
										
											2010-03-23 15:05:33 -07:00
+								module Text.Pandoc.Shared (
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     -- * List processing
 								                     splitBy,
 								                     splitByIndices,
-												Shared: Added splitStringWithIndices.

This is like splitWithIndices, but it is sensitive to distinctions
between wide, combining, and regular characters.

											
										
										
											2012-01-27 00:37:46 -08:00
+								                     splitStringByIndices,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     substitute,
-												Shared:  Added ordNub.

API change (adds export).

											
										
										
											2014-06-03 11:00:54 -07:00
+								                     ordNub,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     -- * Text processing
 								                     backslashEscapes,
 								                     escapeStringUsing,
 								                     stripTrailingNewlines,
-												Renamed removedLeadingTrailingSpace to trim.

Also removeLeadingSpace to triml,
removeTrailingSpace to trimr.

											
										
										
											2012-09-29 17:09:34 -04:00
+								                     trim,
 								                     triml,
 								                     trimr,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     stripFirstAndLast,
 								                     camelCaseToHyphenated,
 								                     toRomanNumeral,
-												Shared: enamed stringToURI -> escapeURI.

											
										
										
											2010-03-23 15:05:33 -07:00
+								                     escapeURI,
-												Changed order of functions in Shared.

											
										
										
											2010-07-06 23:17:06 -07:00
+								                     tabFilter,
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
+								                     -- * Date/time
 								                     normalizeDate,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     -- * Pandoc block and inline list processing
 								                     orderedListMarkers,
 								                     normalizeSpaces,
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								                     extractSpaces,
-												Shared:  Added removeFormatting.

API change (addition of exported function).

											
										
										
											2014-07-13 10:13:22 -07:00
+								                     removeFormatting,
-												Removed writerIgnoreNotes.

Instead, just temporarily remove notes when generating
TOC lists in HTML and Markdown (as we already did in LaTeX).

Also export deNote from Text.Pandoc.Shared.

API change in Shared and Options.WriterOptions.

											
										
										
											2017-01-15 22:34:14 +01:00
+								                     deNote,
-												Added 'stringify' to Text.Pandoc.Shared.

											
										
										
											2010-11-27 07:08:06 -08:00
+								                     stringify,
-												Correctly implement capitalisation.

Using `map toUpper` to capitalise text is wrong, as e.g.
“Straße” should be converted to “STRASSE”, which is 1 character
longer. This commit adds a `capitalize` function and replaces
2 identical implementations in different modules (`toCaps` and
`capitalize`) with it.

											
										
										
											2014-08-03 16:48:55 +04:00
+								                     capitalize,
-												Shared: Export compactify', formerly in Markdown reader.

											
										
										
											2012-09-27 17:22:17 -07:00
+								                     compactify',
-												Move `compactify'DL` from Markdown reader into Shared

The function `compactify'DL`, used to change the final definition item of a
definition list into a `Plain` iff all other items are `Plain`s as well, is
useful in many parsers and hence moved into Text.Pandoc.Shared.

											
										
										
											2014-04-19 14:48:35 +02:00
+								                     compactify'DL,
-												Shared: add function combining lines using LineBreak

The `linesToBlock` function takes a list of lines and combines them by appending
a hard `LineBreak` to each line and concatenating the result, putting the result
it into a `Para`. This is most useful when dealing when converting `LineBlock`
elements.

											
										
										
											2016-10-13 08:46:38 +02:00
+								                     linesToPara,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     Element (..),
 								                     hierarchicalize,
-												Shared: Export uniqueIdent, don't allow tilde in identifier.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1894 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2010-03-16 06:45:52 +00:00
+								                     uniqueIdent,
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid DRY.

											
										
										
											2016-10-12 17:42:30 +02:00
+								                     inlineListToIdentifier,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     isHeaderBlock,
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
+								                     headerShift,
-												Shared: export isTightList.

											
										
										
											2013-01-07 20:12:05 -08:00
+								                     isTightList,
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template..
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								                     addMetaField,
 								                     makeMeta,
-												Moved renderTags' from HTML reader & SelfContained to Shared.

Improved removal of markdown="1" attribute in Markdow reader.

											
										
										
											2012-08-15 09:42:16 -07:00
+								                     -- * TagSoup HTML handling
 								                     renderTags',
-												Added 'odt' output option to pandoc:
Not a writer, but a module that inserts the output of the OpenDocument
writer into an ODT archive.  This replaces markdown2odt.

+ Added odt output option to Main.hs.
+ Added default for .odt output file.
+ Changed defaults so that .xml and .sgml aren't automatically DocBook.
+ Added odt writer to Text.Pandoc exports.
+ Added Text.Pandoc.ODT and included in pandoc.cabal.
+ Added reference.odt as data-file in pandoc.cabal.
+ Handle picture links in OpenDocument files using xml library.
+ Removed markdown2odt and references from Makefile, README, man.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1345 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-07-31 23:16:02 +00:00
+								                     -- * File handling
-												Removed TH module; refactored LaTeXMathML not to use TH.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1692 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-12-31 01:11:23 +00:00
+								                     inDirectory,
-												Let reference.docx/odt behave as if they are virtual data files.

Now they are constructed on the fly from their components,
but we now allow them to be printed with `--print-default-data-file`
and to override the defaults if placed in the user data directory.

Shared now exports getDefaultReferenceDocx and getDefaultReferenceODT
(API change).

These functions have been removed from the Docx and ODT writers.

Shared.readDataFile has been modified so that requests to read
a reference.odt or reference.docx will use these functions to
generate the files.

											
										
										
											2015-06-28 22:30:21 -07:00
+								                     getDefaultReferenceDocx,
 								                     getDefaultReferenceODT,
-												Added getMimeType to Text.Pandoc.Shared.

											
										
										
											2011-07-17 19:33:52 -07:00
+								                     readDataFile,
-												Data files changes.

* Added `embed_data_files` flag.  (not yet used)
* Shared no longer exports `findDataFile`.
* `readDataFile` now returns a strict bytestring.
* Shared now exports `readDataFileUTF8` which returns a string like
  the old `readDataFile`.
* Rewrote modules to use new data file functions and to avoid
  using functions from Paths_pandoc directly.

											
										
										
											2012-12-29 17:44:02 -08:00
+								                     readDataFileUTF8,
-												Refactoring:

* Shared now exports fetchItem (instead of getItem) and openURL
* fetchItem has different parameters than getItem and includes
  some logic formerly in the ODT and Docx writers
* getItem still used in SelfContained

											
										
										
											2013-01-11 16:19:06 -08:00
+								                     openURL,
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								                     collapseFilePath,
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid DRY.

											
										
										
											2016-10-12 17:42:30 +02:00
+								                     filteredFilesFromArchive,
-												Complete rewrite of LaTeX reader.

* The new reader is more robust, accurate, and extensible.
  It is still quite incomplete, but it should be easier
  now to add features.

* Text.Pandoc.Parsing: Added withRaw combinator.

* Markdown reader: do escapedChar before raw latex inline.
  Otherwise we capture commands like \{.

* Fixed latex citation tests for new citeproc.

* Handle \include{} commands in latex.
  This is done in pandoc.hs, not the (pure) latex reader.
  But the reader exports the needed function, handleIncludes.

* Moved err and warn from pandoc.hs to Shared.

* Fixed tests - raw tex should sometimes have trailing space.

* Updated lhs-test for highlighting-kate changes.

											
										
										
											2012-01-29 23:54:00 -08:00
+								                     -- * Error handling
 								                     err,
-												Added safeRead to Text.Pandoc.Shared.

											
										
										
											2012-08-09 07:52:39 -07:00
+								                     warn,
-												Move utility error functions to Text.Pandoc.Shared

											
										
										
											2015-02-18 21:05:47 +00:00
+								                     mapLeft,
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
+								                     -- * for squashing blocks
 								                     blocksToInlines,
-												Added safeRead to Text.Pandoc.Shared.

											
										
										
											2012-08-09 07:52:39 -07:00
+								                     -- * Safe read
-												Moved withTempDir from PDF to Shared, export from Shared.

API change.

											
										
										
											2014-07-30 12:29:04 -07:00
+								                     safeRead,
 								                     -- * Temp directory
-												Move the variable pandocVersion from `src/Text/Pandoc.hs` to
`src/Text/Pandoc/Shared.hs`, so that all Writers can access this variable
without importing `src/Text/Pandoc.hs`, preventing circular import.

* pandoc.hs: Import pandocVersion from `Text.Pandoc.Shared`.
* src/Text/Pandoc.hs: Remove the definition of pandocVersion
 and relevant import.
* src/Text/Pandoc/Shared.hs: Add the definition of pandocVersion
 and relevant import.

											
										
										
											2015-09-25 03:54:41 +08:00
+								                     withTempDir,
 								                     -- * Version
 								                     pandocVersion
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                    ) where
 								import Text.Pandoc.Definition
-												Use query instead of queryWith.

											
										
										
											2013-08-10 18:13:38 -07:00
+								import Text.Pandoc.Walk
-												Move `compactify'DL` from Markdown reader into Shared

The function `compactify'DL`, used to change the final definition item of a
definition list into a `Plain` iff all other items are `Plain`s as well, is
useful in many parsers and hence moved into Text.Pandoc.Shared.

											
										
										
											2014-04-19 14:48:35 +02:00
+								import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
-												Shared: Export compactify', formerly in Markdown reader.

											
										
										
											2012-09-27 17:22:17 -07:00
+								import qualified Text.Pandoc.Builder as B
-												Complete rewrite of LaTeX reader.

* The new reader is more robust, accurate, and extensible.
  It is still quite incomplete, but it should be easier
  now to add features.

* Text.Pandoc.Parsing: Added withRaw combinator.

* Markdown reader: do escapedChar before raw latex inline.
  Otherwise we capture commands like \{.

* Fixed latex citation tests for new citeproc.

* Handle \include{} commands in latex.
  This is done in pandoc.hs, not the (pure) latex reader.
  But the reader exports the needed function, handleIncludes.

* Moved err and warn from pandoc.hs to Shared.

* Fixed tests - raw tex should sometimes have trailing space.

* Updated lhs-test for highlighting-kate changes.

											
										
										
											2012-01-29 23:54:00 -08:00
+								import qualified Text.Pandoc.UTF8 as UTF8
 								import System.Exit (exitWith, ExitCode(..))
-												Shared: Removed unescapeURI, modified escapeURI.

escapeURI now only escapes space characters, leaving unicode characters
as they are, instead of converting them to octets and URL-encoding them,
as before.  This gives more readable URIs.  User agents now do the
percent-encoding themselves.

URIs are no longer unescaped at all on conversion to markdown, asciidoc,
rst, org.

Closes #349.

											
										
										
											2011-12-02 19:39:30 -08:00
+								import Data.Char ( toLower, isLower, isUpper, isAlpha,
 								                   isLetter, isDigit, isSpace )
-												Use `stripPrefix` where appropriate.

											
										
										
											2014-08-03 14:44:39 +04:00
+								import Data.List ( find, stripPrefix, intercalate )
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid DRY.

											
										
										
											2016-10-12 17:42:30 +02:00
+								import Data.Maybe (mapMaybe)
-												Move the variable pandocVersion from `src/Text/Pandoc.hs` to
`src/Text/Pandoc/Shared.hs`, so that all Writers can access this variable
without importing `src/Text/Pandoc.hs`, preventing circular import.

* pandoc.hs: Import pandocVersion from `Text.Pandoc.Shared`.
* src/Text/Pandoc.hs: Remove the definition of pandocVersion
 and relevant import.
* src/Text/Pandoc/Shared.hs: Add the definition of pandocVersion
 and relevant import.

											
										
										
											2015-09-25 03:54:41 +08:00
+								import Data.Version ( showVersion )
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template..
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								import qualified Data.Map as M
-												Text.Pandoc.Shared: Removed fetchItem, fetchItem'.

Made changes where these are used, so that the version
of fetchItem from PandocMonad can be used instead.

											
										
										
											2016-12-12 13:51:20 +01:00
+								import Network.URI ( escapeURIString, unEscapeString )
-												Shared:  Added ordNub.

API change (adds export).

											
										
										
											2014-06-03 11:00:54 -07:00
+								import qualified Data.Set as Set
-												Added 'odt' output option to pandoc:
Not a writer, but a module that inserts the output of the OpenDocument
writer into an ODT archive.  This replaces markdown2odt.

+ Added odt output option to Main.hs.
+ Added default for .odt output file.
+ Changed defaults so that .xml and .sgml aren't automatically DocBook.
+ Added odt writer to Text.Pandoc exports.
+ Added Text.Pandoc.ODT and included in pandoc.cabal.
+ Added reference.odt as data-file in pandoc.cabal.
+ Handle picture links in OpenDocument files using xml library.
+ Removed markdown2odt and references from Makefile, README, man.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1345 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-07-31 23:16:02 +00:00
+								import System.Directory
-												Removed unneeded imports.

											
										
										
											2015-09-26 22:56:13 -07:00
+								import System.FilePath (splitDirectories, isPathSeparator)
-												MediaBag:  ensure that / is always used as path separator.

											
										
										
											2015-09-26 22:40:58 -07:00
+								import qualified System.FilePath.Posix as Posix
-												Text.Pandoc.Shared: Removed fetchItem, fetchItem'.

Made changes where these are used, so that the version
of fetchItem from PandocMonad can be used instead.

											
										
										
											2016-12-12 13:51:20 +01:00
+								import Text.Pandoc.MIME (MimeType)
 								import System.FilePath ( (</>) )
-												Use functions from Text.Pandoc.Generic instead of processWith(M).

											
										
										
											2010-12-24 13:39:27 -08:00
+								import Data.Generics (Typeable, Data)
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								import qualified Control.Monad.State as S
-												Put 'warn' in MonadIO.  Add warnings for math conversions in docx.

											
										
										
											2016-11-22 10:56:59 +01:00
+								import Control.Monad.Trans (MonadIO (..))
-												Improved fetching of external resources.

* In Shared, openURL and fetchItem now return an Either, for
  better error handling. (API change.)
* Better error message when fetching a URL fails with
  `--self-contained`.
* EPUB writer: If resource not found, skip it, as in Docx writer.
* Closes #916.

											
										
										
											2013-07-18 20:58:14 -07:00
+								import qualified Control.Exception as E
-												Make safeRead safe.

Fixes #1801

											
										
										
											2015-02-18 18:40:36 +00:00
+								import Control.Monad (msum, unless, MonadPlus(..))
-												Shared: Added splitStringWithIndices.

This is like splitWithIndices, but it is sensitive to distinctions
between wide, combining, and regular characters.

											
										
										
											2012-01-27 00:37:46 -08:00
+								import Text.Pandoc.Pretty (charWidth)
-												More changes to avoid compiler warnings on ghc 7.10.

* CPP around deprecated `parseTime`.
* Text.Pandoc.Compat.Locale -> Text.Pandoc.Compat.Time,
  now exports Data.Time.

											
										
										
											2015-10-14 10:05:17 -07:00
+								import Text.Pandoc.Compat.Time
-												Let reference.docx/odt behave as if they are virtual data files.

Now they are constructed on the fly from their components,
but we now allow them to be printed with `--print-default-data-file`
and to override the defaults if placed in the user data directory.

Shared now exports getDefaultReferenceDocx and getDefaultReferenceODT
(API change).

These functions have been removed from the Docx and ODT writers.

Shared.readDataFile has been modified so that requests to read
a reference.odt or reference.docx will use these functions to
generate the files.

											
										
										
											2015-06-28 22:30:21 -07:00
+								import Data.Time.Clock.POSIX
-												Revert "More intelligent handling of text encodings."

This reverts commit 7272735b3d413a644fd9ab01eeae8ae9cd5a925b.

											
										
										
											2012-09-23 22:53:34 -07:00
+								import System.IO (stderr)
-												Moved withTempDir from PDF to Shared, export from Shared.

API change.

											
										
										
											2014-07-30 12:29:04 -07:00
+								import System.IO.Temp
-												Moved renderTags' from HTML reader & SelfContained to Shared.

Improved removal of markdown="1" attribute in Markdow reader.

											
										
										
											2012-08-15 09:42:16 -07:00
+								import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
 								         renderOptions)
-												Remove Compat.Monoid

This was only necessary for GHC versions with base below 4.5
(i.e., ghc < 7.4).

											
										
										
											2016-08-30 13:43:50 -04:00
+								import Data.Monoid ((<>))
-												cabal:  Added http-conduit flag, which allows fetching https resources.

It also brings in a large number of dependencies (http-conduit and its
dependencies), which is why for now it is an optional flag.

Closes #820.

											
										
										
											2013-07-04 22:40:23 -07:00
+								import qualified Data.ByteString as BS
-												Shared.openURL:  Properly handle data: URIs.

											
										
										
											2013-05-28 12:48:17 -07:00
+								import qualified Data.ByteString.Char8 as B8
-												Shared: Fixed bug in openURL with data: URIs.

Previously the base-64 encoded bytestring was returned.
We now decode it so it's a proper image!

This should fix parsing of data: URLs.

											
										
										
											2013-11-19 13:15:24 -08:00
+								import Data.ByteString.Base64 (decodeLenient)
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
-												Correctly implement capitalisation.

Using `map toUpper` to capitalise text is wrong, as e.g.
“Straße” should be converted to “STRASSE”, which is 1 character
longer. This commit adds a `capitalize` function and replaces
2 identical implementations in different modules (`toCaps` and
`capitalize`) with it.

											
										
										
											2014-08-03 16:48:55 +04:00
+								import qualified Data.Text as T (toUpper, pack, unpack)
-												Let reference.docx/odt behave as if they are virtual data files.

Now they are constructed on the fly from their components,
but we now allow them to be printed with `--print-default-data-file`
and to override the defaults if placed in the user data directory.

Shared now exports getDefaultReferenceDocx and getDefaultReferenceODT
(API change).

These functions have been removed from the Docx and ODT writers.

Shared.readDataFile has been modified so that requests to read
a reference.odt or reference.docx will use these functions to
generate the files.

											
										
										
											2015-06-28 22:30:21 -07:00
+								import Data.ByteString.Lazy (toChunks, fromChunks)
 								import qualified Data.ByteString.Lazy as BL
-												Move the variable pandocVersion from `src/Text/Pandoc.hs` to
`src/Text/Pandoc/Shared.hs`, so that all Writers can access this variable
without importing `src/Text/Pandoc.hs`, preventing circular import.

* pandoc.hs: Import pandocVersion from `Text.Pandoc.Shared`.
* src/Text/Pandoc.hs: Remove the definition of pandocVersion
 and relevant import.
* src/Text/Pandoc/Shared.hs: Add the definition of pandocVersion
 and relevant import.

											
										
										
											2015-09-25 03:54:41 +08:00
+								import Paths_pandoc (version)
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template..
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
-												fix build failure with --flags=-https

The issue was originally reported by CasperVector as
    https://github.com/gentoo-haskell/gentoo-haskell/issues/427

Mainfests itself as a builg failure full of missing zip-archive
names:

    src/Text/Pandoc/Shared.hs:756:49:
        Not in scope: type constructor or class ‘Archive’
    src/Text/Pandoc/Shared.hs:777:38: Not in scope: ‘toEntry’
    src/Text/Pandoc/Shared.hs:786:19:
        Not in scope: ‘toArchive’
        Perhaps you meant ‘mbArchive’ (line 778)

Included Codec.Archive.Zip unconditionally.

Signed-off-by: Sergei Trofimovich <siarheit@google.com>

											
										
										
											2015-07-30 22:39:25 +01:00
+								import Codec.Archive.Zip
-												Data files changes.

* Added `embed_data_files` flag.  (not yet used)
* Shared no longer exports `findDataFile`.
* `readDataFile` now returns a strict bytestring.
* Shared now exports `readDataFileUTF8` which returns a string like
  the old `readDataFile`.
* Rewrote modules to use new data file functions and to avoid
  using functions from Paths_pandoc directly.

											
										
										
											2012-12-29 17:44:02 -08:00
+								#ifdef EMBED_DATA_FILES
-												Minor fixes to previous commit.

* Instead of defining readmeFile in Text.Pandoc.Data (which we forgot
  to export anyway), we simply add a record for "README" to the
  `dataFiles` lookup table.  This allows simplifying some of the code
  for `readDefaultDataFile` in SHared.

* As a bonus, `pandoc --print-default-data-file README` now works.

											
										
										
											2015-06-28 20:59:18 -07:00
+								import Text.Pandoc.Data (dataFiles)
-												Data files changes.

* Added `embed_data_files` flag.  (not yet used)
* Shared no longer exports `findDataFile`.
* `readDataFile` now returns a strict bytestring.
* Shared now exports `readDataFileUTF8` which returns a string like
  the old `readDataFile`.
* Rewrote modules to use new data file functions and to avoid
  using functions from Paths_pandoc directly.

											
										
										
											2012-12-29 17:44:02 -08:00
+								#else
 								import Paths_pandoc (getDataFileName)
 								#endif
-												Removed dependency on conduit.

* http-conduit flag is now https.
* Instead of http-conduit, we depend on http-client and http-client-tls.

											
										
										
											2014-05-18 22:04:39 -07:00
+								#ifdef HTTP_CLIENT
-												Fix warning for parseURl import.

											
										
										
											2016-07-03 22:26:08 -07:00
+								import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
-												Removed dependency on conduit.

* http-conduit flag is now https.
* Instead of http-conduit, we depend on http-client and http-client-tls.

											
										
										
											2014-05-18 22:04:39 -07:00
+								                            Request(port,host))
-												Revert "Remove http-client CPP conditionals."

This reverts commit 3f82471355286d33f2d73329c29a51c47bf76ad7.

We might want to revert the requirement of http-client 0.5,
as this is not yet in Stackage and that is starting to
cause problems.  I can't recall why it is there.

											
										
										
											2016-10-13 12:32:10 +02:00
+								import Network.HTTP.Client (parseRequest)
 								import Network.HTTP.Client (newManager)
-												Removed dependency on conduit.

* http-conduit flag is now https.
* Instead of http-conduit, we depend on http-client and http-client-tls.

											
										
										
											2014-05-18 22:04:39 -07:00
+								import Network.HTTP.Client.Internal (addProxy)
 								import Network.HTTP.Client.TLS (tlsManagerSettings)
-												Shared.openURL:  Set proxy with value of http_proxy env variable.

Note:  proxies with non-root paths are not supported,
because of limitations in http-conduit.

Closes #1211.

											
										
										
											2014-04-05 10:58:32 -07:00
+								import System.Environment (getEnv)
-												cabal:  Added http-conduit flag, which allows fetching https resources.

It also brings in a large number of dependencies (http-conduit and its
dependencies), which is why for now it is an optional flag.

Closes #820.

											
										
										
											2013-07-04 22:40:23 -07:00
+								import Network.HTTP.Types.Header ( hContentType)
-												Added `withSocketsDo` around http conduit code in `openURL`.

This should address #1080, but further testing on Windows is needed
before we can close the bug.

											
										
										
											2013-12-09 22:35:22 -08:00
+								import Network (withSocketsDo)
-												cabal:  Added http-conduit flag, which allows fetching https resources.

It also brings in a large number of dependencies (http-conduit and its
dependencies), which is why for now it is an optional flag.

Closes #820.

											
										
										
											2013-07-04 22:40:23 -07:00
+								#else
 								import Network.URI (parseURI)
 								import Network.HTTP (findHeader, rspBody,
 								                     RequestMethod(..), HeaderName(..), mkRequest)
 								import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
 								#endif
-												Fixed treatment of unicode characters in URIs.

* Added stringToURI to Shared.  This is used in the HTML
  writer for all URIs.  It properly URI-encodes high
  characters (> 127), leaving everything else (including
  symbols and spaces) the same.

* Modified unsanitaryURI to allow UTF8 characters in a URI.
  (First, we convert the URI to URI-encoded octets, then we
  pass through parseURIReference.)
  This resolves gitit Issue #99. Previously
  '[abc](http://gitit.net/测试)' would not be rendered as
  a link when --sanitize was selected.

											
										
										
											2010-03-22 19:29:37 -07:00
-												Move the variable pandocVersion from `src/Text/Pandoc.hs` to
`src/Text/Pandoc/Shared.hs`, so that all Writers can access this variable
without importing `src/Text/Pandoc.hs`, preventing circular import.

* pandoc.hs: Import pandocVersion from `Text.Pandoc.Shared`.
* src/Text/Pandoc.hs: Remove the definition of pandocVersion
 and relevant import.
* src/Text/Pandoc/Shared.hs: Add the definition of pandocVersion
 and relevant import.

											
										
										
											2015-09-25 03:54:41 +08:00
+								-- | Version number of pandoc library.
 								pandocVersion :: String
 								pandocVersion = showVersion version
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								--
 								-- List processing
 								--
 								-- | Split list by groups of one or more sep.
-												Shared: Made splitBy take a test instead of an element.

											
										
										
											2010-12-21 08:41:24 -08:00
+								splitBy :: (a -> Bool) -> [a] -> [[a]]
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								splitBy _ [] = []
-												Shared: Made splitBy take a test instead of an element.

											
										
										
											2010-12-21 08:41:24 -08:00
+								splitBy isSep lst =
 								  let (first, rest) = break isSep lst
 								      rest'         = dropWhile isSep rest
 								  in  first:(splitBy isSep rest')
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								splitByIndices :: [Int] -> [a] -> [[a]]
 								splitByIndices [] lst = [lst]
-												Shared: Added splitStringWithIndices.

This is like splitWithIndices, but it is sensitive to distinctions
between wide, combining, and regular characters.

											
										
										
											2012-01-27 00:37:46 -08:00
+								splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x)  xs) rest)
 								  where (first, rest) = splitAt x lst
 								-- | Split string into chunks divided at specified indices.
 								splitStringByIndices :: [Int] -> [Char] -> [[Char]]
 								splitStringByIndices [] lst = [lst]
 								splitStringByIndices (x:xs) lst =
 								  let (first, rest) = splitAt' x lst in
 								  first : (splitStringByIndices (map (\y -> y - x) xs) rest)
 								splitAt' :: Int -> [Char] -> ([Char],[Char])
 								splitAt' _ []          = ([],[])
 								splitAt' n xs | n <= 0 = ([],xs)
 								splitAt' n (x:xs)      = (x:ys,zs)
 								  where (ys,zs) = splitAt' (n - charWidth x) xs
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Replace each occurrence of one sublist in a list with another.
 								substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
 								substitute _ _ [] = []
-												Slight code cleanup on substitute function.

											
										
										
											2010-07-11 12:22:18 -07:00
+								substitute [] _ xs = xs
 								substitute target replacement lst@(x:xs) =
-												Use `stripPrefix` where appropriate.

											
										
										
											2014-08-03 14:44:39 +04:00
+								    case stripPrefix target lst of
 								      Just lst' -> replacement ++ substitute target replacement lst'
 								      Nothing   -> x : substitute target replacement xs
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
-												Shared:  Added ordNub.

API change (adds export).

											
										
										
											2014-06-03 11:00:54 -07:00
+								ordNub :: (Ord a) => [a] -> [a]
 								ordNub l = go Set.empty l
 								  where
 								    go _ [] = []
 								    go s (x:xs) = if x `Set.member` s then go s xs
 								                                      else x : go (Set.insert x s) xs
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								--
 								-- Text processing
 								--
 								-- | Returns an association list of backslash escapes for the
 								-- designated characters.
 								backslashEscapes :: [Char]    -- ^ list of special characters to escape
 								                 -> [(Char, String)]
 								backslashEscapes = map (\ch -> (ch, ['\\',ch]))
 								-- | Escape a string of characters, using an association list of
 								-- characters and strings.
 								escapeStringUsing :: [(Char, String)] -> String -> String
 								escapeStringUsing _ [] = ""
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								escapeStringUsing escapeTable (x:xs) =
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								  case (lookup x escapeTable) of
 								       Just str  -> str ++ rest
 								       Nothing   -> x:rest
 								  where rest = escapeStringUsing escapeTable xs
 								-- | Strip trailing newlines from string.
 								stripTrailingNewlines :: String -> String
 								stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
 								-- | Remove leading and trailing space (including newlines) from string.
-												Renamed removedLeadingTrailingSpace to trim.

Also removeLeadingSpace to triml,
removeTrailingSpace to trimr.

											
										
										
											2012-09-29 17:09:34 -04:00
+								trim :: String -> String
 								trim = triml . trimr
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Remove leading space (including newlines) from string.
-												Renamed removedLeadingTrailingSpace to trim.

Also removeLeadingSpace to triml,
removeTrailingSpace to trimr.

											
										
										
											2012-09-29 17:09:34 -04:00
+								triml :: String -> String
-												Revert "Use -XNoImplicitPrelude and 'import Prelude' explicitly."

This reverts commit c423dbb5a34c2d1195020e0f0ca3aae883d0749b.

											
										
										
											2015-11-09 10:08:22 -08:00
+								triml = dropWhile (`elem` " \r\n\t")
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Remove trailing space (including newlines) from string.
-												Renamed removedLeadingTrailingSpace to trim.

Also removeLeadingSpace to triml,
removeTrailingSpace to trimr.

											
										
										
											2012-09-29 17:09:34 -04:00
+								trimr :: String -> String
 								trimr = reverse . triml . reverse
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Strip leading and trailing characters from string
 								stripFirstAndLast :: String -> String
 								stripFirstAndLast str =
 								  drop 1 $ take ((length str) - 1) str
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								camelCaseToHyphenated :: String -> String
 								camelCaseToHyphenated [] = ""
 								camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
 								  a:'-':(toLower b):(camelCaseToHyphenated rest)
 								camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
 								-- | Convert number < 4000 to uppercase roman numeral.
 								toRomanNumeral :: Int -> String
 								toRomanNumeral x =
 								  if x >= 4000 || x < 0
 								     then "?"
 								     else case x of
 								              _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
 								              _ | x >= 900  -> "CM" ++ toRomanNumeral (x - 900)
 								              _ | x >= 500  -> "D" ++ toRomanNumeral (x - 500)
 								              _ | x >= 400  -> "CD" ++ toRomanNumeral (x - 400)
 								              _ | x >= 100  -> "C" ++ toRomanNumeral (x - 100)
 								              _ | x >= 90   -> "XC" ++ toRomanNumeral (x - 90)
 								              _ | x >= 50   -> "L"  ++ toRomanNumeral (x - 50)
 								              _ | x >= 40   -> "XL" ++ toRomanNumeral (x - 40)
 								              _ | x >= 10   -> "X" ++ toRomanNumeral (x - 10)
-												Shared: Fixed bug in toRomanNumeral.

9 and numbers ending in 9 would end with "IXIV."
Thanks to Jesse Rosenthal.  Closes #1249.

											
										
										
											2014-04-15 19:53:11 -07:00
+								              _ | x == 9    -> "IX"
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								              _ | x >= 5    -> "V" ++ toRomanNumeral (x - 5)
-												Shared: Fixed bug in toRomanNumeral.

9 and numbers ending in 9 would end with "IXIV."
Thanks to Jesse Rosenthal.  Closes #1249.

											
										
										
											2014-04-15 19:53:11 -07:00
+								              _ | x == 4    -> "IV"
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								              _ | x >= 1    -> "I" ++ toRomanNumeral (x - 1)
 								              _             -> ""
-												Percent-encode more special characters in URLs.

HTML, LaTeX writers adjusted.
The special characters are '<','>','|','"','{','}','[',']','^', '`'.

Closes #1640, #2377.

											
										
										
											2015-10-11 17:06:26 -07:00
+								-- | Escape whitespace and some punctuation characters in URI.
-												Shared: enamed stringToURI -> escapeURI.

											
										
										
											2010-03-23 15:05:33 -07:00
+								escapeURI :: String -> String
-												Percent-encode more special characters in URLs.

HTML, LaTeX writers adjusted.
The special characters are '<','>','|','"','{','}','[',']','^', '`'.

Closes #1640, #2377.

											
										
										
											2015-10-11 17:06:26 -07:00
+								escapeURI = escapeURIString (not . needsEscaping)
 								  where needsEscaping c = isSpace c || c `elem`
 								                           ['<','>','|','"','{','}','[',']','^', '`']
-												Shared: Export  unescapeURI.

											
										
										
											2010-03-23 15:34:53 -07:00
-												Changed order of functions in Shared.

											
										
										
											2010-07-06 23:17:06 -07:00
+								-- | Convert tabs to spaces and filter out DOS line endings.
 								-- Tabs will be preserved if tab stop is set to 0.
 								tabFilter :: Int       -- ^ Tab stop
 								          -> String    -- ^ Input
 								          -> String
 								tabFilter tabStop =
 								  let go _ [] = ""
 								      go _ ('\n':xs) = '\n' : go tabStop xs
 								      go _ ('\r':'\n':xs) = '\n' : go tabStop xs
 								      go _ ('\r':xs) = '\n' : go tabStop xs
 								      go spsToNextStop ('\t':xs) =
 								        if tabStop == 0
 								           then '\t' : go tabStop xs
 								           else replicate spsToNextStop ' ' ++ go tabStop xs
 								      go 1 (x:xs) =
 								        x : go tabStop xs
 								      go spsToNextStop (x:xs) =
 								        x : go (spsToNextStop - 1) xs
 								  in  go tabStop
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
+								--
 								-- Date/time
 								--
-												Shared: normalizeDate should reject illegal years.

We only allow years between 1601 and 9999, inclusive. The ISO 8601
actually says that years are supposed to start with 1583, but MS Word
only allows 1601-9999. This should stop corrupted word files if the date
is out of that range, or is parsed incorrectly.

											
										
										
											2016-07-09 15:37:47 -04:00
+								-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
 								-- limit years to the range 1601-9999 (ISO 8601 accepts greater than
-												Shared: improve year sanity check in normalizeDate

Previously we parsed a list of dates, took the first one, and then
tested its year range. That meant that if the first one failed, we
returned nothing, regardless of what the others did. Now we test for
sanity before running `msum` over the list of Maybe values. Anything
failing the test will be Nothing, so will not be a candidate.

											
										
										
											2016-07-09 17:03:39 -04:00
+								-- or equal to 1583, but MS Word only accepts dates starting 1601).
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
+								normalizeDate :: String -> Maybe String
-												Shared: improve year sanity check in normalizeDate

Previously we parsed a list of dates, took the first one, and then
tested its year range. That meant that if the first one failed, we
returned nothing, regardless of what the others did. Now we test for
sanity before running `msum` over the list of Maybe values. Anything
failing the test will be Nothing, so will not be a candidate.

											
										
										
											2016-07-09 17:03:39 -04:00
+								normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
 								  (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
 								  where rejectBadYear day = case toGregorian day of
 								          (y, _, _) | y >= 1601 && y <= 9999 -> Just day
 								          _ -> Nothing
-												Shared: normalizeDate should reject illegal years.

We only allow years between 1601 and 9999, inclusive. The ISO 8601
actually says that years are supposed to start with 1583, but MS Word
only allows 1601-9999. This should stop corrupted word files if the date
is out of that range, or is parsed incorrectly.

											
										
										
											2016-07-09 15:37:47 -04:00
+								        parsetimeWith =
-												More changes to avoid compiler warnings on ghc 7.10.

* CPP around deprecated `parseTime`.
* Text.Pandoc.Compat.Locale -> Text.Pandoc.Compat.Time,
  now exports Data.Time.

											
										
										
											2015-10-14 10:05:17 -07:00
+								#if MIN_VERSION_time(1,5,0)
 								             parseTimeM True defaultTimeLocale
 								#else
 								             parseTime defaultTimeLocale
 								#endif
-												Shared: normalizeDate should reject illegal years.

We only allow years between 1601 and 9999, inclusive. The ISO 8601
actually says that years are supposed to start with 1583, but MS Word
only allows 1601-9999. This should stop corrupted word files if the date
is out of that range, or is parsed incorrectly.

											
										
										
											2016-07-09 15:37:47 -04:00
+								        formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
-												Shared: Add further formats for `normalizeDate`

We want to avoid illegal dates -- in particular years with greater than
four digits. We attempt to parse series of digits first as `%Y%m%d`, then
`%Y%m`, and finally `%Y`.

											
										
										
											2016-07-09 11:13:25 -04:00
+								                    "%d %B %Y", "%b. %d, %Y", "%B %d, %Y",
 								                    "%Y%m%d", "%Y%m", "%Y"]
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								--
 								-- Pandoc block and inline list processing
 								--
 								-- | Generate infinite lazy list of markers for an ordered list,
 								-- depending on list attributes.
 								orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								orderedListMarkers (start, numstyle, numdelim) =
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								  let singleton c = [c]
 								      nums = case numstyle of
 								                     DefaultStyle -> map show [start..]
-												Merge branch 'atlists'.  Added auto-numbered example lists.

											
										
										
											2010-07-11 22:47:52 -07:00
+								                     Example      -> map show [start..]
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     Decimal      -> map show [start..]
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								                     UpperAlpha   -> drop (start - 1) $ cycle $
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                                     map singleton ['A'..'Z']
 								                     LowerAlpha   -> drop (start - 1) $ cycle $
 								                                     map singleton ['a'..'z']
 								                     UpperRoman   -> map toRomanNumeral [start..]
 								                     LowerRoman   -> map (map toLower . toRomanNumeral) [start..]
 								      inDelim str = case numdelim of
 								                            DefaultDelim -> str ++ "."
 								                            Period       -> str ++ "."
 								                            OneParen     -> str ++ ")"
 								                            TwoParens    -> "(" ++ str ++ ")"
 								  in  map inDelim nums
 								-- | Normalize a list of inline elements: remove leading and trailing
 								-- @Space@ elements, collapse double @Space@s into singles, and
 								-- remove empty Str elements.
 								normalizeSpaces :: [Inline] -> [Inline]
-												Rewrote normalizeSpaces (mostly aesthetic reasons).

											
										
										
											2010-12-07 20:10:21 -08:00
+								normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
-												Slightly more efficient normalizeSpaces.

											
										
										
											2012-07-24 22:12:18 -07:00
+								 where  cleanup []              = []
 								        cleanup (Space:rest)    = case dropWhile isSpaceOrEmpty rest of
 								                                        []     -> []
 								                                        (x:xs) -> Space : x : cleanup xs
-												Rewrote normalizeSpaces (mostly aesthetic reasons).

											
										
										
											2010-12-07 20:10:21 -08:00
+								        cleanup ((Str ""):rest) = cleanup rest
-												Slightly more efficient normalizeSpaces.

											
										
										
											2012-07-24 22:12:18 -07:00
+								        cleanup (x:rest)        = x : cleanup rest
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
-												normalize:  Normalize spaces too.

In normal form, Space elements only occur to separate two non-Space
elements.  So, we never have [Space], or [, ..., Space].

											
										
										
											2011-02-04 13:22:31 -08:00
+								isSpaceOrEmpty :: Inline -> Bool
 								isSpaceOrEmpty Space = True
 								isSpaceOrEmpty (Str "") = True
 								isSpaceOrEmpty _ = False
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								-- | Extract the leading and trailing spaces from inside an inline element
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								-- and place them outside the element.  SoftBreaks count as Spaces for
 								-- these purposes.
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
-												Removed space at ends of lines in source.

											
										
										
											2014-07-12 22:57:22 -07:00
+								extractSpaces f is =
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								  let contents = B.unMany is
 								      left  = case viewl contents of
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								                    (Space :< _)     -> B.space
 								                    (SoftBreak :< _) -> B.softbreak
 								                    _                -> mempty
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								      right = case viewr contents of
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								                    (_ :> Space)     -> B.space
 								                    (_ :> SoftBreak) -> B.softbreak
 								                    _                -> mempty in
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								  (left <> f (B.trimInlines . B.Many $ contents) <> right)
-												Better comment on removeFormatting.

											
										
										
											2014-07-13 15:10:27 -07:00
+								-- | Extract inlines, removing formatting.
-												Shared: Generalized type of removeFormatting.

											
										
										
											2014-07-13 14:56:20 -07:00
+								removeFormatting :: Walkable Inline a => a -> [Inline]
-												Shared:  Added removeFormatting.

API change (addition of exported function).

											
										
										
											2014-07-13 10:13:22 -07:00
+								removeFormatting = query go . walk deNote
 								  where go :: Inline -> [Inline]
 								        go (Str xs)     = [Str xs]
 								        go Space        = [Space]
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								        go SoftBreak    = [SoftBreak]
-												Shared:  Added removeFormatting.

API change (addition of exported function).

											
										
										
											2014-07-13 10:13:22 -07:00
+								        go (Code _ x)   = [Str x]
 								        go (Math _ x)   = [Str x]
 								        go LineBreak    = [Space]
 								        go _            = []
-												Factored out deNote in Shared.

											
										
										
											2017-01-15 22:15:35 +01:00
 								deNote :: Inline -> Inline
 								deNote (Note _) = Str ""
 								deNote x        = x
-												Shared:  Added removeFormatting.

API change (addition of exported function).

											
										
										
											2014-07-13 10:13:22 -07:00
-												Generalized type of stringify.

											
										
										
											2013-08-28 08:43:51 -07:00
+								-- | Convert pandoc structure to a string with formatting removed.
-												Shared:  Changed stringify so it ignores notes.

Also documented this in README.

											
										
										
											2013-08-16 13:22:27 -07:00
+								-- Footnotes are skipped (since we don't want their contents in link
 								-- labels).
-												Generalized type of stringify.

											
										
										
											2013-08-28 08:43:51 -07:00
+								stringify :: Walkable Inline a => a -> String
-												Shared:  Changed stringify so it ignores notes.

Also documented this in README.

											
										
										
											2013-08-16 13:22:27 -07:00
+								stringify = query go . walk deNote
-												Added 'stringify' to Text.Pandoc.Shared.

											
										
										
											2010-11-27 07:08:06 -08:00
+								  where go :: Inline -> [Char]
 								        go Space = " "
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								        go SoftBreak = " "
-												Added 'stringify' to Text.Pandoc.Shared.

											
										
										
											2010-11-27 07:08:06 -08:00
+								        go (Str x) = x
-												Add support for attributes in inline Code.

Additional related changes:

* URLs in Code in autolinks now use class "url".
* Require highlighting-kate 0.2.8.2, which omits the final <br/> tag,
  essential for inline code.

											
										
										
											2011-01-26 20:44:25 -08:00
+								        go (Code _ x) = x
-												Added new prettyprinting module.

* Added Text.Pandoc.Pretty.
  This is better suited for pandoc than the 'pretty' package.
  One advantage is that we now get proper wrapping; Emph [Inline]
  is no longer treated as a big unwrappable unit. Previously
  we only got breaks for spaces at the "outer level." We can also
  more easily avoid doubled blank lines.  Performance is
  significantly better as well.

* Removed Text.Pandoc.Blocks.
  Text.Pandoc.Pretty allows you to define blocks and concatenate
  them.

* Modified markdown, RST, org readers to use Text.Pandoc.Pretty
  instead of Text.PrettyPrint.HughesPJ.

* Text.Pandoc.Shared:  Added writerColumns to WriterOptions.

* Markdown, RST, Org writers now break text at writerColumns.

* Added --columns command-line option, which sets stColumns
  and writerColumns.

* Table parsing:  If the size of the header > stColumns,
  use the header size as 100% for purposes of calculating
  relative widths of columns.

											
										
										
											2010-12-12 20:09:14 -08:00
+								        go (Math _ x) = x
-												EPUB TOC: replace literal "<br/>" with space.

Closes #2105.

											
										
										
											2015-08-10 16:58:47 -07:00
+								        go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
-												Shared: Use stringify to simplify inlineListToIdentifier.

											
										
										
											2010-12-19 10:13:36 -08:00
+								        go LineBreak = " "
-												Added 'stringify' to Text.Pandoc.Shared.

											
										
										
											2010-11-27 07:08:06 -08:00
+								        go _ = ""
-												Correctly implement capitalisation.

Using `map toUpper` to capitalise text is wrong, as e.g.
“Straße” should be converted to “STRASSE”, which is 1 character
longer. This commit adds a `capitalize` function and replaces
2 identical implementations in different modules (`toCaps` and
`capitalize`) with it.

											
										
										
											2014-08-03 16:48:55 +04:00
+								-- | Bring all regular text in a pandoc structure to uppercase.
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								--
-												Correctly implement capitalisation.

Using `map toUpper` to capitalise text is wrong, as e.g.
“Straße” should be converted to “STRASSE”, which is 1 character
longer. This commit adds a `capitalize` function and replaces
2 identical implementations in different modules (`toCaps` and
`capitalize`) with it.

											
										
										
											2014-08-03 16:48:55 +04:00
+								-- This function correctly handles cases where a lowercase character doesn't
 								-- match to a single uppercase character – e.g. “Straße” would be converted
 								-- to “STRASSE”, not “STRAßE”.
 								capitalize :: Walkable Inline a => a -> a
 								capitalize = walk go
 								  where go :: Inline -> Inline
 								        go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
 								        go x       = x
-												Shared: Export compactify', formerly in Markdown reader.

											
										
										
											2012-09-27 17:22:17 -07:00
+								-- | Change final list item from @Para@ to @Plain@ if the list contains
 								-- no other @Para@ blocks.  Like compactify, but operates on @Blocks@ rather
 								-- than @[Block]@.
 								compactify' :: [Blocks]  -- ^ List of list items (each a list of blocks)
 								           -> [Blocks]
 								compactify' [] = []
 								compactify' items =
 								  let (others, final) = (init items, last items)
 								  in  case reverse (B.toList final) of
 								           (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
 								                            -- if this is only Para, change to Plain
 								                            [_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
 								                            _   -> items
 								           _      -> items
-												Fixed runtime error with compactify'DL on certain lists.

Closes #1452.  Added test.

											
										
										
											2014-07-25 10:53:04 -07:00
+								-- | Like @compactify'@, but acts on items of definition lists.
-												Move `compactify'DL` from Markdown reader into Shared

The function `compactify'DL`, used to change the final definition item of a
definition list into a `Plain` iff all other items are `Plain`s as well, is
useful in many parsers and hence moved into Text.Pandoc.Shared.

											
										
										
											2014-04-19 14:48:35 +02:00
+								compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
 								compactify'DL items =
 								  let defs = concatMap snd items
-												Fixed runtime error with compactify'DL on certain lists.

Closes #1452.  Added test.

											
										
										
											2014-07-25 10:53:04 -07:00
+								  in  case reverse (concatMap B.toList defs) of
 								           (Para x:xs)
 								             | not (any isPara xs) ->
 								                   let (t,ds) = last items
 								                       lastDef = B.toList $ last ds
 								                       ds' = init ds ++
 								                             if null lastDef
 								                                then [B.fromList lastDef]
 								                                else [B.fromList $ init lastDef ++ [Plain x]]
 								                    in init items ++ [(t, ds')]
 								             | otherwise           -> items
 								           _                       -> items
-												Move `compactify'DL` from Markdown reader into Shared

The function `compactify'DL`, used to change the final definition item of a
definition list into a `Plain` iff all other items are `Plain`s as well, is
useful in many parsers and hence moved into Text.Pandoc.Shared.

											
										
										
											2014-04-19 14:48:35 +02:00
-												Shared: add function combining lines using LineBreak

The `linesToBlock` function takes a list of lines and combines them by appending
a hard `LineBreak` to each line and concatenating the result, putting the result
it into a `Para`. This is most useful when dealing when converting `LineBlock`
elements.

											
										
										
											2016-10-13 08:46:38 +02:00
+								-- | Combine a list of lines by adding hard linebreaks.
 								combineLines :: [[Inline]] -> [Inline]
 								combineLines = intercalate [LineBreak]
 								-- | Convert a list of lines into a paragraph with hard line breaks. This is
 								--   useful e.g. for rudimentary support of LineBlock elements in writers.
 								linesToPara :: [[Inline]] -> Block
 								linesToPara = Para . combineLines
-												Changed heuristic in compactify.

compactify has to decide whether a Para that ends a list is a Para
intentionally, or just because of the blank lines at the end of
every list.  In the latter case the Para is turned to a Plain.

The old heuristic was:  change final Para to Plain iff the other
items all end in Plain.  This produces bad results when, for example,
an item contains just a Plain and an HTML comment, as it

- a
<!--
- b
-->
-c

The new heuristic:  change final Para to Plain iff the other items
don't contain a Para.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1616 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-11-01 02:38:18 +00:00
+								isPara :: Block -> Bool
 								isPara (Para _) = True
 								isPara _        = False
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Data structure for defining hierarchical Pandoc documents
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								data Element = Blk Block
-												Shared:  Changed type of Element.

Sec now includes a field for Attr rather than just String
(the identifier).

Note, this is an API change.

											
										
										
											2013-02-12 20:13:23 -08:00
+								             | Sec Int [Int] Attr [Inline] [Element]
 								             --    lvl  num attributes label    contents
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								             deriving (Eq, Read, Show, Typeable, Data)
-												Use query instead of queryWith.

											
										
										
											2013-08-10 18:13:38 -07:00
+								instance Walkable Inline Element where
 								  walk f (Blk x) = Blk (walk f x)
 								  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
 								  walkM f (Blk x) = Blk `fmap` walkM f x
 								  walkM f (Sec lev nums attr ils elts) = do
 								    ils' <- walkM f ils
 								    elts' <- walkM f elts
 								    return $ Sec lev nums attr ils' elts'
 								  query f (Blk x) = query f x
 								  query f (Sec _ _ _ ils elts) = query f ils <> query f elts
 								instance Walkable Block Element where
 								  walk f (Blk x) = Blk (walk f x)
 								  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
 								  walkM f (Blk x) = Blk `fmap` walkM f x
 								  walkM f (Sec lev nums attr ils elts) = do
 								    ils' <- walkM f ils
 								    elts' <- walkM f elts
 								    return $ Sec lev nums attr ils' elts'
 								  query f (Blk x) = query f x
 								  query f (Sec _ _ _ ils elts) = query f ils <> query f elts
-												Shared:  Fixed uniqueIdent so it behaves as described in README.

Previously some characters that are illegal in HTML identifiers,
such as '<', were being allowed in header identifiers.  The logic
has now been fixed. Thanks to Xyne for reporting.

											
										
										
											2010-03-28 22:29:31 -07:00
+								-- | Convert Pandoc inline list to plain text identifier.  HTML
 								-- identifiers must start with a letter, and may contain only
-												Don't allow colon in autogenerated HTML identifiers.

They have a special meaning in XML (e.g. in EPUB).

											
										
										
											2010-07-04 23:26:04 -07:00
+								-- letters, digits, and the characters _-.
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								inlineListToIdentifier :: [Inline] -> String
-												Shared:  Fixed uniqueIdent so it behaves as described in README.

Previously some characters that are illegal in HTML identifiers,
such as '<', were being allowed in header identifiers.  The logic
has now been fixed. Thanks to Xyne for reporting.

											
										
										
											2010-03-28 22:29:31 -07:00
+								inlineListToIdentifier =
-												Shared: Use stringify to simplify inlineListToIdentifier.

											
										
										
											2010-12-19 10:13:36 -08:00
+								  dropWhile (not . isAlpha) . intercalate "-" . words .
 								    map (nbspToSp . toLower) .
-												Revert "Use -XNoImplicitPrelude and 'import Prelude' explicitly."

This reverts commit c423dbb5a34c2d1195020e0f0ca3aae883d0749b.

											
										
										
											2015-11-09 10:08:22 -08:00
+								    filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
-												Shared: Use stringify to simplify inlineListToIdentifier.

											
										
										
											2010-12-19 10:13:36 -08:00
+								    stringify
 								 where nbspToSp '\160'     =  ' '
 								       nbspToSp x          =  x
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
 								hierarchicalize :: [Block] -> [Element]
-												Revert "Shared.hierarchicalize: Don't number subsections of unnumbered sections."

This reverts commit 2a46042661a088096ac54097db5cd3674438bb63.

											
										
										
											2014-07-21 20:47:18 -07:00
+								hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
 								hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
 								hierarchicalizeWithIds [] = return []
 								hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
 								  lastnum <- S.get
 								  let lastnum' = take level lastnum
 								  let newnum = case length lastnum' of
 								                    x | "unnumbered" `elem` classes -> []
 								                      | x >= level -> init lastnum' ++ [last lastnum' + 1]
 								                      | otherwise -> lastnum ++
 								                           replicate (level - length lastnum - 1) 0 ++ [1]
 								  unless (null newnum) $ S.put newnum
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								  let (sectionContents, rest) = break (headerLtEq level) xs
-												Revert "Shared.hierarchicalize: Don't number subsections of unnumbered sections."

This reverts commit 2a46042661a088096ac54097db5cd3674438bb63.

											
										
										
											2014-07-21 20:47:18 -07:00
+								  sectionContents' <- hierarchicalizeWithIds sectionContents
 								  rest' <- hierarchicalizeWithIds rest
-												Shared:  Changed type of Element.

Sec now includes a field for Attr rather than just String
(the identifier).

Note, this is an API change.

											
										
										
											2013-02-12 20:13:23 -08:00
+								  return $ Sec level newnum attr title' sectionContents' : rest'
-												Changed hierarchicalize so it treats references div as top-level header.

Fixes a bug with `--section-divs`, where the final references section
added by pandoc-citeproc, enclosed in its own div, got put in the
div for the section previous to it.

This fixes #2294.  Longer term, we might think about how hierarchicalize
should interact with Div elements.

											
										
										
											2015-07-12 13:57:14 -07:00
+								hierarchicalizeWithIds ((Div ("",["references"],[])
 								                         (Header level (ident,classes,kvs) title' : xs)):ys) =
 								  hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs)
 								                           title') : (xs ++ ys))
-												Revert "Shared.hierarchicalize: Don't number subsections of unnumbered sections."

This reverts commit 2a46042661a088096ac54097db5cd3674438bb63.

											
										
										
											2014-07-21 20:47:18 -07:00
+								hierarchicalizeWithIds (x:rest) = do
 								  rest' <- hierarchicalizeWithIds rest
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								  return $ (Blk x) : rest'
 								headerLtEq :: Int -> Block -> Bool
-												Added Attr field to Header.

Previously header ids were autogenerated by the writers.
Now they are generated (unless supplied explicitly) in the
markdown parser, if the `header_identifiers` extension is
selected.

In addition, the textile reader now supports id attributes on
headers.

											
										
										
											2012-10-29 22:45:52 -07:00
+								headerLtEq level (Header l _ _) = l <= level
-												Changed hierarchicalize so it treats references div as top-level header.

Fixes a bug with `--section-divs`, where the final references section
added by pandoc-citeproc, enclosed in its own div, got put in the
div for the section previous to it.

This fixes #2294.  Longer term, we might think about how hierarchicalize
should interact with Div elements.

											
										
										
											2015-07-12 13:57:14 -07:00
+								headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _))  = l <= level
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								headerLtEq _ _ = False
-												Shared: Export uniqueIdent, don't allow tilde in identifier.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1894 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2010-03-16 06:45:52 +00:00
+								-- | Generate a unique identifier from a list of inlines.
 								-- Second argument is a list of already used identifiers.
-												Changed type of Shared.uniqueIdent argument from [String] to Set String.

This avoids performance problems in documents with many identically
named headers.

Closes #2671.

											
										
										
											2016-01-22 10:16:47 -08:00
+								uniqueIdent :: [Inline] -> Set.Set String -> String
-												Added odt reader

Fully implemented features:

* Paragraphs
* Headers
* Basic styling
* Unordered lists
* Ordered lists
* External Links
* Internal Links
* Footnotes, Endnotes
* Blockquotes

Partly implemented features:

* Citations
  Very basic, but pandoc can't do much more
* Tables
  No headers, no sizing, limited styling

											
										
										
											2015-07-23 09:06:14 +02:00
+								uniqueIdent title' usedIdents
 								  =  let baseIdent = case inlineListToIdentifier title' of
-												Shared:  Fixed uniqueIdent so it behaves as described in README.

Previously some characters that are illegal in HTML identifiers,
such as '<', were being allowed in header identifiers.  The logic
has now been fixed. Thanks to Xyne for reporting.

											
										
										
											2010-03-28 22:29:31 -07:00
+								                        ""   -> "section"
 								                        x    -> x
-												Added odt reader

Fully implemented features:

* Paragraphs
* Headers
* Basic styling
* Unordered lists
* Ordered lists
* External Links
* Internal Links
* Footnotes, Endnotes
* Blockquotes

Partly implemented features:

* Citations
  Very basic, but pandoc can't do much more
* Tables
  No headers, no sizing, limited styling

											
										
										
											2015-07-23 09:06:14 +02:00
+								         numIdent n = baseIdent ++ "-" ++ show n
-												Changed type of Shared.uniqueIdent argument from [String] to Set String.

This avoids performance problems in documents with many identically
named headers.

Closes #2671.

											
										
										
											2016-01-22 10:16:47 -08:00
+								     in  if baseIdent `Set.member` usedIdents
 								           then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								                  Just x  -> numIdent x
 								                  Nothing -> baseIdent   -- if we have more than 60,000, allow repeats
-												Added odt reader

Fully implemented features:

* Paragraphs
* Headers
* Basic styling
* Unordered lists
* Ordered lists
* External Links
* Internal Links
* Footnotes, Endnotes
* Blockquotes

Partly implemented features:

* Citations
  Very basic, but pandoc can't do much more
* Tables
  No headers, no sizing, limited styling

											
										
										
											2015-07-23 09:06:14 +02:00
+								           else baseIdent
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | True if block is a Header block.
 								isHeaderBlock :: Block -> Bool
-												Added Attr field to Header.

Previously header ids were autogenerated by the writers.
Now they are generated (unless supplied explicitly) in the
markdown parser, if the `header_identifiers` extension is
selected.

In addition, the textile reader now supports id attributes on
headers.

											
										
										
											2012-10-29 22:45:52 -07:00
+								isHeaderBlock (Header _ _ _) = True
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								isHeaderBlock _ = False
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
+								-- | Shift header levels up or down.
 								headerShift :: Int -> Pandoc -> Pandoc
-												Use walk, walkM in place of bottomUp, bottomUpM when possible.

They are significantly faster.

											
										
										
											2013-08-10 18:45:00 -07:00
+								headerShift n = walk shift
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
+								  where shift :: Block -> Block
-												Added Attr field to Header.

Previously header ids were autogenerated by the writers.
Now they are generated (unless supplied explicitly) in the
markdown parser, if the `header_identifiers` extension is
selected.

In addition, the textile reader now supports id attributes on
headers.

											
										
										
											2012-10-29 22:45:52 -07:00
+								        shift (Header level attr inner) = Header (level + n) attr inner
 								        shift x                         = x
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
-												Shared: export isTightList.

											
										
										
											2013-01-07 20:12:05 -08:00
+								-- | Detect if a list is tight.
 								isTightList :: [[Block]] -> Bool
-												HLint: Use all

Replace `and . map` with `all`.

											
										
										
											2013-12-19 17:06:27 -05:00
+								isTightList = all firstIsPlain
-												Shared: export isTightList.

											
										
										
											2013-01-07 20:12:05 -08:00
+								  where firstIsPlain (Plain _ : _) = True
 								        firstIsPlain _             = False
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template..
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								-- | Set a field of a 'Meta' object.  If the field already has a value,
 								-- convert it into a list with the new value appended to the old value(s).
 								addMetaField :: ToMetaValue a
 								             => String
 								             -> a
 								             -> Meta
 								             -> Meta
 								addMetaField key val (Meta meta) =
 								  Meta $ M.insertWith combine key (toMetaValue val) meta
-												Shared addMetaField:  if old and new values both lists, concatenate.

											
										
										
											2014-05-12 13:05:42 -07:00
+								  where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template..
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								        combine newval x             = MetaList [x, newval]
-												Shared addMetaField:  if old and new values both lists, concatenate.

											
										
										
											2014-05-12 13:05:42 -07:00
+								        tolist (MetaList ys)         = ys
 								        tolist y                     = [y]
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template..
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
 								-- | Create 'Meta' from old-style title, authors, date.  This is
 								-- provided to ease the transition from the old API.
 								makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
 								makeMeta title authors date =
 								      addMetaField "title" (B.fromList title)
 								    $ addMetaField "author" (map B.fromList authors)
 								    $ addMetaField "date" (B.fromList date)
 								    $ nullMeta
-												Moved renderTags' from HTML reader & SelfContained to Shared.

Improved removal of markdown="1" attribute in Markdow reader.

											
										
										
											2012-08-15 09:42:16 -07:00
+								--
 								-- TagSoup HTML handling
 								--
 								-- | Render HTML tags.
 								renderTags' :: [Tag String] -> String
 								renderTags' = renderTagsOptions
-												HLint: use `elem` and `notElem`

Replaces long conditional chains with calls to `elem` and `notElem`.

											
										
										
											2013-12-19 20:19:24 -05:00
+								               renderOptions{ optMinimize = matchTags ["hr", "br", "img",
 								                                                       "meta", "link"]
 								                            , optRawTag   = matchTags ["script", "style"] }
 								              where matchTags = \tags -> flip elem tags . map toLower
-												Moved renderTags' from HTML reader & SelfContained to Shared.

Improved removal of markdown="1" attribute in Markdow reader.

											
										
										
											2012-08-15 09:42:16 -07:00
-												Improved template handling:

+ Split template haskell functions into new module,
  Text.Pandoc.TH
+ Distinguish contentsOf and binaryContentsOf; the former
  uses text mode in Windows, while the latter uses binary mode


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1368 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-08-02 17:22:55 +00:00
+								--
 								-- File handling
 								--
-												Reworked Text.Pandoc.ODT to use zip-archive instead of calling external 'zip'.

+ Removed utf8-string and xml-light modules, and unneeded content.xml.
+ Removed code for building reference.odt from Setup.hs.
  The ODT is now built using template haskell in Text.Pandoc.ODT.
+ Removed copyright statements for utf8-string and xml modules,
  since they are no longer included in the source.
+ README: Removed claim that 'zip' is needed for ODT production.
+ Removed dependency on 'zip' from debian/control.
+ Text.Pandoc.Shared: Removed withTempDir, added inDirectory.
+ Added makeZip to Text.Pandoc.TH.
+ pandoc.cabal: Added dependencies on old-time, zip-archive, and utf8-string.
  Added markdown2pdf files to extra-sources list.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1417 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-09-04 02:51:28 +00:00
+								-- | Perform an IO action in a directory, returning to starting directory.
 								inDirectory :: FilePath -> IO a -> IO a
-												fix inDirectory to reset to the original directory in case an exception occurs

											
										
										
											2014-10-08 23:25:01 +02:00
+								inDirectory path action = E.bracket
 								                             getCurrentDirectory
 								                             setCurrentDirectory
 								                             (const $ setCurrentDirectory path >> action)
-												Removed TH module; refactored LaTeXMathML not to use TH.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1692 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-12-31 01:11:23 +00:00
-												Let reference.docx/odt behave as if they are virtual data files.

Now they are constructed on the fly from their components,
but we now allow them to be printed with `--print-default-data-file`
and to override the defaults if placed in the user data directory.

Shared now exports getDefaultReferenceDocx and getDefaultReferenceODT
(API change).

These functions have been removed from the Docx and ODT writers.

Shared.readDataFile has been modified so that requests to read
a reference.odt or reference.docx will use these functions to
generate the files.

											
										
										
											2015-06-28 22:30:21 -07:00
+								getDefaultReferenceDocx :: Maybe FilePath -> IO Archive
 								getDefaultReferenceDocx datadir = do
 								  let paths = ["[Content_Types].xml",
 								               "_rels/.rels",
 								               "docProps/app.xml",
 								               "docProps/core.xml",
 								               "word/document.xml",
 								               "word/fontTable.xml",
 								               "word/footnotes.xml",
 								               "word/numbering.xml",
 								               "word/settings.xml",
 								               "word/webSettings.xml",
 								               "word/styles.xml",
 								               "word/_rels/document.xml.rels",
 								               "word/_rels/footnotes.xml.rels",
 								               "word/theme/theme1.xml"]
 								  let toLazy = fromChunks . (:[])
 								  let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$>
 								                                          getCurrentTime
 								                            contents <- toLazy <$> readDataFile datadir
 								                                                       ("docx/" ++ path)
 								                            return $ toEntry path epochtime contents
 								  mbArchive <- case datadir of
 								                    Nothing   -> return Nothing
 								                    Just d    -> do
 								                       exists <- doesFileExist (d </> "reference.docx")
 								                       if exists
 								                          then return (Just (d </> "reference.docx"))
 								                          else return Nothing
 								  case mbArchive of
 								     Just arch -> toArchive <$> BL.readFile arch
 								     Nothing   -> foldr addEntryToArchive emptyArchive <$>
 								                     mapM pathToEntry paths
 								getDefaultReferenceODT :: Maybe FilePath -> IO Archive
 								getDefaultReferenceODT datadir = do
 								  let paths = ["mimetype",
 								               "manifest.rdf",
 								               "styles.xml",
 								               "content.xml",
 								               "meta.xml",
 								               "settings.xml",
 								               "Configurations2/accelerator/current.xml",
 								               "Thumbnails/thumbnail.png",
 								               "META-INF/manifest.xml"]
 								  let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
 								                            contents <- (fromChunks . (:[])) `fmap`
 								                                          readDataFile datadir ("odt/" ++ path)
 								                            return $ toEntry path epochtime contents
 								  mbArchive <- case datadir of
 								                    Nothing   -> return Nothing
 								                    Just d    -> do
 								                       exists <- doesFileExist (d </> "reference.odt")
 								                       if exists
 								                          then return (Just (d </> "reference.odt"))
 								                          else return Nothing
 								  case mbArchive of
 								     Just arch -> toArchive <$> BL.readFile arch
 								     Nothing   -> foldr addEntryToArchive emptyArchive <$>
 								                     mapM pathToEntry paths
-												cabal:  Added http-conduit flag, which allows fetching https resources.

It also brings in a large number of dependencies (http-conduit and its
dependencies), which is why for now it is an optional flag.

Closes #820.

											
										
										
											2013-07-04 22:40:23 -07:00
+								readDefaultDataFile :: FilePath -> IO BS.ByteString
-												Let reference.docx/odt behave as if they are virtual data files.

Now they are constructed on the fly from their components,
but we now allow them to be printed with `--print-default-data-file`
and to override the defaults if placed in the user data directory.

Shared now exports getDefaultReferenceDocx and getDefaultReferenceODT
(API change).

These functions have been removed from the Docx and ODT writers.

Shared.readDataFile has been modified so that requests to read
a reference.odt or reference.docx will use these functions to
generate the files.

											
										
										
											2015-06-28 22:30:21 -07:00
+								readDefaultDataFile "reference.docx" =
 								  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing
 								readDefaultDataFile "reference.odt" =
 								  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing
-												Data files changes.

* Added `embed_data_files` flag.  (not yet used)
* Shared no longer exports `findDataFile`.
* `readDataFile` now returns a strict bytestring.
* Shared now exports `readDataFileUTF8` which returns a string like
  the old `readDataFile`.
* Rewrote modules to use new data file functions and to avoid
  using functions from Paths_pandoc directly.

											
										
										
											2012-12-29 17:44:02 -08:00
+								readDefaultDataFile fname =
 								#ifdef EMBED_DATA_FILES
-												Shared: readDefaultDataFile: normalize the paths.

This fixes bugs in `--self-contained` on pandoc compiled with
`embed_data_files`.  The bugs affect (a) paths containing `..`, (b)
Windows, where `\` is path separator.

Closes #833.

											
										
										
											2013-04-19 23:02:38 -07:00
+								  case lookup (makeCanonical fname) dataFiles of
-												Shared:  Better error message when default data file not found.

Listing the full path can confuse people who are using
`--self-contained`:  they might have intended the file to be
found locally.  So now we just list the data file name.

											
										
										
											2013-08-15 17:21:56 -07:00
+								    Nothing       -> err 97 $ "Could not find data file " ++ fname
-												Made `embed_data_files` flag work.

											
										
										
											2012-12-29 18:45:20 -08:00
+								    Just contents -> return contents
-												Removed unneeded imports.

											
										
										
											2015-09-26 22:56:13 -07:00
+								  where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
-												Shared: readDefaultDataFile: normalize the paths.

This fixes bugs in `--self-contained` on pandoc compiled with
`embed_data_files`.  The bugs affect (a) paths containing `..`, (b)
Windows, where `\` is path separator.

Closes #833.

											
										
										
											2013-04-19 23:02:38 -07:00
+								        transformPathParts = reverse . foldl go []
 								        go as     "."  = as
 								        go (_:as) ".." = as
 								        go as     x    = x : as
-												Data files changes.

* Added `embed_data_files` flag.  (not yet used)
* Shared no longer exports `findDataFile`.
* `readDataFile` now returns a strict bytestring.
* Shared now exports `readDataFileUTF8` which returns a string like
  the old `readDataFile`.
* Rewrote modules to use new data file functions and to avoid
  using functions from Paths_pandoc directly.

											
										
										
											2012-12-29 17:44:02 -08:00
+								#else
-												Minor fixes to previous commit.

* Instead of defining readmeFile in Text.Pandoc.Data (which we forgot
  to export anyway), we simply add a record for "README" to the
  `dataFiles` lookup table.  This allows simplifying some of the code
  for `readDefaultDataFile` in SHared.

* As a bonus, `pandoc --print-default-data-file README` now works.

											
										
										
											2015-06-28 20:59:18 -07:00
+								  getDataFileName fname' >>= checkExistence >>= BS.readFile
-												Rename README to MANUAL.txt

											
										
										
											2016-07-20 14:12:57 +02:00
+								    where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
-												Shared: Added findFirstFile, findDataFile, refactored readDataFile.

											
										
										
											2010-11-19 22:13:30 -08:00
-												New method for producing man pages.

This change adds `--man1` and `--man5` options to pandoc, so
pandoc can generate its own man pages.

It removes the old overly complex method of building a separate
executable (but not installing it) just to create the man pages.

The man pages are no longer automatically created in the build
process.

The man/ directory has been removed.  The man page templates
have been moved to data/.

New unexported module:  Text.Pandoc.ManPages.

Text.Pandoc.Data now exports readmeFile, and `readDataFile`
knows how to find README.

Closes #2190.

											
										
										
											2015-06-28 14:39:17 -07:00
+								checkExistence :: FilePath -> IO FilePath
 								checkExistence fn = do
 								  exists <- doesFileExist fn
 								  if exists
 								     then return fn
 								     else err 97 ("Could not find data file " ++ fn)
-												Remove compiler warning with embed_data_files.

											
										
										
											2015-10-17 21:21:52 -07:00
+								#endif
-												New method for producing man pages.

This change adds `--man1` and `--man5` options to pandoc, so
pandoc can generate its own man pages.

It removes the old overly complex method of building a separate
executable (but not installing it) just to create the man pages.

The man pages are no longer automatically created in the build
process.

The man/ directory has been removed.  The man page templates
have been moved to data/.

New unexported module:  Text.Pandoc.ManPages.

Text.Pandoc.Data now exports readmeFile, and `readDataFile`
knows how to find README.

Closes #2190.

											
										
										
											2015-06-28 14:39:17 -07:00
-												Added --data-dir option.

+ This specifies a user data directory. If not specified, will default
  to ~/.pandoc on unix or Application Data\pandoc on Windows.
  Files placed in the user data directory will override system default
  data files.
+ Added datadir parameter to readDataFile, saveOpenDocumentAsODT,
  latexMathMLScript, s5HeaderIncludes, and getTemplate.  Removed
  getDefaultTemplate.
+ Updated documentation.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1809 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2010-01-14 05:54:38 +00:00
+								-- | Read file from specified user data directory or, if not found there, from
 								-- Cabal data directory.
-												cabal:  Added http-conduit flag, which allows fetching https resources.

It also brings in a large number of dependencies (http-conduit and its
dependencies), which is why for now it is an optional flag.

Closes #820.

											
										
										
											2013-07-04 22:40:23 -07:00
+								readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
-												Data files changes.

* Added `embed_data_files` flag.  (not yet used)
* Shared no longer exports `findDataFile`.
* `readDataFile` now returns a strict bytestring.
* Shared now exports `readDataFileUTF8` which returns a string like
  the old `readDataFile`.
* Rewrote modules to use new data file functions and to avoid
  using functions from Paths_pandoc directly.

											
										
										
											2012-12-29 17:44:02 -08:00
+								readDataFile Nothing fname = readDefaultDataFile fname
 								readDataFile (Just userDir) fname = do
 								  exists <- doesFileExist (userDir </> fname)
 								  if exists
-												cabal:  Added http-conduit flag, which allows fetching https resources.

It also brings in a large number of dependencies (http-conduit and its
dependencies), which is why for now it is an optional flag.

Closes #820.

											
										
										
											2013-07-04 22:40:23 -07:00
+								     then BS.readFile (userDir </> fname)
-												Data files changes.

* Added `embed_data_files` flag.  (not yet used)
* Shared no longer exports `findDataFile`.
* `readDataFile` now returns a strict bytestring.
* Shared now exports `readDataFileUTF8` which returns a string like
  the old `readDataFile`.
* Rewrote modules to use new data file functions and to avoid
  using functions from Paths_pandoc directly.

											
										
										
											2012-12-29 17:44:02 -08:00
+								     else readDefaultDataFile fname
 								-- | Same as 'readDataFile' but returns a String instead of a ByteString.
 								readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
 								readDataFileUTF8 userDir fname =
 								  UTF8.toString `fmap` readDataFile userDir fname
-												Complete rewrite of LaTeX reader.

* The new reader is more robust, accurate, and extensible.
  It is still quite incomplete, but it should be easier
  now to add features.

* Text.Pandoc.Parsing: Added withRaw combinator.

* Markdown reader: do escapedChar before raw latex inline.
  Otherwise we capture commands like \{.

* Fixed latex citation tests for new citeproc.

* Handle \include{} commands in latex.
  This is done in pandoc.hs, not the (pure) latex reader.
  But the reader exports the needed function, handleIncludes.

* Moved err and warn from pandoc.hs to Shared.

* Fixed tests - raw tex should sometimes have trailing space.

* Updated lhs-test for highlighting-kate changes.

											
										
										
											2012-01-29 23:54:00 -08:00
-												Refactoring:

* Shared now exports fetchItem (instead of getItem) and openURL
* fetchItem has different parameters than getItem and includes
  some logic formerly in the ODT and Docx writers
* getItem still used in SelfContained

											
										
										
											2013-01-11 16:19:06 -08:00
+								-- | Read from a URL and return raw data and maybe mime type.
-												MIME cleanup.

  * Create a type synonym for MIME type (instead of `String`).
  * Add `getMimeTypeDef` function.
  * Avoid recreating MIME type `Map`s every time.
  * Move “Formula-...” case handling into `getMimeType`.

											
										
										
											2014-08-17 20:42:30 +04:00
+								openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
-												Shared.openURL:  Properly handle data: URIs.

											
										
										
											2013-05-28 12:48:17 -07:00
+								openURL u
-												Fixed shadowing error.

											
										
										
											2015-11-09 11:25:05 -08:00
+								  | Just u'' <- stripPrefix "data:" u =
 								    let mime     = takeWhile (/=',') u''
 								        contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u''
-												Shared: Fixed bug in openURL with data: URIs.

Previously the base-64 encoded bytestring was returned.
We now decode it so it's a proper image!

This should fix parsing of data: URLs.

											
										
										
											2013-11-19 13:15:24 -08:00
+								    in  return $ Right (decodeLenient contents, Just mime)
-												Removed dependency on conduit.

* http-conduit flag is now https.
* Instead of http-conduit, we depend on http-client and http-client-tls.

											
										
										
											2014-05-18 22:04:39 -07:00
+								#ifdef HTTP_CLIENT
-												Added `withSocketsDo` around http conduit code in `openURL`.

This should address #1080, but further testing on Windows is needed
before we can close the bug.

											
										
										
											2013-12-09 22:35:22 -08:00
+								  | otherwise = withSocketsDo $ E.try $ do
-												CPP workaround for deprecation of parseUrl in http-client.

											
										
										
											2016-07-03 21:29:47 -07:00
+								     let parseReq = parseRequest
-												Shared.openURL:  Set proxy with value of http_proxy env variable.

Note:  proxies with non-root paths are not supported,
because of limitations in http-conduit.

Closes #1211.

											
										
										
											2014-04-05 10:58:32 -07:00
+								     (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
-												CPP workaround for deprecation of parseUrl in http-client.

											
										
										
											2016-07-03 21:29:47 -07:00
+								     req <- parseReq u
 								     req' <- case proxy of
 								                     Left _   -> return req
 								                     Right pr -> (parseReq pr >>= \r ->
 								                                  return $ addProxy (host r) (port r) req)
 								                                  `mplus` return req
-												Use newManager instead of withManager in recent http-client.

This avoids a deprecation warning.

											
										
										
											2015-07-21 16:32:44 -07:00
+								     resp <- newManager tlsManagerSettings >>= httpLbs req'
-												cabal:  Added http-conduit flag, which allows fetching https resources.

It also brings in a large number of dependencies (http-conduit and its
dependencies), which is why for now it is an optional flag.

Closes #820.

											
										
										
											2013-07-04 22:40:23 -07:00
+								     return (BS.concat $ toChunks $ responseBody resp,
 								             UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
 								#else
-												Improved fetching of external resources.

* In Shared, openURL and fetchItem now return an Either, for
  better error handling. (API change.)
* Better error message when fetching a URL fails with
  `--self-contained`.
* EPUB writer: If resource not found, skip it, as in Docx writer.
* Closes #916.

											
										
										
											2013-07-18 20:58:14 -07:00
+								  | otherwise = E.try $ getBodyAndMimeType `fmap` browse
-												Put 'warn' in MonadIO.  Add warnings for math conversions in docx.

											
										
										
											2016-11-22 10:56:59 +01:00
+								              (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
-												Less verbose output from --self-contained.

Now one gets "Fetching [URL]..." for each URL fetched, but not
the full header.

											
										
										
											2013-05-04 21:53:06 -07:00
+								                  setOutHandler $ const (return ())
-												Shared.openURL:  Print diagnostic output to stderr, not stdout.

											
										
										
											2013-05-01 10:55:06 -07:00
+								                  setAllowRedirects True
 								                  request (getRequest' u'))
-												Shared:  openURL now follows redirects.

Closes #701.

											
										
										
											2013-03-26 08:31:45 -07:00
+								  where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
 								        getRequest' uriString = case parseURI uriString of
 								                                   Nothing -> error ("Not a valid URL: " ++
 								                                                        uriString)
 								                                   Just v  -> mkRequest GET v
-												Shared.openURL:  URL-escape pipe characters.

Even though these are legal, Network.URI doesn't regard them
as legal in URLs.  So we escape them first.

Closes #535.

											
										
										
											2013-04-28 22:57:17 -07:00
+								        u' = escapeURIString (/= '|') u  -- pipes are rejected by Network.URI
-												cabal:  Added http-conduit flag, which allows fetching https resources.

It also brings in a large number of dependencies (http-conduit and its
dependencies), which is why for now it is an optional flag.

Closes #820.

											
										
										
											2013-07-04 22:40:23 -07:00
+								#endif
-												Move getItem from SelfContained to Share; export getItem.

											
										
										
											2013-01-11 11:30:31 -08:00
-												Complete rewrite of LaTeX reader.

* The new reader is more robust, accurate, and extensible.
  It is still quite incomplete, but it should be easier
  now to add features.

* Text.Pandoc.Parsing: Added withRaw combinator.

* Markdown reader: do escapedChar before raw latex inline.
  Otherwise we capture commands like \{.

* Fixed latex citation tests for new citeproc.

* Handle \include{} commands in latex.
  This is done in pandoc.hs, not the (pure) latex reader.
  But the reader exports the needed function, handleIncludes.

* Moved err and warn from pandoc.hs to Shared.

* Fixed tests - raw tex should sometimes have trailing space.

* Updated lhs-test for highlighting-kate changes.

											
										
										
											2012-01-29 23:54:00 -08:00
+								--
 								-- Error reporting
 								--
-												Shared:  put err into MonadIO.

											
										
										
											2016-12-10 11:40:14 +01:00
+								err :: MonadIO m => Int -> String -> m a
 								err exitCode msg = liftIO $ do
-												Shared: changed err and warn output.

Don't print program name in either case.
Print [warning] for warnings.

											
										
										
											2016-12-03 23:12:09 +01:00
+								  UTF8.hPutStrLn stderr msg
-												Complete rewrite of LaTeX reader.

* The new reader is more robust, accurate, and extensible.
  It is still quite incomplete, but it should be easier
  now to add features.

* Text.Pandoc.Parsing: Added withRaw combinator.

* Markdown reader: do escapedChar before raw latex inline.
  Otherwise we capture commands like \{.

* Fixed latex citation tests for new citeproc.

* Handle \include{} commands in latex.
  This is done in pandoc.hs, not the (pure) latex reader.
  But the reader exports the needed function, handleIncludes.

* Moved err and warn from pandoc.hs to Shared.

* Fixed tests - raw tex should sometimes have trailing space.

* Updated lhs-test for highlighting-kate changes.

											
										
										
											2012-01-29 23:54:00 -08:00
+								  exitWith $ ExitFailure exitCode
 								  return undefined
-												Put 'warn' in MonadIO.  Add warnings for math conversions in docx.

											
										
										
											2016-11-22 10:56:59 +01:00
+								warn :: MonadIO m => String -> m ()
 								warn msg = liftIO $ do
-												Shared: changed err and warn output.

Don't print program name in either case.
Print [warning] for warnings.

											
										
										
											2016-12-03 23:12:09 +01:00
+								  UTF8.hPutStrLn stderr $ "[warning] " ++ msg
-												Added safeRead to Text.Pandoc.Shared.

											
										
										
											2012-08-09 07:52:39 -07:00
-												Move utility error functions to Text.Pandoc.Shared

											
										
										
											2015-02-18 21:05:47 +00:00
+								mapLeft :: (a -> b) -> Either a c -> Either b c
 								mapLeft f (Left x) = Left (f x)
 								mapLeft _ (Right x) = Right x
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								-- | Remove intermediate "." and ".." directories from a path.
 								--
-												Fixed haddock comment.

											
										
										
											2014-08-13 13:59:18 -07:00
+								-- > collapseFilePath "./foo" == "foo"
 								-- > collapseFilePath "/bar/../baz" == "/baz"
 								-- > collapseFilePath "/../baz" == "/../baz"
 								-- > collapseFilePath "parent/foo/baz/../bar" ==  "parent/foo/bar"
 								-- > collapseFilePath "parent/foo/baz/../../bar" ==  "parent/bar"
 								-- > collapseFilePath "parent/foo/.." ==  "parent"
 								-- > collapseFilePath "/parent/foo/../../bar" ==  "/bar"
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								collapseFilePath :: FilePath -> FilePath
-												MediaBag:  ensure that / is always used as path separator.

											
										
										
											2015-09-26 22:40:58 -07:00
+								collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								  where
 								    go rs "." = rs
 								    go r@(p:rs) ".." = case p of
 								                            ".." -> ("..":r)
-												Shared: Make collapseFilePath OS-agnostic

											
										
										
											2014-09-25 12:42:53 +01:00
+								                            (checkPathSeperator -> Just True) -> ("..":r)
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								                            _ -> rs
-												MediaBag:  ensure that / is always used as path separator.

											
										
										
											2015-09-26 22:40:58 -07:00
+								    go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								    go rs x = x:rs
-												Shared: Make collapseFilePath OS-agnostic

											
										
										
											2014-09-25 12:42:53 +01:00
+								    isSingleton [] = Nothing
 								    isSingleton [x] = Just x
 								    isSingleton _ = Nothing
 								    checkPathSeperator = fmap isPathSeparator . isSingleton
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid DRY.

											
										
										
											2016-10-12 17:42:30 +02:00
+								--
 								-- File selection from the archive
 								--
 								filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)]
 								filteredFilesFromArchive zf f =
 								  mapMaybe (fileAndBinary zf) (filter f (filesInArchive zf))
 								  where
 								    fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
-												Use bind function instead of pattern matching

											
										
										
											2016-10-17 16:58:53 +02:00
+								    fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid DRY.

											
										
										
											2016-10-12 17:42:30 +02:00
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
+								---
 								--- Squash blocks into inlines
 								---
 								blockToInlines :: Block -> [Inline]
 								blockToInlines (Plain ils) = ils
 								blockToInlines (Para ils) = ils
-												Shared: add function combining lines using LineBreak

The `linesToBlock` function takes a list of lines and combines them by appending
a hard `LineBreak` to each line and concatenating the result, putting the result
it into a `Para`. This is most useful when dealing when converting `LineBlock`
elements.

											
										
										
											2016-10-13 08:46:38 +02:00
+								blockToInlines (LineBlock lns) = combineLines lns
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
+								blockToInlines (CodeBlock attr str) = [Code attr str]
 								blockToInlines (RawBlock fmt str) = [RawInline fmt str]
-												Shared: Add BlockQuote to blocksToInlines

											
										
										
											2016-06-22 13:41:53 -04:00
+								blockToInlines (BlockQuote blks) = blocksToInlines blks
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
+								blockToInlines (OrderedList _ blkslst) =
 								  concatMap blocksToInlines blkslst
 								blockToInlines (BulletList blkslst) =
 								  concatMap blocksToInlines blkslst
 								blockToInlines (DefinitionList pairslst) =
 								  concatMap f pairslst
 								  where
 								    f (ils, blkslst) = ils ++
 								      [Str ":", Space] ++
 								      (concatMap blocksToInlines blkslst)
 								blockToInlines (Header _ _  ils) = ils
 								blockToInlines (HorizontalRule) = []
 								blockToInlines (Table _ _ _ headers rows) =
 								  intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl
 								  where
 								    tbl = headers : rows
 								blockToInlines (Div _ blks) = blocksToInlines blks
 								blockToInlines Null = []
 								blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline]
 								blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks
 								blocksToInlines :: [Block] -> [Inline]
 								blocksToInlines = blocksToInlinesWithSep [Space, Str "¶", Space]
-												Rename README to MANUAL.txt

											
										
										
											2016-07-20 14:12:57 +02:00
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
-												Added safeRead to Text.Pandoc.Shared.

											
										
										
											2012-08-09 07:52:39 -07:00
+								--
 								-- Safe read
 								--
-												Make safeRead safe.

Fixes #1801

											
										
										
											2015-02-18 18:40:36 +00:00
+								safeRead :: (MonadPlus m, Read a) => String -> m a
-												Added safeRead to Text.Pandoc.Shared.

											
										
										
											2012-08-09 07:52:39 -07:00
+								safeRead s = case reads s of
-												Removed `--strict`, added extensions to writer/reader names.

* The `--strict` option has been removed.
* Instead of using `--strict`, one can now use `strict` instead of
  `markdown` as an input or output format name.
* The `--enable` and `--disable` optinos have been removed.
* It is now possible to enable or disable specific extensions
  by appending them (with '+' or '-') to the writer or reader
  name.  For example `pandoc -f markdown-footnotes+hard_line_breaks`.
* The lhs extensions are now implemented this way, too; you can
  use either `+lhs` or `+literate_haskell`.

											
										
										
											2012-08-09 20:19:06 -07:00
+								                  (d,x):_
 								                    | all isSpace x -> return d
-												Make safeRead safe.

Fixes #1801

											
										
										
											2015-02-18 18:40:36 +00:00
+								                  _                 -> mzero
-												Moved withTempDir from PDF to Shared, export from Shared.

API change.

											
										
										
											2014-07-30 12:29:04 -07:00
 								--
 								-- Temp directory
 								--
 								withTempDir :: String -> (FilePath -> IO a) -> IO a
 								withTempDir =
 								#ifdef _WINDOWS
 								  withTempDirectory "."
 								#else
 								  withSystemTempDirectory
 								#endif