pandoc/src/Text/Pandoc/Shared.hs

{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
    FlexibleContexts, ScopedTypeVariables, PatternGuards,
    ViewPatterns #-}
{-
Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.Shared
   Copyright   : Copyright (C) 2006-2017 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Utility functions and definitions used by the various Pandoc modules.
-}
module Text.Pandoc.Shared (
                     -- * List processing
                     splitBy,
                     splitByIndices,
                     splitStringByIndices,
                     substitute,
                     ordNub,
                     -- * Text processing
                     backslashEscapes,
                     escapeStringUsing,
                     stripTrailingNewlines,
                     trim,
                     triml,
                     trimr,
                     stripFirstAndLast,
                     camelCaseToHyphenated,
                     toRomanNumeral,
                     escapeURI,
                     tabFilter,
                     crFilter,
                     -- * Date/time
                     normalizeDate,
                     -- * Pandoc block and inline list processing
                     orderedListMarkers,
                     extractSpaces,
                     removeFormatting,
                     deNote,
                     stringify,
                     capitalize,
                     compactify,
                     compactifyDL,
                     linesToPara,
                     Element (..),
                     hierarchicalize,
                     uniqueIdent,
                     inlineListToIdentifier,
                     isHeaderBlock,
                     headerShift,
                     isTightList,
                     addMetaField,
                     makeMeta,
                     eastAsianLineBreakFilter,
                     -- * TagSoup HTML handling
                     renderTags',
                     -- * File handling
                     inDirectory,
                     collapseFilePath,
                     filteredFilesFromArchive,
                     -- * URI handling
                     schemes,
                     isURI,
                     -- * Error handling
                     mapLeft,
                     -- * for squashing blocks
                     blocksToInlines,
                     -- * Safe read
                     safeRead,
                     -- * Temp directory
                     withTempDir,
                     -- * Version
                     pandocVersion
                    ) where

import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import Data.Char ( toLower, isLower, isUpper, isAlpha,
                   isLetter, isDigit, isSpace )
import Data.List ( find, stripPrefix, intercalate )
import Data.Maybe (mapMaybe)
import Data.Version ( showVersion )
import qualified Data.Map as M
import Network.URI ( URI(uriScheme), escapeURIString, parseURI )
import qualified Data.Set as Set
import System.Directory
import System.FilePath (splitDirectories, isPathSeparator)
import qualified System.FilePath.Posix as Posix
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State.Strict as S
import qualified Control.Exception as E
import Control.Monad (msum, unless, MonadPlus(..))
import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.Compat.Time
import System.IO.Temp
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
         renderOptions)
import Data.Monoid ((<>))
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
import qualified Data.Text as T
import qualified Data.ByteString.Lazy as BL
import Paths_pandoc (version)
import Codec.Archive.Zip

-- | Version number of the pandoc library, rendered as a string by
-- applying 'showVersion' to the @version@ value imported from
-- @Paths_pandoc@.
pandocVersion :: String
pandocVersion = showVersion version

--
-- List processing
--

-- | Break a list into the runs of elements lying between separators.
-- A run of consecutive separators counts as a single separator.  A
-- leading separator produces an empty first chunk; an empty input
-- produces no chunks at all.
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy isSep = go
  where
    go [] = []
    go xs = chunk : go (dropWhile isSep remainder)
      where (chunk, remainder) = break isSep xs

-- | Cut a list at the given offsets.  Offsets are positions in the
-- original list: after each cut the remaining offsets are rebased by
-- subtracting the offset just used.  With no offsets the whole list is
-- returned as a single chunk.
splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices indices lst =
  case indices of
    []         -> [lst]
    (i : more) ->
      let (chunk, remainder) = splitAt i lst
      in  chunk : splitByIndices (map (subtract i) more) remainder

-- | Split a string into chunks at the specified indices.  Works like
-- 'splitByIndices', except that each cut is made with @splitAt'@, which
-- counts every character by its 'charWidth' rather than as one unit.
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
splitStringByIndices indices str =
  case indices of
    []         -> [str]
    (i : more) -> chunk : splitStringByIndices (map (subtract i) more) remainder
      where (chunk, remainder) = splitAt' i str

-- | Variant of 'splitAt' in which each character taken reduces the
-- remaining count by its 'charWidth' instead of by one.  Splitting
-- stops once the count is zero or negative, or the string runs out.
splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' budget str =
  case str of
    []               -> ([], [])
    _ | budget <= 0  -> ([], str)
    (c : cs)         ->
      let (taken, left) = splitAt' (budget - charWidth c) cs
      in  (c : taken, left)

-- | Replace every occurrence of the sublist @target@ with @replacement@.
-- Matching proceeds left to right and never rescans inserted
-- replacement text.  An empty @target@ leaves the list unchanged.
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute target replacement = go
  where
    go [] = []
    go lst@(x : xs)
      | null target                         = lst
      | Just rest <- stripPrefix target lst = replacement ++ go rest
      | otherwise                           = x : go xs

-- | Remove duplicate elements, keeping the first occurrence of each and
-- preserving the original order.  Elements already emitted are tracked
-- in a 'Set.Set', so only an 'Ord' instance is required.
ordNub :: (Ord a) => [a] -> [a]
ordNub xs = foldr visit (const []) xs Set.empty
  where
    -- Each step receives the set of elements seen so far and passes the
    -- (possibly extended) set on to the rest of the traversal.
    visit x continue seen
      | x `Set.member` seen = continue seen
      | otherwise           = x : continue (Set.insert x seen)

--
-- Text processing
--

-- | Build an association list that maps each of the given characters to
-- the two-character string consisting of a backslash followed by that
-- character.
backslashEscapes :: [Char]    -- ^ list of special characters to escape
                 -> [(Char, String)]
backslashEscapes specials = [ (ch, ['\\', ch]) | ch <- specials ]

-- | Escape a string using an association list of characters and their
-- replacement strings.  Characters without an entry are copied through
-- unchanged; when a character occurs more than once in the table the
-- first entry wins.
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing escapeTable = concatMap escapeChar
  where
    escapeChar c = maybe [c] id (lookup c escapeTable)

-- | Remove every @\\n@ character from the end of a string.  Newlines
-- elsewhere in the string are kept.
stripTrailingNewlines :: String -> String
stripTrailingNewlines str = reverse withoutNewlines
  where
    withoutNewlines = dropWhile isNewline (reverse str)
    isNewline c     = c == '\n'

-- | Remove whitespace (spaces, tabs, carriage returns and newlines) from
-- both ends of a string.
trim :: String -> String
trim str = triml (trimr str)

-- | Remove leading whitespace from a string.  Only space, carriage
-- return, newline and tab are treated as whitespace here.
triml :: String -> String
triml = dropWhile isTrimmable
  where
    isTrimmable c = c == ' ' || c == '\r' || c == '\n' || c == '\t'

-- | Remove trailing whitespace (including newlines) from a string, by
-- applying 'triml' to the reversed input.
trimr :: String -> String
trimr str = reverse (triml (reverse str))

-- | Drop the first and the last character of a string.  Strings of
-- length two or less become empty.
stripFirstAndLast :: String -> String
stripFirstAndLast str =
  case str of
    []         -> []
    (_ : rest)
      | null rest -> []
      | otherwise -> init rest

-- | Convert a CamelCase word to hyphenated lowercase (e.g., camel-case).
-- A hyphen is inserted wherever a lowercase letter is directly followed
-- by an uppercase one; every character is lowercased.
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated str =
  case str of
    (lo : up : more)
      | isLower lo && isUpper up ->
          lo : '-' : toLower up : camelCaseToHyphenated more
    (c : more) -> toLower c : camelCaseToHyphenated more
    []         -> ""

-- | Convert a number below 4000 to an uppercase roman numeral.
-- Zero gives the empty string; negative numbers and numbers of 4000
-- or more give @\"?\"@.
toRomanNumeral :: Int -> String
toRomanNumeral x
  | x >= 4000 || x < 0 = "?"
  | otherwise          = go x numerals
  where
    -- Greedily emit the largest symbol that still fits, then continue
    -- with the remainder.
    go _ [] = ""
    go n table@((value, glyph) : smaller)
      | n >= value = glyph ++ go (n - value) table
      | otherwise  = go n smaller
    numerals :: [(Int, String)]
    numerals =
      [ (1000, "M"), (900, "CM"), (500, "D"), (400, "CD")
      , (100, "C"), (90, "XC"), (50, "L"), (40, "XL")
      , (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I")
      ]

-- | Percent-escape whitespace and a fixed set of punctuation characters
-- in a URI, leaving every other character untouched.
escapeURI :: String -> String
escapeURI = escapeURIString isAllowed
  where
    -- A character is passed through unless it is whitespace or one of
    -- the listed punctuation characters.
    isAllowed c = not (isSpace c) && c `notElem` "<>|\"{}[]^`"

-- | Convert tabs to spaces, padding each tab out to the next multiple of
-- the tab stop.  Tabs will be preserved if tab stop is set to 0.
-- For a non-zero tab stop the input is processed line by line and
-- reassembled with 'T.unlines', so the result always ends in a newline.
tabFilter :: Int       -- ^ Tab stop
          -> T.Text    -- ^ Input
          -> T.Text
tabFilter 0 = id
tabFilter tabStop = T.unlines . map expandLine . T.lines
  where
    expandLine = joinSegments . T.split (== '\t')
    -- Every segment but the last was followed by a tab in the input.
    joinSegments []           = T.empty
    joinSegments [final]      = final
    joinSegments (seg : more) = padded seg <> joinSegments more
    padded seg =
      seg <> T.replicate (tabStop - (T.length seg `mod` tabStop)) (T.pack " ")

-- | Strip out DOS line endings by deleting every carriage return
-- character from the text.
crFilter :: T.Text -> T.Text
crFilter = T.filter notCarriageReturn
  where
    notCarriageReturn c = c /= '\r'

--
-- Date/time
--

-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
-- limit years to the range 1601-9999 (ISO 8601 accepts greater than
-- or equal to 1583, but MS Word only accepts dates starting 1601).
--
-- The input is tried against each entry of @formats@ in order; the
-- first format that both parses and yields a year within range is used.
-- Returns 'Nothing' when no format produces an acceptable date.
normalizeDate :: String -> Maybe String
normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
  (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
  where rejectBadYear day = case toGregorian day of
          (y, _, _) | y >= 1601 && y <= 9999 -> Just day
          _ -> Nothing
        -- The parsing function is selected at compile time, depending on
        -- the version of the time library being built against.
        parsetimeWith =
#if MIN_VERSION_time(1,5,0)
             parseTimeM True defaultTimeLocale
#else
             parseTime defaultTimeLocale
#endif
        -- Accepted input formats, in the order in which they are tried.
        formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
                    "%d %B %Y", "%b. %d, %Y", "%B %d, %Y",
                    "%Y%m%d", "%Y%m", "%Y"]

--
-- Pandoc block and inline list processing
--

-- | Produce the infinite, lazily generated sequence of item markers for
-- an ordered list with the given start number, numbering style and
-- delimiter style.  Alphabetic styles wrap around after the 26th letter.
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
orderedListMarkers (start, numstyle, numdelim) = map decorate labels
  where
    labels = case numstyle of
      DefaultStyle -> arabic
      Example      -> arabic
      Decimal      -> arabic
      UpperAlpha   -> alphabetic ['A'..'Z']
      LowerAlpha   -> alphabetic ['a'..'z']
      UpperRoman   -> romans
      LowerRoman   -> map (map toLower) romans
    arabic = map show [start..]
    romans = map toRomanNumeral [start..]
    alphabetic letters = drop (start - 1) (cycle [ [c] | c <- letters ])
    decorate label = case numdelim of
      DefaultDelim -> label ++ "."
      Period       -> label ++ "."
      OneParen     -> label ++ ")"
      TwoParens    -> "(" ++ label ++ ")"

-- | Apply an inline constructor to the trimmed contents of an inline
-- list, re-attaching a leading and/or trailing space outside the
-- constructed element when the original contents began or ended with
-- one.  A 'SoftBreak' is handled like a 'Space' and is preserved as a
-- soft break.
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
extractSpaces f is = leading <> f (B.trimInlines (B.Many inner)) <> trailing
  where
    inner = B.unMany is
    leading = case viewl inner of
      (Space :< _)     -> B.space
      (SoftBreak :< _) -> B.softbreak
      _                -> mempty
    trailing = case viewr inner of
      (_ :> Space)     -> B.space
      (_ :> SoftBreak) -> B.softbreak
      _                -> mempty

-- | Collect the plain inlines of a structure, discarding formatting.
-- Notes are blanked and quotes are turned into literal quotation marks
-- before collection; code and math become plain strings, and hard line
-- breaks become spaces.
removeFormatting :: Walkable Inline a => a -> [Inline]
removeFormatting = query plain . walk (deNote . deQuote)
  where
    plain :: Inline -> [Inline]
    plain il = case il of
      Str _      -> [il]
      Space      -> [il]
      SoftBreak  -> [il]
      Code _ txt -> [Str txt]
      Math _ txt -> [Str txt]
      LineBreak  -> [Space]
      _          -> []

-- | Replace a footnote with an empty string element; every other inline
-- is returned unchanged.
deNote :: Inline -> Inline
deNote il = case il of
  Note _ -> Str ""
  _      -> il

-- | Replace a 'Quoted' inline by an attribute-less 'Span' whose contents
-- are wrapped in literal curly quotation marks (single or double, to
-- match the quote type).  Other inlines are returned unchanged.
deQuote :: Inline -> Inline
deQuote il = case il of
  Quoted SingleQuote xs -> wrap "\8216" "\8217" xs
  Quoted DoubleQuote xs -> wrap "\8220" "\8221" xs
  _                     -> il
  where
    wrap open close body = Span ("",[],[]) (Str open : body ++ [Str close])

-- | Flatten a pandoc structure to a plain string, dropping formatting.
-- Footnotes are skipped (since we don't want their contents in link
-- labels).  Spaces, soft breaks, hard breaks and raw HTML @\<br@ tags
-- each contribute a single space.
stringify :: Walkable Inline a => a -> String
stringify = query render . walk (deNote . deQuote)
  where
    render :: Inline -> [Char]
    render il = case il of
      Space      -> " "
      SoftBreak  -> " "
      Str txt    -> txt
      Code _ txt -> txt
      Math _ txt -> txt
      RawInline (Format "html") ('<':'b':'r':_) -> " " -- see #2105
      LineBreak  -> " "
      _          -> ""

-- | Convert all regular text in a pandoc structure to uppercase.
--
-- The conversion goes through 'T.toUpper', so a lowercase character
-- that has no single uppercase counterpart is handled correctly –
-- e.g. “Straße” becomes “STRASSE”, not “STRAßE”.
capitalize :: Walkable Inline a => a -> a
capitalize = walk upcase
  where
    upcase :: Inline -> Inline
    upcase il = case il of
      Str s -> (Str . T.unpack . T.toUpper . T.pack) s
      _     -> il

-- | Change the final block of the final list item from @Para@ to
-- @Plain@ if that is the only top-level @Para@ block in the whole list.
-- Operates on list items represented as @Blocks@.
compactify :: [Blocks]  -- ^ List of list items (each a list of blocks)
           -> [Blocks]
compactify items
  | null items = []
  | (Para inls : revRest) <- reverse (B.toList (last items))
    -- exactly one Para among the top-level blocks of all items
  , [_] <- [ () | Para _ <- concatMap B.toList items ]
  = init items ++ [B.fromList (reverse (Plain inls : revRest))]
  | otherwise = items

-- | Like @compactify@, but acts on items of definition lists.
--
-- If the very last block among all definitions is a @Para@ and no other
-- definition block is a @Para@, that final @Para@ is turned into a
-- @Plain@.  The list is returned unchanged in every other case,
-- including when the last term has no definitions at all or its last
-- definition is empty (the trailing @Para@ then belongs to an earlier
-- definition).
compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
compactifyDL items =
  case reverse (concatMap B.toList (concatMap snd items)) of
    (Para x : xs)
      | not (any isPara xs)
        -- 'items' is non-empty here because a block was found.  The
        -- non-empty pattern on the definitions guards the 'last'/'init'
        -- calls below, which would otherwise fail when the final term
        -- carries no definitions.
      , (t, ds@(_:_)) <- last items ->
          let lastDef = B.toList (last ds)
              ds' = init ds ++
                    if null lastDef
                       then [B.fromList lastDef]
                       else [B.fromList $ init lastDef ++ [Plain x]]
          in  init items ++ [(t, ds')]
    _ -> items

-- | Join a list of lines into one inline list, putting a hard line
-- break between consecutive lines.
combineLines :: [[Inline]] -> [Inline]
combineLines lns = intercalate [LineBreak] lns

-- | Turn a list of lines into a single paragraph in which the lines are
--   separated by hard line breaks.  This is useful e.g. for rudimentary
--   support of LineBlock elements in writers.
linesToPara :: [[Inline]] -> Block
linesToPara lns = Para (combineLines lns)

-- | True if the block is a @Para@.
isPara :: Block -> Bool
isPara blk = case blk of
  Para _ -> True
  _      -> False

-- | Data structure for defining hierarchical Pandoc documents.
-- An element is either a single block ('Blk') or a section ('Sec')
-- carrying its level, section number, attributes, heading text and
-- nested contents.
data Element = Blk Block
             | Sec Int [Int] Attr [Inline] [Element]
             --    lvl  num attributes label    contents
             deriving (Eq, Read, Show, Typeable, Data)

-- | Inline traversals of an 'Element': a 'Blk' delegates to the wrapped
-- block; a 'Sec' visits its label and then its contents, leaving level,
-- number and attributes untouched.
instance Walkable Inline Element where
  walk f el = case el of
    Blk blk -> Blk (walk f blk)
    Sec lvl nums attr label contents ->
      Sec lvl nums attr (walk f label) (walk f contents)
  walkM f el = case el of
    Blk blk -> fmap Blk (walkM f blk)
    Sec lvl nums attr label contents -> do
      label' <- walkM f label
      contents' <- walkM f contents
      return (Sec lvl nums attr label' contents')
  query f el = case el of
    Blk blk -> query f blk
    Sec _ _ _ label contents -> query f label <> query f contents

-- | Block traversals of an 'Element': a 'Blk' delegates to the wrapped
-- block; a 'Sec' visits its label and then its contents, leaving level,
-- number and attributes untouched.
instance Walkable Block Element where
  walk f el = case el of
    Blk blk -> Blk (walk f blk)
    Sec lvl nums attr label contents ->
      Sec lvl nums attr (walk f label) (walk f contents)
  walkM f el = case el of
    Blk blk -> fmap Blk (walkM f blk)
    Sec lvl nums attr label contents -> do
      label' <- walkM f label
      contents' <- walkM f contents
      return (Sec lvl nums attr label' contents')
  query f el = case el of
    Blk blk -> query f blk
    Sec _ _ _ label contents -> query f label <> query f contents


-- | Convert Pandoc inline list to plain text identifier.  HTML
-- identifiers must start with a letter, and may contain only
-- letters, digits, and the characters _-.
--
-- The text is stringified, non-breaking spaces are turned into ordinary
-- spaces, characters other than letters, digits, @_@, @-@, @.@ and
-- space are removed, the rest is lowercased, runs of spaces become
-- single hyphens, and everything before the first letter is dropped.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier =
  dropWhile (not . isAlpha) . intercalate "-" . words .
    map toLower .
    filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
    -- Non-breaking spaces must be converted *before* filtering: the
    -- filter does not admit U+00A0, so converting afterwards would
    -- never see one and the adjoining words would be fused together.
    map nbspToSp .
    stringify
 where nbspToSp '\160'     =  ' '
       nbspToSp x          =  x

-- | Convert a flat list of Pandoc blocks into a (hierarchical) list of
-- 'Element's, nesting blocks under the headers that introduce them.
-- Section numbering starts from an empty number list.
hierarchicalize :: [Block] -> [Element]
hierarchicalize = flip S.evalState [] . hierarchicalizeWithIds

-- | Worker for 'hierarchicalize'.  The state holds the section number
-- most recently stored by a numbered header; it is used to compute the
-- number of the next section.
hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
hierarchicalizeWithIds [] = return []
-- A header opens a section that extends up to (but not including) the
-- next header of the same or a smaller level.
hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
  lastnum <- S.get
  let lastnum' = take level lastnum
  let newnum = case length lastnum' of
                    -- unnumbered sections get an empty number
                    x | "unnumbered" `elem` classes -> []
                      -- a number already exists at this depth: bump its
                      -- last component.
                      -- NOTE(review): for a non-positive level, lastnum'
                      -- is empty and init/last would fail here.
                      | x >= level -> init lastnum' ++ [last lastnum' + 1]
                      -- deeper than any earlier section: pad skipped
                      -- levels with 0 and start counting at 1
                      | otherwise -> lastnum ++
                           replicate (level - length lastnum - 1) 0 ++ [1]
  -- unnumbered sections leave the stored number untouched
  unless (null newnum) $ S.put newnum
  let (sectionContents, rest) = break (headerLtEq level) xs
  sectionContents' <- hierarchicalizeWithIds sectionContents
  rest' <- hierarchicalizeWithIds rest
  return $ Sec level newnum attr title' sectionContents' : rest'
-- A "references" Div that starts with a header is unwrapped: the header
-- receives the "references" class and the Div's remaining blocks are
-- spliced in front of the blocks that follow the Div.
hierarchicalizeWithIds ((Div ("",["references"],[])
                         (Header level (ident,classes,kvs) title' : xs)):ys) =
  hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs)
                           title') : (xs ++ ys))
-- Any other block is wrapped unchanged.
hierarchicalizeWithIds (x:rest) = do
  rest' <- hierarchicalizeWithIds rest
  return $ (Blk x) : rest'

-- | True if the block is a header whose level is at most the given
-- level, or a \"references\" Div whose first block is such a header.
headerLtEq :: Int -> Block -> Bool
headerLtEq level blk = case blk of
  Header l _ _                                  -> l <= level
  Div ("",["references"],[]) (Header l _ _ : _) -> l <= level
  _                                             -> False

-- | Generate a unique identifier from a list of inlines.
-- Second argument is the set of identifiers already in use.  The base
-- identifier (or @\"section\"@ if it would be empty) is used when free;
-- otherwise the first free @base-N@ with N from 1 to 60000 is used.
uniqueIdent :: [Inline] -> Set.Set String -> String
uniqueIdent title' usedIdents
  | not (baseIdent `Set.member` usedIdents) = baseIdent
    -- if we have more than 60,000, allow repeats
  | otherwise = maybe baseIdent numIdent (find isFree ([1..60000] :: [Int]))
  where
    baseIdent = case inlineListToIdentifier title' of
                  "" -> "section"
                  x  -> x
    numIdent n = baseIdent ++ "-" ++ show n
    isFree n   = not (numIdent n `Set.member` usedIdents)

-- | True if the block is a @Header@, whatever its level or contents.
isHeaderBlock :: Block -> Bool
isHeaderBlock blk = case blk of
  Header{} -> True
  _        -> False

-- | Shift header levels up or down by adding the given amount to the
-- level of every header in the document.
headerShift :: Int -> Pandoc -> Pandoc
headerShift n = walk bump
  where
    bump :: Block -> Block
    bump blk = case blk of
      Header level attr inner -> Header (level + n) attr inner
      _                       -> blk

-- | Detect if a list is tight, i.e. every item begins with a @Plain@
-- block.  An empty list of items counts as tight.
isTightList :: [[Block]] -> Bool
isTightList items = and [ startsPlain item | item <- items ]
  where
    startsPlain item = case item of
      (Plain _ : _) -> True
      _             -> False

-- | Set a field of a 'Meta' object.  If the field already has a value,
-- the field becomes a list holding the old value(s) followed by the new
-- one; a new value that is itself a list is appended element by element
-- when the old value is already a list.
addMetaField :: ToMetaValue a
             => String
             -> a
             -> Meta
             -> Meta
addMetaField key val (Meta meta) =
  Meta (M.insertWith merge key (toMetaValue val) meta)
  where
    merge new old = case old of
      MetaList olds -> MetaList (olds ++ asList new)
      _             -> MetaList [old, new]
    asList mv = case mv of
      MetaList vs -> vs
      _           -> [mv]

-- | Build a 'Meta' from an old-style title, author list and date by
-- setting the @date@, @author@ and @title@ fields (in that order) on an
-- empty 'Meta'.  Provided to ease the transition from the old API.
makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
makeMeta title authors date = (withTitle . withAuthors . withDate) nullMeta
  where
    withTitle   = addMetaField "title" (B.fromList title)
    withAuthors = addMetaField "author" (map B.fromList authors)
    withDate    = addMetaField "date" (B.fromList date)

-- | Remove soft breaks between East Asian characters: a 'SoftBreak' is
-- dropped when the text before it ends in, and the text after it starts
-- with, a character whose 'charWidth' is 2.
eastAsianLineBreakFilter :: Pandoc -> Pandoc
eastAsianLineBreakFilter = bottomUp dropBreak
  where
    dropBreak :: [Inline] -> [Inline]
    dropBreak inls@(before : SoftBreak : after : more)
      | prev@(_:_) <- stringify before
      , (next:_)   <- stringify after
      , charWidth (last prev) == 2
      , charWidth next == 2 = before : after : more
      | otherwise = inls
    dropBreak inls = inls

--
-- TagSoup HTML handling
--

-- | Render HTML tags.  The tags @hr@, @br@, @img@, @meta@ and @link@
-- are selected for 'optMinimize', and @script@ and @style@ for
-- 'optRawTag'; tag names are compared case-insensitively.
renderTags' :: [Tag String] -> String
renderTags' = renderTagsOptions opts
  where
    opts = renderOptions
             { optMinimize = matchTags ["hr", "br", "img", "meta", "link"]
             , optRawTag   = matchTags ["script", "style"]
             }
    matchTags tags name = map toLower name `elem` tags

--
-- File handling
--

-- | Run an IO action with the given path as current directory.  The
-- original working directory is restored afterwards, even if the action
-- throws an exception.
inDirectory :: FilePath -> IO a -> IO a
inDirectory path action =
  E.bracket getCurrentDirectory setCurrentDirectory $ \_ ->
    setCurrentDirectory path >> action

--
-- Error reporting
--

-- | Apply a function to the 'Left' value of an 'Either', leaving a
-- 'Right' value untouched.
mapLeft :: (a -> b) -> Either a c -> Either b c
mapLeft f = either (Left . f) Right

-- | Remove intermediate "." and ".." directories from a path.
-- A ".." cancels the preceding component unless that component is
-- itself ".." or the root separator, in which case it is kept.
--
-- > collapseFilePath "./foo" == "foo"
-- > collapseFilePath "/bar/../baz" == "/baz"
-- > collapseFilePath "/../baz" == "/../baz"
-- > collapseFilePath "parent/foo/baz/../bar" ==  "parent/foo/bar"
-- > collapseFilePath "parent/foo/baz/../../bar" ==  "parent/bar"
-- > collapseFilePath "parent/foo/.." ==  "parent"
-- > collapseFilePath "/parent/foo/../../bar" ==  "/bar"
collapseFilePath :: FilePath -> FilePath
collapseFilePath = Posix.joinPath . reverse . foldl step [] . splitDirectories
  where
    -- The accumulator holds the components kept so far, most recent first.
    step acc comp
      | comp == "." = acc
      | comp == "..", (prev : older) <- acc =
          if prev == ".." || isSeparatorComponent prev
             then comp : acc
             else older
      | isSeparatorComponent comp = [[Posix.pathSeparator]]
      | otherwise = comp : acc
    -- A component consisting of exactly one path separator character.
    isSeparatorComponent [c] = isPathSeparator c
    isSeparatorComponent _   = False

--
-- File selection from the archive
--
-- | List the entries of an archive whose paths satisfy the predicate,
-- pairing each path with the result of 'fromEntry' on its entry.  Paths
-- for which no entry can be found are skipped.
filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)]
filteredFilesFromArchive zf f =
  [ (fp, fromEntry entry)
  | fp <- filesInArchive zf
  , f fp
  , Just entry <- [findEntryByPath fp zf]
  ]


--
-- IANA URIs
--

-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus
-- the unofficial schemes doi, javascript, isbn, pmid.  All entries are
-- written in lowercase.
schemes :: Set.Set String
schemes = Set.fromList
  -- Official IANA schemes
  [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs"
  , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin"
  , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension"
  , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs"
  , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle"
  , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed"
  , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg"
  , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham"
  , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon"
  , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6"
  , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs"
  , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap"
  , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market"
  , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access"
  , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel"
  , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath"
  , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint"
  , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller"
  , "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode"
  , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular"
  , "ms-settings-cloudstorage", "ms-settings-connectabledevices"
  , "ms-settings-displays-topology", "ms-settings-emailandaccounts"
  , "ms-settings-language", "ms-settings-location", "ms-settings-lock"
  , "ms-settings-nfctransactions", "ms-settings-notifications"
  , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity"
  , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace"
  , "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad"
  , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word"
  , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs"
  , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd"
  , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop"
  , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis"
  , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp"
  , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn"
  , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews"
  , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam"
  , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid"
  , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn"
  , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi"
  , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid"
  , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire"
  , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r"
  , "z39.50s"
  -- Unofficial schemes
  , "doi", "isbn", "javascript", "pmid"
  ]

-- | Check whether a string parses as a URI whose scheme, compared
-- case-insensitively and without its colon, is a member of @schemes@.
isURI :: String -> Bool
isURI str = case parseURI str of
  Nothing  -> False
  Just uri -> normalizedScheme uri `Set.member` schemes
  where
    normalizedScheme = map toLower . filter (/= ':') . uriScheme

---
--- Squash blocks into inlines
---

-- | Flatten a single block into a list of inlines.  Container blocks
-- are flattened recursively; code blocks and raw blocks become their
-- inline counterparts; horizontal rules and 'Null' yield nothing.
blockToInlines :: Block -> [Inline]
blockToInlines blk = case blk of
  Plain ils                 -> ils
  Para ils                  -> ils
  LineBlock lns             -> combineLines lns
  CodeBlock attr str        -> [Code attr str]
  RawBlock fmt str          -> [RawInline fmt str]
  BlockQuote blks           -> blocksToInlines blks
  OrderedList _ blkslst     -> concatMap blocksToInlines blkslst
  BulletList blkslst        -> concatMap blocksToInlines blkslst
  DefinitionList pairslst   -> concatMap flattenItem pairslst
  Header _ _ ils            -> ils
  HorizontalRule            -> []
  -- one line per table row (header row first), separated by hard breaks
  Table _ _ _ headers rows  ->
    intercalate [LineBreak] (map (concatMap blocksToInlines) (headers : rows))
  Div _ blks                -> blocksToInlines blks
  Null                      -> []
  where
    -- term, then a colon and space, then all of its definitions
    flattenItem (term, defs) =
      term ++ [Str ":", Space] ++ concatMap blocksToInlines defs

-- | Flatten a list of blocks into inlines, inserting the given separator
-- between the inlines of consecutive blocks.
blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline]
blocksToInlinesWithSep sep = intercalate sep . map blockToInlines

-- | Flatten a list of blocks into inlines, separating consecutive blocks
-- with a pilcrow sign surrounded by spaces.
blocksToInlines :: [Block] -> [Inline]
blocksToInlines blks = blocksToInlinesWithSep paragraphMark blks
  where
    paragraphMark = [Space, Str "¶", Space]


--
-- Safe read
--

-- | Total replacement for 'read': returns the parsed value when the
-- first parse offered by 'reads' consumes the whole string apart from
-- trailing whitespace, and 'mzero' otherwise.
safeRead :: (MonadPlus m, Read a) => String -> m a
safeRead s
  | ((val, leftover) : _) <- reads s
  , all isSpace leftover = return val
  | otherwise            = mzero

--
-- Temp directory
--

-- | Run an action that is passed the path of a temporary directory; the
-- first argument is the name template handed to the underlying function.
-- When @_WINDOWS@ is defined this is 'withTempDirectory' applied to the
-- current directory (@\".\"@); otherwise it is 'withSystemTempDirectory'.
withTempDir :: String -> (FilePath -> IO a) -> IO a
withTempDir =
#ifdef _WINDOWS
  withTempDirectory "."
#else
  withSystemTempDirectory
#endif
-												Generalized type of stringify.

											
										
										
											2013-08-28 08:43:51 -07:00
+								{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
-												Shared: Make collapseFilePath OS-agnostic

											
										
										
											2014-09-25 12:42:53 +01:00
+								    FlexibleContexts, ScopedTypeVariables, PatternGuards,
 								    ViewPatterns #-}
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								{-
-												Update dates in copyright notices

This follows the suggestions given by the FSF for GPL licensed software.
<https://www.gnu.org/prep/maintain/html_node/Copyright-Notices.html>

											
										
										
											2017-05-13 23:30:13 +02:00
+								Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								This program is free software; you can redistribute it and/or modify
 								it under the terms of the GNU General Public License as published by
 								the Free Software Foundation; either version 2 of the License, or
 								(at your option) any later version.
 								This program is distributed in the hope that it will be useful,
 								but WITHOUT ANY WARRANTY; without even the implied warranty of
 								MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 								GNU General Public License for more details.
 								You should have received a copy of the GNU General Public License
 								along with this program; if not, write to the Free Software
 								Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 								-}
 								{- |
 								   Module      : Text.Pandoc.Shared
-												Update dates in copyright notices

This follows the suggestions given by the FSF for GPL licensed software.
<https://www.gnu.org/prep/maintain/html_node/Copyright-Notices.html>

											
										
										
											2017-05-13 23:30:13 +02:00
+								   Copyright   : Copyright (C) 2006-2017 John MacFarlane
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								   License     : GNU GPL, version 2 or above
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								   Maintainer  : John MacFarlane <jgm@berkeley.edu>
 								   Stability   : alpha
 								   Portability : portable
 								Utility functions and definitions used by the various Pandoc modules.
 								-}
-												Shared: enamed stringToURI -> escapeURI.

											
										
										
											2010-03-23 15:05:33 -07:00
+								module Text.Pandoc.Shared (
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     -- * List processing
 								                     splitBy,
 								                     splitByIndices,
-												Shared: Added splitStringWithIndices.

This is like splitWithIndices, but it is sensitive to distinctions
between wide, combining, and regular characters.

											
										
										
											2012-01-27 00:37:46 -08:00
+								                     splitStringByIndices,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     substitute,
-												Shared:  Added ordNub.

API change (adds export).

											
										
										
											2014-06-03 11:00:54 -07:00
+								                     ordNub,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     -- * Text processing
 								                     backslashEscapes,
 								                     escapeStringUsing,
 								                     stripTrailingNewlines,
-												Renamed removedLeadingTrailingSpace to trim.

Also removeLeadingSpace to triml,
removeTrailingSpace to trimr.

											
										
										
											2012-09-29 17:09:34 -04:00
+								                     trim,
 								                     triml,
 								                     trimr,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     stripFirstAndLast,
 								                     camelCaseToHyphenated,
 								                     toRomanNumeral,
-												Shared: enamed stringToURI -> escapeURI.

											
										
										
											2010-03-23 15:05:33 -07:00
+								                     escapeURI,
-												Changed order of functions in Shared.

											
										
										
											2010-07-06 23:17:06 -07:00
+								                     tabFilter,
-												Move CR filtering from tabFilter to the readers.

The readers previously assumed that CRs had been filtered
from the input.  Now we strip the CRs in the readers themselves,
before parsing.  (The point of this is just to simplify the
parsers.)

Shared now exports a new function `crFilter`. [API change]
And `tabFilter` no longer filters CRs.

											
										
										
											2017-06-20 21:52:13 +02:00
+								                     crFilter,
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
+								                     -- * Date/time
 								                     normalizeDate,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     -- * Pandoc block and inline list processing
 								                     orderedListMarkers,
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								                     extractSpaces,
-												Shared:  Added removeFormatting.

API change (addition of exported function).

											
										
										
											2014-07-13 10:13:22 -07:00
+								                     removeFormatting,
-												Removed writerIgnoreNotes.

Instead, just temporarily remove notes when generating
TOC lists in HTML and Markdown (as we already did in LaTeX).

Also export deNote from Text.Pandoc.Shared.

API change in Shared and Options.WriterOptions.

											
										
										
											2017-01-15 22:34:14 +01:00
+								                     deNote,
-												Added 'stringify' to Text.Pandoc.Shared.

											
										
										
											2010-11-27 07:08:06 -08:00
+								                     stringify,
-												Correctly implement capitalisation.

Using `map toUpper` to capitalise text is wrong, as e.g.
“Straße” should be converted to “STRASSE”, which is 1 character
longer. This commit adds a `capitalize` function and replaces
2 identical implementations in different modules (`toCaps` and
`capitalize`) with it.

											
										
										
											2014-08-03 16:48:55 +04:00
+								                     capitalize,
-												Shared: rename compactify', compactify'DL -> compactify, compactifyDL.

											
										
										
											2017-01-27 21:36:45 +01:00
+								                     compactify,
 								                     compactifyDL,
-												Shared: add function combining lines using LineBreak

The `linesToBlock` function takes a list of lines and combines them by appending
a hard `LineBreak` to each line and concatenating the result, putting it
into a `Para`. This is most useful when converting `LineBlock`
elements.

											
										
										
											2016-10-13 08:46:38 +02:00
+								                     linesToPara,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     Element (..),
 								                     hierarchicalize,
-												Shared: Export uniqueIdent, don't allow tilde in identifier.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1894 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2010-03-16 06:45:52 +00:00
+								                     uniqueIdent,
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid DRY.

											
										
										
											2016-10-12 17:42:30 +02:00
+								                     inlineListToIdentifier,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     isHeaderBlock,
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
+								                     headerShift,
-												Shared: export isTightList.

											
										
										
											2013-01-07 20:12:05 -08:00
+								                     isTightList,
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template.
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								                     addMetaField,
 								                     makeMeta,
-												Added eastAsianLineBreakFilter to Shared.

This used to live in the Markdown reader.

											
										
										
											2017-05-30 10:22:48 +02:00
+								                     eastAsianLineBreakFilter,
-												Moved renderTags' from HTML reader & SelfContained to Shared.

Improved removal of markdown="1" attribute in Markdown reader.

											
										
										
											2012-08-15 09:42:16 -07:00
+								                     -- * TagSoup HTML handling
 								                     renderTags',
-												Added 'odt' output option to pandoc:
Not a writer, but a module that inserts the output of the OpenDocument
writer into an ODT archive.  This replaces markdown2odt.

+ Added odt output option to Main.hs.
+ Added default for .odt output file.
+ Changed defaults so that .xml and .sgml aren't automatically DocBook.
+ Added odt writer to Text.Pandoc exports.
+ Added Text.Pandoc.ODT and included in pandoc.cabal.
+ Added reference.odt as data-file in pandoc.cabal.
+ Handle picture links in OpenDocument files using xml library.
+ Removed markdown2odt and references from Makefile, README, man.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1345 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-07-31 23:16:02 +00:00
+								                     -- * File handling
-												Removed TH module; refactored LaTeXMathML not to use TH.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1692 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-12-31 01:11:23 +00:00
+								                     inDirectory,
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								                     collapseFilePath,
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid DRY.

											
										
										
											2016-10-12 17:42:30 +02:00
+								                     filteredFilesFromArchive,
-												Shared: Provide custom isURI that rejects unknown schemes [isURI]

We also export the set of known `schemes`.

The new function replaces the function of the same name
from `Network.URI`, as the latter did not check whether a scheme is
well-known.  E.g. MediaWiki wikis frequently feature pages with names
like `User:John`. These links were interpreted as URIs, thus turning
internal links into global links. This is prevented by also checking
whether the scheme of a URI is frequently used (i.e. is IANA registered
or an otherwise well-known scheme).

Fixes: #2713

Update set of well-known URIs from IANA list
All official IANA schemes (as of 2017-05-22) are included in the set of
known schemes.  The four non-official schemes doi, isbn, javascript, and
pmid are kept.

											
										
										
											2017-05-23 09:48:11 +02:00
+								                     -- * URI handling
 								                     schemes,
 								                     isURI,
-												Complete rewrite of LaTeX reader.

* The new reader is more robust, accurate, and extensible.
  It is still quite incomplete, but it should be easier
  now to add features.

* Text.Pandoc.Parsing: Added withRaw combinator.

* Markdown reader: do escapedChar before raw latex inline.
  Otherwise we capture commands like \{.

* Fixed latex citation tests for new citeproc.

* Handle \include{} commands in latex.
  This is done in pandoc.hs, not the (pure) latex reader.
  But the reader exports the needed function, handleIncludes.

* Moved err and warn from pandoc.hs to Shared.

* Fixed tests - raw tex should sometimes have trailing space.

* Updated lhs-test for highlighting-kate changes.

											
										
										
											2012-01-29 23:54:00 -08:00
+								                     -- * Error handling
-												Move utility error functions to Text.Pandoc.Shared

											
										
										
											2015-02-18 21:05:47 +00:00
+								                     mapLeft,
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
+								                     -- * for squashing blocks
 								                     blocksToInlines,
-												Added safeRead to Text.Pandoc.Shared.

											
										
										
											2012-08-09 07:52:39 -07:00
+								                     -- * Safe read
-												Moved withTempDir from PDF to Shared, export from Shared.

API change.

											
										
										
											2014-07-30 12:29:04 -07:00
+								                     safeRead,
 								                     -- * Temp directory
-												Move the variable pandocVersion from `src/Text/Pandoc.hs` to
`src/Text/Pandoc/Shared.hs`, so that all Writers can access this variable
without importing `src/Text/Pandoc.hs`, preventing circular import.

* pandoc.hs: Import pandocVersion from `Text.Pandoc.Shared`.
* src/Text/Pandoc.hs: Remove the definition of pandocVersion
 and relevant import.
* src/Text/Pandoc/Shared.hs: Add the definition of pandocVersion
 and relevant import.

											
										
										
											2015-09-25 03:54:41 +08:00
+								                     withTempDir,
 								                     -- * Version
 								                     pandocVersion
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                    ) where
 								import Text.Pandoc.Definition
-												Use query instead of queryWith.

											
										
										
											2013-08-10 18:13:38 -07:00
+								import Text.Pandoc.Walk
-												Move `compactify'DL` from Markdown reader into Shared

The function `compactify'DL`, used to change the final definition item of a
definition list into a `Plain` iff all other items are `Plain`s as well, is
useful in many parsers and hence moved into Text.Pandoc.Shared.

											
										
										
											2014-04-19 14:48:35 +02:00
+								import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
-												Shared: Export compactify', formerly in Markdown reader.

											
										
										
											2012-09-27 17:22:17 -07:00
+								import qualified Text.Pandoc.Builder as B
-												Shared: Removed unescapeURI, modified escapeURI.

escapeURI now only escapes space characters, leaving unicode characters
as they are, instead of converting them to octets and URL-encoding them,
as before.  This gives more readable URIs.  User agents now do the
percent-encoding themselves.

URIs are no longer unescaped at all on conversion to markdown, asciidoc,
rst, org.

Closes #349.

											
										
										
											2011-12-02 19:39:30 -08:00
+								import Data.Char ( toLower, isLower, isUpper, isAlpha,
 								                   isLetter, isDigit, isSpace )
-												Use `stripPrefix` where appropriate.

											
										
										
											2014-08-03 14:44:39 +04:00
+								import Data.List ( find, stripPrefix, intercalate )
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid DRY.

											
										
										
											2016-10-12 17:42:30 +02:00
+								import Data.Maybe (mapMaybe)
-												Move the variable pandocVersion from `src/Text/Pandoc.hs` to
`src/Text/Pandoc/Shared.hs`, so that all Writers can access this variable
without importing `src/Text/Pandoc.hs`, preventing circular import.

* pandoc.hs: Import pandocVersion from `Text.Pandoc.Shared`.
* src/Text/Pandoc.hs: Remove the definition of pandocVersion
 and relevant import.
* src/Text/Pandoc/Shared.hs: Add the definition of pandocVersion
 and relevant import.

											
										
										
											2015-09-25 03:54:41 +08:00
+								import Data.Version ( showVersion )
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template.
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								import qualified Data.Map as M
-												Remove openURL from Shared (API change).

Now all the guts of openURL have been put into openURL from
Class.  openURL is now sensitive to stRequestHeaders in CommonState
and will add these custom headers when making a request.
It no longer looks at the USER_AGENT environment variable,
since you can now set the `User-Agent` header directly.

											
										
										
											2017-10-15 22:10:13 -07:00
+								import Network.URI ( URI(uriScheme), escapeURIString, parseURI )
-												Shared:  Added ordNub.

API change (adds export).

											
										
										
											2014-06-03 11:00:54 -07:00
+								import qualified Data.Set as Set
-												Added 'odt' output option to pandoc:
Not a writer, but a module that inserts the output of the OpenDocument
writer into an ODT archive.  This replaces markdown2odt.

+ Added odt output option to Main.hs.
+ Added default for .odt output file.
+ Changed defaults so that .xml and .sgml aren't automatically DocBook.
+ Added odt writer to Text.Pandoc exports.
+ Added Text.Pandoc.ODT and included in pandoc.cabal.
+ Added reference.odt as data-file in pandoc.cabal.
+ Handle picture links in OpenDocument files using xml library.
+ Removed markdown2odt and references from Makefile, README, man.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1345 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-07-31 23:16:02 +00:00
+								import System.Directory
-												Removed unneeded imports.

											
										
										
											2015-09-26 22:56:13 -07:00
+								import System.FilePath (splitDirectories, isPathSeparator)
-												MediaBag:  ensure that / is always used as path separator.

											
										
										
											2015-09-26 22:40:58 -07:00
+								import qualified System.FilePath.Posix as Posix
-												Use functions from Text.Pandoc.Generic instead of processWith(M).

											
										
										
											2010-12-24 13:39:27 -08:00
+								import Data.Generics (Typeable, Data)
-												Use Control.Monad.State.Strict throughout.

This gives 20-30% speedup and reduction of memory
usage in most of the writers.

											
										
										
											2017-06-16 23:29:37 +02:00
+								import qualified Control.Monad.State.Strict as S
-												Improved fetching of external resources.

* In Shared, openURL and fetchItem now return an Either, for
  better error handling. (API change.)
* Better error message when fetching a URL fails with
  `--self-contained`.
* EPUB writer: If resource not found, skip it, as in Docx writer.
* Closes #916.

											
										
										
											2013-07-18 20:58:14 -07:00
+								import qualified Control.Exception as E
-												Make safeRead safe.

Fixes #1801

											
										
										
											2015-02-18 18:40:36 +00:00
+								import Control.Monad (msum, unless, MonadPlus(..))
-												Shared: Added splitStringWithIndices.

This is like splitWithIndices, but it is sensitive to distinctions
between wide, combining, and regular characters.

											
										
										
											2012-01-27 00:37:46 -08:00
+								import Text.Pandoc.Pretty (charWidth)
-												Added eastAsianLineBreakFilter to Shared.

This used to live in the Markdown reader.

											
										
										
											2017-05-30 10:22:48 +02:00
+								import Text.Pandoc.Generic (bottomUp)
-												More changes to avoid compiler warnings on ghc 7.10.

* CPP around deprecated `parseTime`.
* Text.Pandoc.Compat.Locale -> Text.Pandoc.Compat.Time,
  now exports Data.Time.

											
										
										
											2015-10-14 10:05:17 -07:00
+								import Text.Pandoc.Compat.Time
-												Moved withTempDir from PDF to Shared, export from Shared.

API change.

											
										
										
											2014-07-30 12:29:04 -07:00
+								import System.IO.Temp
-												Moved renderTags' from HTML reader & SelfContained to Shared.

Improved removal of markdown="1" attribute in Markdown reader.

											
										
										
											2012-08-15 09:42:16 -07:00
+								import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
 								         renderOptions)
-												Remove Compat.Monoid

This was only necessary for GHC versions with base below 4.5
(i.e., ghc < 7.4).

											
										
										
											2016-08-30 13:43:50 -04:00
+								import Data.Monoid ((<>))
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
-												Rewrote convertTabs to use Text not String.

											
										
										
											2017-06-10 15:22:25 +02:00
+								import qualified Data.Text as T
-												Let reference.docx/odt behave as if they are virtual data files.

Now they are constructed on the fly from their components,
but we now allow them to be printed with `--print-default-data-file`
and to override the defaults if placed in the user data directory.

Shared now exports getDefaultReferenceDocx and getDefaultReferenceODT
(API change).

These functions have been removed from the Docx and ODT writers.

Shared.readDataFile has been modified so that requests to read
a reference.odt or reference.docx will use these functions to
generate the files.

											
										
										
											2015-06-28 22:30:21 -07:00
+								import qualified Data.ByteString.Lazy as BL
-												Move the variable pandocVersion from `src/Text/Pandoc.hs` to
`src/Text/Pandoc/Shared.hs`, so that all Writers can access this variable
without importing `src/Text/Pandoc.hs`, preventing circular import.

* pandoc.hs: Import pandocVersion from `Text.Pandoc.Shared`.
* src/Text/Pandoc.hs: Remove the definition of pandocVersion
 and relevant import.
* src/Text/Pandoc/Shared.hs: Add the definition of pandocVersion
 and relevant import.

											
										
										
											2015-09-25 03:54:41 +08:00
+								import Paths_pandoc (version)
-												fix build failure with --flags=-https

The issue was originally reported by CasperVector as
    https://github.com/gentoo-haskell/gentoo-haskell/issues/427

Manifests itself as a build failure full of missing zip-archive
names:

    src/Text/Pandoc/Shared.hs:756:49:
        Not in scope: type constructor or class ‘Archive’
    src/Text/Pandoc/Shared.hs:777:38: Not in scope: ‘toEntry’
    src/Text/Pandoc/Shared.hs:786:19:
        Not in scope: ‘toArchive’
        Perhaps you meant ‘mbArchive’ (line 778)

Included Codec.Archive.Zip unconditionally.

Signed-off-by: Sergei Trofimovich <siarheit@google.com>

											
										
										
											2015-07-30 22:39:25 +01:00
+								import Codec.Archive.Zip
-												Move the variable pandocVersion from `src/Text/Pandoc.hs` to
`src/Text/Pandoc/Shared.hs`, so that all Writers can access this variable
without importing `src/Text/Pandoc.hs`, preventing circular import.

* pandoc.hs: Import pandocVersion from `Text.Pandoc.Shared`.
* src/Text/Pandoc.hs: Remove the definition of pandocVersion
 and relevant import.
* src/Text/Pandoc/Shared.hs: Add the definition of pandocVersion
 and relevant import.

											
										
										
											2015-09-25 03:54:41 +08:00
+								-- | Version number of pandoc library.
 								pandocVersion :: String
 								pandocVersion = showVersion version
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								--
 								-- List processing
 								--
 								-- | Split list by groups of one or more sep.
-												Shared: Made splitBy take a test instead of an element.

											
										
										
											2010-12-21 08:41:24 -08:00
+								splitBy :: (a -> Bool) -> [a] -> [[a]]
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								splitBy _ [] = []
-												Shared: Made splitBy take a test instead of an element.

											
										
										
											2010-12-21 08:41:24 -08:00
+								splitBy isSep lst =
 								  let (first, rest) = break isSep lst
 								      rest'         = dropWhile isSep rest
 								  in  first:(splitBy isSep rest')
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								splitByIndices :: [Int] -> [a] -> [[a]]
 								splitByIndices [] lst = [lst]
-												Shared: Added splitStringWithIndices.

This is like splitWithIndices, but it is sensitive to distinctions
between wide, combining, and regular characters.

											
										
										
											2012-01-27 00:37:46 -08:00
+								splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x)  xs) rest)
 								  where (first, rest) = splitAt x lst
 								-- | Split string into chunks divided at specified indices.
 								splitStringByIndices :: [Int] -> [Char] -> [[Char]]
 								splitStringByIndices [] lst = [lst]
 								splitStringByIndices (x:xs) lst =
 								  let (first, rest) = splitAt' x lst in
 								  first : (splitStringByIndices (map (\y -> y - x) xs) rest)
 								-- | Variant of 'splitAt' for strings in which every character taken
 								-- uses up @charWidth c@ units of the budget rather than one.
 								-- Characters are moved to the first component until the string is
 								-- exhausted or the remaining budget is zero or negative.
 								splitAt' :: Int -> [Char] -> ([Char],[Char])
 								splitAt' budget str =
 								  case str of
 								    []              -> ([], [])
 								    _ | budget <= 0 -> ([], str)
 								    (c:cs)          ->
 								      let (taken, left) = splitAt' (budget - charWidth c) cs
 								      in  (c : taken, left)
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Replace every occurrence of the sublist @target@ with
 								-- @replacement@, scanning left to right and resuming after each
 								-- match.  An empty target leaves the input unchanged.
 								substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
 								substitute _ _ [] = []
 								substitute [] _ input = input
 								substitute target replacement input@(c:cs)
 								  | Just afterMatch <- stripPrefix target input
 								              = replacement ++ continue afterMatch
 								  | otherwise = c : continue cs
 								  where continue = substitute target replacement
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
-												Shared:  Added ordNub.

API change (adds export).

											
										
										
											2014-06-03 11:00:54 -07:00
+								ordNub :: (Ord a) => [a] -> [a]
 								ordNub l = go Set.empty l
 								  where
 								    go _ [] = []
 								    go s (x:xs) = if x `Set.member` s then go s xs
 								                                      else x : go (Set.insert x s) xs
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								--
 								-- Text processing
 								--
 								-- | Build an association list that maps each of the given
 								-- characters to the two-character string made of a backslash
 								-- followed by that character.
 								backslashEscapes :: [Char]    -- ^ list of special characters to escape
 								                 -> [(Char, String)]
 								backslashEscapes specials = [(ch, '\\' : [ch]) | ch <- specials]
 								-- | Rewrite a string by replacing every character that has an entry
 								-- in the association list with its paired string.  Characters with
 								-- no entry are copied through unchanged; if a character is listed
 								-- more than once, the first entry is used.
 								escapeStringUsing :: [(Char, String)] -> String -> String
 								escapeStringUsing escapeTable = concatMap escapeChar
 								  where escapeChar c = maybe [c] id (lookup c escapeTable)
 								-- | Drop every newline character at the end of a string; newlines
 								-- elsewhere in the string are kept.
 								stripTrailingNewlines :: String -> String
 								stripTrailingNewlines str = reverse (dropWhile (== '\n') (reverse str))
 								-- | Strip whitespace (spaces, tabs, carriage returns, newlines)
 								-- from both ends of a string: the end is trimmed first, then the
 								-- start.
 								trim :: String -> String
 								trim str = triml (trimr str)
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Strip leading whitespace from a string.  The characters
 								-- removed are space, carriage return, newline and tab.
 								triml :: String -> String
 								triml = dropWhile isBlank
 								  where isBlank c = c `elem` " \r\n\t"
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Strip trailing whitespace (the same characters as 'triml')
 								-- by trimming the front of the reversed string.
 								trimr :: String -> String
 								trimr str = reverse (triml (reverse str))
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Drop the first and the last character of a string.  Strings of
 								-- two or fewer characters come back empty.
 								stripFirstAndLast :: String -> String
 								stripFirstAndLast str = drop 1 withoutLast
 								  where withoutLast = take (length str - 1) str
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								camelCaseToHyphenated :: String -> String
 								camelCaseToHyphenated [] = ""
 								camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
 								  a:'-':(toLower b):(camelCaseToHyphenated rest)
 								camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
 								-- | Convert a number below 4000 to an uppercase roman numeral.
 								-- Negative numbers and numbers of 4000 or more yield @"?"@; zero
 								-- yields the empty string.
 								toRomanNumeral :: Int -> String
 								toRomanNumeral x
 								  | x < 0 || x >= 4000 = "?"
 								  | otherwise          = go x numerals
 								  where
 								    -- Values in descending order, subtractive forms included, so
 								    -- that always taking the largest value that fits gives the
 								    -- conventional spelling.
 								    numerals :: [(Int, String)]
 								    numerals =
 								      [ (1000, "M"), (900, "CM"), (500, "D"), (400, "CD")
 								      , (100, "C"),  (90, "XC"),  (50, "L"),  (40, "XL")
 								      , (10, "X"),   (9, "IX"),   (5, "V"),   (4, "IV")
 								      , (1, "I") ]
 								    go _ [] = ""
 								    go n table@((value, glyph) : smaller)
 								      | n >= value = glyph ++ go (n - value) table
 								      | otherwise  = go n smaller
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
-												Percent-encode more special characters in URLs.

HTML, LaTeX writers adjusted.
The special characters are '<','>','|','"','{','}','[',']','^', '`'.

Closes #1640, #2377.

											
										
										
											2015-10-11 17:06:26 -07:00
+								-- | Escape whitespace and some punctuation characters in URI.
-												Shared: enamed stringToURI -> escapeURI.

											
										
										
											2010-03-23 15:05:33 -07:00
+								escapeURI :: String -> String
-												Percent-encode more special characters in URLs.

HTML, LaTeX writers adjusted.
The special characters are '<','>','|','"','{','}','[',']','^', '`'.

Closes #1640, #2377.

											
										
										
											2015-10-11 17:06:26 -07:00
+								escapeURI = escapeURIString (not . needsEscaping)
 								  where needsEscaping c = isSpace c || c `elem`
 								                           ['<','>','|','"','{','}','[',']','^', '`']
-												Move CR filtering from tabFilter to the readers.

The readers previously assumed that CRs had been filtered
from the input.  Now we strip the CRs in the readers themselves,
before parsing.  (The point of this is just to simplify the
parsers.)

Shared now exports a new function `crFilter`. [API change]
And `tabFilter` no longer filters CRs.

											
										
										
											2017-06-20 21:52:13 +02:00
+								-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0.
-												Changed order of functions in Shared.

											
										
										
											2010-07-06 23:17:06 -07:00
+								tabFilter :: Int       -- ^ Tab stop
-												Rewrote convertTabs to use Text not String.

											
										
										
											2017-06-10 15:22:25 +02:00
+								          -> T.Text    -- ^ Input
 								          -> T.Text
-												Move CR filtering from tabFilter to the readers.

The readers previously assumed that CRs had been filtered
from the input.  Now we strip the CRs in the readers themselves,
before parsing.  (The point of this is just to simplify the
parsers.)

Shared now exports a new function `crFilter`. [API change]
And `tabFilter` no longer filters CRs.

											
										
										
											2017-06-20 21:52:13 +02:00
+								tabFilter 0 = id
 								tabFilter tabStop = T.unlines . map go . T.lines
-												Rewrote convertTabs to use Text not String.

											
										
										
											2017-06-10 15:22:25 +02:00
+								  where go s =
 								         let (s1, s2) = T.break (== '\t') s
 								         in  if T.null s2
 								                then s1
 								                else s1 <> T.replicate
 								                       (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
 								                       <> go (T.drop 1 s2)
-												Changed order of functions in Shared.

											
										
										
											2010-07-06 23:17:06 -07:00
-												Move CR filtering from tabFilter to the readers.

The readers previously assumed that CRs had been filtered
from the input.  Now we strip the CRs in the readers themselves,
before parsing.  (The point of this is just to simplify the
parsers.)

Shared now exports a new function `crFilter`. [API change]
And `tabFilter` no longer filters CRs.

											
										
										
											2017-06-20 21:52:13 +02:00
+								-- | Strip out DOS line endings.
 								crFilter :: T.Text -> T.Text
 								crFilter = T.filter (/= '\r')
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
+								--
 								-- Date/time
 								--
-												Shared: normalizeDate should reject illegal years.

We only allow years between 1601 and 9999, inclusive. The ISO 8601
actually says that years are supposed to start with 1583, but MS Word
only allows 1601-9999. This should stop corrupted word files if the date
is out of that range, or is parsed incorrectly.

											
										
										
											2016-07-09 15:37:47 -04:00
+								-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
 								-- limit years to the range 1601-9999 (ISO 8601 accepts greater than
-												Shared: improve year sanity check in normalizeDate

Previously we parsed a list of dates, took the first one, and then
tested its year range. That meant that if the first one failed, we
returned nothing, regardless of what the others did. Now we test for
sanity before running `msum` over the list of Maybe values. Anything
failing the test will be Nothing, so will not be a candidate.

											
										
										
											2016-07-09 17:03:39 -04:00
+								-- or equal to 1583, but MS Word only accepts dates starting 1601).
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
+								normalizeDate :: String -> Maybe String
-												Shared: improve year sanity check in normalizeDate

Previously we parsed a list of dates, took the first one, and then
tested its year range. That meant that if the first one failed, we
returned nothing, regardless of what the others did. Now we test for
sanity before running `msum` over the list of Maybe values. Anything
failing the test will be Nothing, so will not be a candidate.

											
										
										
											2016-07-09 17:03:39 -04:00
+								normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
 								  (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
 								  where rejectBadYear day = case toGregorian day of
 								          (y, _, _) | y >= 1601 && y <= 9999 -> Just day
 								          _ -> Nothing
-												Shared: normalizeDate should reject illegal years.

We only allow years between 1601 and 9999, inclusive. The ISO 8601
actually says that years are supposed to start with 1583, but MS Word
only allows 1601-9999. This should stop corrupted word files if the date
is out of that range, or is parsed incorrectly.

											
										
										
											2016-07-09 15:37:47 -04:00
+								        parsetimeWith =
-												More changes to avoid compiler warnings on ghc 7.10.

* CPP around deprecated `parseTime`.
* Text.Pandoc.Compat.Locale -> Text.Pandoc.Compat.Time,
  now exports Data.Time.

											
										
										
											2015-10-14 10:05:17 -07:00
+								#if MIN_VERSION_time(1,5,0)
 								             parseTimeM True defaultTimeLocale
 								#else
 								             parseTime defaultTimeLocale
 								#endif
-												Shared: normalizeDate should reject illegal years.

We only allow years between 1601 and 9999, inclusive. The ISO 8601
actually says that years are supposed to start with 1583, but MS Word
only allows 1601-9999. This should stop corrupted word files if the date
is out of that range, or is parsed incorrectly.

											
										
										
											2016-07-09 15:37:47 -04:00
+								        formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
-												Shared: Add further formats for `normalizeDate`

We want to avoid illegal dates -- in particular years with greater than
four digits. We attempt to parse series of digits first as `%Y%m%d`, then
`%Y%m`, and finally `%Y`.

											
										
										
											2016-07-09 11:13:25 -04:00
+								                    "%d %B %Y", "%b. %d, %Y", "%B %d, %Y",
 								                    "%Y%m%d", "%Y%m", "%Y"]
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								--
 								-- Pandoc block and inline list processing
 								--
 								-- | Produce the infinite, lazily generated sequence of markers for
 								-- an ordered list.  The number style picks the numbering
 								-- (decimal, letters cycling through the alphabet, or roman
 								-- numerals), the start value picks where the sequence begins, and
 								-- the delimiter style decides how each number is decorated.
 								orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
 								orderedListMarkers (start, numstyle, numdelim) = map decorate numbers
 								  where
 								    fromStart = [start..]
 								    -- Letters repeat after the end of the alphabet; the start
 								    -- value is 1-based, hence the @start - 1@.
 								    letters alphabet = drop (start - 1) (cycle [[c] | c <- alphabet])
 								    numbers =
 								      case numstyle of
 								        DefaultStyle -> map show fromStart
 								        Example      -> map show fromStart
 								        Decimal      -> map show fromStart
 								        UpperAlpha   -> letters ['A'..'Z']
 								        LowerAlpha   -> letters ['a'..'z']
 								        UpperRoman   -> map toRomanNumeral fromStart
 								        LowerRoman   -> map (map toLower . toRomanNumeral) fromStart
 								    decorate str =
 								      case numdelim of
 								        DefaultDelim -> str ++ "."
 								        Period       -> str ++ "."
 								        OneParen     -> str ++ ")"
 								        TwoParens    -> "(" ++ str ++ ")"
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								-- | Extract the leading and trailing spaces from inside an inline element
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								-- and place them outside the element.  SoftBreaks count as Spaces for
 								-- these purposes.
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
-												Removed space at ends of lines in source.

											
										
										
											2014-07-12 22:57:22 -07:00
+								extractSpaces f is =
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								  let contents = B.unMany is
 								      left  = case viewl contents of
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								                    (Space :< _)     -> B.space
 								                    (SoftBreak :< _) -> B.softbreak
 								                    _                -> mempty
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								      right = case viewr contents of
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								                    (_ :> Space)     -> B.space
 								                    (_ :> SoftBreak) -> B.softbreak
 								                    _                -> mempty in
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								  (left <> f (B.trimInlines . B.Many $ contents) <> right)
-												Better comment on removeFormatting.

											
										
										
											2014-07-13 15:10:27 -07:00
+								-- | Extract inlines, removing formatting.
-												Shared: Generalized type of removeFormatting.

											
										
										
											2014-07-13 14:56:20 -07:00
+								removeFormatting :: Walkable Inline a => a -> [Inline]
-												Shared.stringify, removeFormatting: handle Quoted better.

Previously we were losing the qutation marks in Quoted
elements.  See #3958.

											
										
										
											2017-10-08 21:55:57 -07:00
+								removeFormatting = query go . walk (deNote . deQuote)
-												Shared:  Added removeFormatting.

API change (addition of exported function).

											
										
										
											2014-07-13 10:13:22 -07:00
+								  where go :: Inline -> [Inline]
 								        go (Str xs)     = [Str xs]
 								        go Space        = [Space]
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								        go SoftBreak    = [SoftBreak]
-												Shared:  Added removeFormatting.

API change (addition of exported function).

											
										
										
											2014-07-13 10:13:22 -07:00
+								        go (Code _ x)   = [Str x]
 								        go (Math _ x)   = [Str x]
 								        go LineBreak    = [Space]
 								        go _            = []
-												Factored out deNote in Shared.

											
										
										
											2017-01-15 22:15:35 +01:00
 								-- | Replace a footnote by an empty string element; every other
 								-- inline is returned unchanged.
 								deNote :: Inline -> Inline
 								deNote inline =
 								  case inline of
 								    Note _ -> Str ""
 								    _      -> inline
-												Shared:  Added removeFormatting.

API change (addition of exported function).

											
										
										
											2014-07-13 10:13:22 -07:00
-												Shared.stringify, removeFormatting: handle Quoted better.

Previously we were losing the quotation marks in Quoted
elements.  See #3958.

											
										
										
											2017-10-08 21:55:57 -07:00
+								deQuote :: Inline -> Inline
 								deQuote (Quoted SingleQuote xs) =
 								  Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"])
 								deQuote (Quoted DoubleQuote xs) =
 								  Span ("",[],[]) (Str "\8220" : xs ++ [Str "\8221"])
 								deQuote x = x
-												Generalized type of stringify.

											
										
										
											2013-08-28 08:43:51 -07:00
+								-- | Convert pandoc structure to a string with formatting removed.
-												Shared:  Changed stringify so it ignores notes.

Also documented this in README.

											
										
										
											2013-08-16 13:22:27 -07:00
+								-- Footnotes are skipped (since we don't want their contents in link
 								-- labels).
-												Generalized type of stringify.

											
										
										
											2013-08-28 08:43:51 -07:00
+								stringify :: Walkable Inline a => a -> String
-												Shared.stringify, removeFormatting: handle Quoted better.

Previously we were losing the qutation marks in Quoted
elements.  See #3958.

											
										
										
											2017-10-08 21:55:57 -07:00
+								stringify = query go . walk (deNote . deQuote)
-												Added 'stringify' to Text.Pandoc.Shared.

											
										
										
											2010-11-27 07:08:06 -08:00
+								  where go :: Inline -> [Char]
 								        go Space = " "
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								        go SoftBreak = " "
-												Added 'stringify' to Text.Pandoc.Shared.

											
										
										
											2010-11-27 07:08:06 -08:00
+								        go (Str x) = x
-												Add support for attributes in inline Code.

Additional related changes:

* URLs in Code in autolinks now use class "url".
* Require highlighting-kate 0.2.8.2, which omits the final <br/> tag,
  essential for inline code.

											
										
										
											2011-01-26 20:44:25 -08:00
+								        go (Code _ x) = x
-												Added new prettyprinting module.

* Added Text.Pandoc.Pretty.
  This is better suited for pandoc than the 'pretty' package.
  One advantage is that we now get proper wrapping; Emph [Inline]
  is no longer treated as a big unwrappable unit. Previously
  we only got breaks for spaces at the "outer level." We can also
  more easily avoid doubled blank lines.  Performance is
  significantly better as well.

* Removed Text.Pandoc.Blocks.
  Text.Pandoc.Pretty allows you to define blocks and concatenate
  them.

* Modified markdown, RST, org readers to use Text.Pandoc.Pretty
  instead of Text.PrettyPrint.HughesPJ.

* Text.Pandoc.Shared:  Added writerColumns to WriterOptions.

* Markdown, RST, Org writers now break text at writerColumns.

* Added --columns command-line option, which sets stColumns
  and writerColumns.

* Table parsing:  If the size of the header > stColumns,
  use the header size as 100% for purposes of calculating
  relative widths of columns.

											
										
										
											2010-12-12 20:09:14 -08:00
+								        go (Math _ x) = x
-												EPUB TOC: replace literal "<br/>" with space.

Closes #2105.

											
										
										
											2015-08-10 16:58:47 -07:00
+								        go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
-												Shared: Use stringify to simplify inlineListToIdentifier.

											
										
										
											2010-12-19 10:13:36 -08:00
+								        go LineBreak = " "
-												Added 'stringify' to Text.Pandoc.Shared.

											
										
										
											2010-11-27 07:08:06 -08:00
+								        go _ = ""
-												Correctly implement capitalisation.

Using `map toUpper` to capitalise text is wrong, as e.g.
“Straße” should be converted to “STRASSE”, which is 1 character
longer. This commit adds a `capitalize` function and replaces
2 identical implementations in different modules (`toCaps` and
`capitalize`) with it.

											
										
										
											2014-08-03 16:48:55 +04:00
+								-- | Bring all regular text in a pandoc structure to uppercase.
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								--
-												Correctly implement capitalisation.

Using `map toUpper` to capitalise text is wrong, as e.g.
“Straße” should be converted to “STRASSE”, which is 1 character
longer. This commit adds a `capitalize` function and replaces
2 identical implementations in different modules (`toCaps` and
`capitalize`) with it.

											
										
										
											2014-08-03 16:48:55 +04:00
+								-- This function correctly handles cases where a lowercase character doesn't
 								-- match to a single uppercase character – e.g. “Straße” would be converted
 								-- to “STRASSE”, not “STRAßE”.
 								capitalize :: Walkable Inline a => a -> a
 								capitalize = walk go
 								  where go :: Inline -> Inline
 								        go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
 								        go x       = x
-												Shared: Export compactify', formerly in Markdown reader.

											
										
										
											2012-09-27 17:22:17 -07:00
+								-- | Change final list item from @Para@ to @Plain@ if the list contains
 								-- no other @Para@ blocks.  Like compactify, but operates on @Blocks@ rather
 								-- than @[Block]@.
-												Shared: rename compactify', compactify'DL -> compactify, compactifyDL.

											
										
										
											2017-01-27 21:36:45 +01:00
+								compactify :: [Blocks]  -- ^ List of list items (each a list of blocks)
-												Shared: Export compactify', formerly in Markdown reader.

											
										
										
											2012-09-27 17:22:17 -07:00
+								           -> [Blocks]
-												Shared: rename compactify', compactify'DL -> compactify, compactifyDL.

											
										
										
											2017-01-27 21:36:45 +01:00
+								compactify [] = []
 								compactify items =
-												Shared: Export compactify', formerly in Markdown reader.

											
										
										
											2012-09-27 17:22:17 -07:00
+								  let (others, final) = (init items, last items)
 								  in  case reverse (B.toList final) of
 								           (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
 								                            -- if this is only Para, change to Plain
 								                            [_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
 								                            _   -> items
 								           _      -> items
-												Shared: rename compactify', compactify'DL -> compactify, compactifyDL.

											
										
										
											2017-01-27 21:36:45 +01:00
+								-- | Like @compactify@, but acts on items of definition lists.
 								compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
 								compactifyDL items =
-												Move `compactify'DL` from Markdown reader into Shared

The function `compactify'DL`, used to change the final definition item of a
definition list into a `Plain` iff all other items are `Plain`s as well, is
useful in many parsers and hence moved into Text.Pandoc.Shared.

											
										
										
											2014-04-19 14:48:35 +02:00
+								  let defs = concatMap snd items
-												Fixed runtime error with compactify'DL on certain lists.

Closes #1452.  Added test.

											
										
										
											2014-07-25 10:53:04 -07:00
+								  in  case reverse (concatMap B.toList defs) of
 								           (Para x:xs)
 								             | not (any isPara xs) ->
 								                   let (t,ds) = last items
 								                       lastDef = B.toList $ last ds
 								                       ds' = init ds ++
 								                             if null lastDef
 								                                then [B.fromList lastDef]
 								                                else [B.fromList $ init lastDef ++ [Plain x]]
 								                    in init items ++ [(t, ds')]
 								             | otherwise           -> items
 								           _                       -> items
-												Move `compactify'DL` from Markdown reader into Shared

The function `compactify'DL`, used to change the final definition item of a
definition list into a `Plain` iff all other items are `Plain`s as well, is
useful in many parsers and hence moved into Text.Pandoc.Shared.

											
										
										
											2014-04-19 14:48:35 +02:00
-												Shared: add function combining lines using LineBreak

The `linesToBlock` function takes a list of lines and combines them by appending
a hard `LineBreak` to each line and concatenating the result, putting the result
into a `Para`. This is most useful when converting `LineBlock`
elements.

											
										
										
											2016-10-13 08:46:38 +02:00
+								-- | Combine a list of lines by adding hard linebreaks.
 								combineLines :: [[Inline]] -> [Inline]
 								combineLines []           = []
 								-- A 'LineBreak' goes in front of every line except the first one.
 								combineLines (first:more) = first ++ concatMap (LineBreak :) more
 								-- | Build a single 'Para' from a list of lines, joining the lines with
 								--   'combineLines'.  Handy e.g. for rudimentary support of LineBlock
 								--   elements in writers.
 								linesToPara :: [[Inline]] -> Block
 								linesToPara inlineLines = Para (combineLines inlineLines)
-												Changed heuristic in compactify.

compactify has to decide whether a Para that ends a list is a Para
intentionally, or just because of the blank lines at the end of
every list.  In the latter case the Para is turned to a Plain.

The old heuristic was:  change final Para to Plain iff the other
items all end in Plain.  This produces bad results when, for example,
an item contains just a Plain and an HTML comment, as in

- a
<!--
- b
-->
-c

The new heuristic:  change final Para to Plain iff the other items
don't contain a Para.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1616 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-11-01 02:38:18 +00:00
+								isPara :: Block -> Bool
 								-- True only for the 'Para' constructor; every other block yields False.
 								isPara blk = case blk of
 								  Para{} -> True
 								  _      -> False
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Data structure for defining hierarchical Pandoc documents
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								data Element = Blk Block
-												Shared:  Changed type of Element.

Sec now includes a field for Attr rather than just String
(the identifier).

Note, this is an API change.

											
										
										
											2013-02-12 20:13:23 -08:00
+								             | Sec Int [Int] Attr [Inline] [Element]
 								             --    lvl  num attributes label    contents
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								             deriving (Eq, Read, Show, Typeable, Data)
-												Use query instead of queryWith.

											
										
										
											2013-08-10 18:13:38 -07:00
+								instance Walkable Inline Element where
 								  -- A 'Blk' delegates to the wrapped block; a 'Sec' keeps its level,
 								  -- numbering and attributes and descends into label and children.
 								  walk f el = case el of
 								    Blk blk -> Blk (walk f blk)
 								    Sec lvl nums attr label kids ->
 								      Sec lvl nums attr (walk f label) (walk f kids)
 								  walkM f el = case el of
 								    Blk blk -> Blk `fmap` walkM f blk
 								    Sec lvl nums attr label kids -> do
 								      label' <- walkM f label
 								      kids' <- walkM f kids
 								      return $ Sec lvl nums attr label' kids'
 								  -- Results for the label are combined before those for the children.
 								  query f el = case el of
 								    Blk blk              -> query f blk
 								    Sec _ _ _ label kids -> query f label <> query f kids
 								instance Walkable Block Element where
 								  -- A 'Blk' delegates to the wrapped block; a 'Sec' keeps its level,
 								  -- numbering and attributes and descends into label and children.
 								  walk f el = case el of
 								    Blk blk -> Blk (walk f blk)
 								    Sec lvl nums attr label kids ->
 								      Sec lvl nums attr (walk f label) (walk f kids)
 								  walkM f el = case el of
 								    Blk blk -> Blk `fmap` walkM f blk
 								    Sec lvl nums attr label kids -> do
 								      label' <- walkM f label
 								      kids' <- walkM f kids
 								      return $ Sec lvl nums attr label' kids'
 								  -- Results for the label are combined before those for the children.
 								  query f el = case el of
 								    Blk blk              -> query f blk
 								    Sec _ _ _ label kids -> query f label <> query f kids
-												Shared:  Fixed uniqueIdent so it behaves as described in README.

Previously some characters that are illegal in HTML identifiers,
such as '<', were being allowed in header identifiers.  The logic
has now been fixed. Thanks to Xyne for reporting.

											
										
										
											2010-03-28 22:29:31 -07:00
+								-- | Convert Pandoc inline list to plain text identifier.  HTML
 								-- identifiers must start with a letter, and may contain only
-												Don't allow colon in autogenerated HTML identifiers.

They have a special meaning in XML (e.g. in EPUB).

											
										
										
											2010-07-04 23:26:04 -07:00
+								-- letters, digits, and the characters _-.
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								inlineListToIdentifier :: [Inline] -> String
-												Shared:  Fixed uniqueIdent so it behaves as described in README.

Previously some characters that are illegal in HTML identifiers,
such as '<', were being allowed in header identifiers.  The logic
has now been fixed. Thanks to Xyne for reporting.

											
										
										
											2010-03-28 22:29:31 -07:00
+								inlineListToIdentifier =
-												Shared: Use stringify to simplify inlineListToIdentifier.

											
										
										
											2010-12-19 10:13:36 -08:00
+								  dropWhile (not . isAlpha) . intercalate "-" . words .
 								    map (nbspToSp . toLower) .
-												Revert "Use -XNoImplicitPrelude and 'import Prelude' explicitly."

This reverts commit c423dbb5a34c2d1195020e0f0ca3aae883d0749b.

											
										
										
											2015-11-09 10:08:22 -08:00
+								    filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
-												Shared: Use stringify to simplify inlineListToIdentifier.

											
										
										
											2010-12-19 10:13:36 -08:00
+								    stringify
 								 where nbspToSp '\160'     =  ' '
 								       nbspToSp x          =  x
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
 								hierarchicalize :: [Block] -> [Element]
-												Revert "Shared.hierarchicalize: Don't number subsections of unnumbered sections."

This reverts commit 2a46042661a088096ac54097db5cd3674438bb63.

											
										
										
											2014-07-21 20:47:18 -07:00
+								hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
 								hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
 								hierarchicalizeWithIds [] = return []
 								hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
 								  lastnum <- S.get
 								  let lastnum' = take level lastnum
 								  let newnum = case length lastnum' of
 								                    x | "unnumbered" `elem` classes -> []
 								                      | x >= level -> init lastnum' ++ [last lastnum' + 1]
 								                      | otherwise -> lastnum ++
 								                           replicate (level - length lastnum - 1) 0 ++ [1]
 								  unless (null newnum) $ S.put newnum
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								  let (sectionContents, rest) = break (headerLtEq level) xs
-												Revert "Shared.hierarchicalize: Don't number subsections of unnumbered sections."

This reverts commit 2a46042661a088096ac54097db5cd3674438bb63.

											
										
										
											2014-07-21 20:47:18 -07:00
+								  sectionContents' <- hierarchicalizeWithIds sectionContents
 								  rest' <- hierarchicalizeWithIds rest
-												Shared:  Changed type of Element.

Sec now includes a field for Attr rather than just String
(the identifier).

Note, this is an API change.

											
										
										
											2013-02-12 20:13:23 -08:00
+								  return $ Sec level newnum attr title' sectionContents' : rest'
-												Changed hierarchicalize so it treats references div as top-level header.

Fixes a bug with `--section-divs`, where the final references section
added by pandoc-citeproc, enclosed in its own div, got put in the
div for the section previous to it.

This fixes #2294.  Longer term, we might think about how hierarchicalize
should interact with Div elements.

											
										
										
											2015-07-12 13:57:14 -07:00
+								hierarchicalizeWithIds ((Div ("",["references"],[])
 								                         (Header level (ident,classes,kvs) title' : xs)):ys) =
 								  hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs)
 								                           title') : (xs ++ ys))
-												Revert "Shared.hierarchicalize: Don't number subsections of unnumbered sections."

This reverts commit 2a46042661a088096ac54097db5cd3674438bb63.

											
										
										
											2014-07-21 20:47:18 -07:00
+								hierarchicalizeWithIds (x:rest) = do
 								  rest' <- hierarchicalizeWithIds rest
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								  return $ (Blk x) : rest'
 								headerLtEq :: Int -> Block -> Bool
-												Added Attr field to Header.

Previously header ids were autogenerated by the writers.
Now they are generated (unless supplied explicitly) in the
markdown parser, if the `header_identifiers` extension is
selected.

In addition, the textile reader now supports id attributes on
headers.

											
										
										
											2012-10-29 22:45:52 -07:00
+								headerLtEq level (Header l _ _) = l <= level
-												Changed hierarchicalize so it treats references div as top-level header.

Fixes a bug with `--section-divs`, where the final references section
added by pandoc-citeproc, enclosed in its own div, got put in the
div for the section previous to it.

This fixes #2294.  Longer term, we might think about how hierarchicalize
should interact with Div elements.

											
										
										
											2015-07-12 13:57:14 -07:00
+								headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _))  = l <= level
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								headerLtEq _ _ = False
-												Shared: Export uniqueIdent, don't allow tilde in identifier.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1894 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2010-03-16 06:45:52 +00:00
+								-- | Generate a unique identifier from a list of inlines.
 								-- Second argument is a list of already used identifiers.
-												Changed type of Shared.uniqueIdent argument from [String] to Set String.

This avoids performance problems in documents with many identically
named headers.

Closes #2671.

											
										
										
											2016-01-22 10:16:47 -08:00
+								uniqueIdent :: [Inline] -> Set.Set String -> String
-												Added odt reader

Fully implemented features:

* Paragraphs
* Headers
* Basic styling
* Unordered lists
* Ordered lists
* External Links
* Internal Links
* Footnotes, Endnotes
* Blockquotes

Partly implemented features:

* Citations
  Very basic, but pandoc can't do much more
* Tables
  No headers, no sizing, limited styling

											
										
										
											2015-07-23 09:06:14 +02:00
+								uniqueIdent title' usedIdents
 								  =  let baseIdent = case inlineListToIdentifier title' of
-												Shared:  Fixed uniqueIdent so it behaves as described in README.

Previously some characters that are illegal in HTML identifiers,
such as '<', were being allowed in header identifiers.  The logic
has now been fixed. Thanks to Xyne for reporting.

											
										
										
											2010-03-28 22:29:31 -07:00
+								                        ""   -> "section"
 								                        x    -> x
-												Added odt reader

Fully implemented features:

* Paragraphs
* Headers
* Basic styling
* Unordered lists
* Ordered lists
* External Links
* Internal Links
* Footnotes, Endnotes
* Blockquotes

Partly implemented features:

* Citations
  Very basic, but pandoc can't do much more
* Tables
  No headers, no sizing, limited styling

											
										
										
											2015-07-23 09:06:14 +02:00
+								         numIdent n = baseIdent ++ "-" ++ show n
-												Changed type of Shared.uniqueIdent argument from [String] to Set String.

This avoids performance problems in documents with many identically
named headers.

Closes #2671.

											
										
										
											2016-01-22 10:16:47 -08:00
+								     in  if baseIdent `Set.member` usedIdents
 								           then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								                  Just x  -> numIdent x
 								                  Nothing -> baseIdent   -- if we have more than 60,000, allow repeats
-												Added odt reader

Fully implemented features:

* Paragraphs
* Headers
* Basic styling
* Unordered lists
* Ordered lists
* External Links
* Internal Links
* Footnotes, Endnotes
* Blockquotes

Partly implemented features:

* Citations
  Very basic, but pandoc can't do much more
* Tables
  No headers, no sizing, limited styling

											
										
										
											2015-07-23 09:06:14 +02:00
+								           else baseIdent
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | True if block is a Header block.
 								isHeaderBlock :: Block -> Bool
-												Added Attr field to Header.

Previously header ids were autogenerated by the writers.
Now they are generated (unless supplied explicitly) in the
markdown parser, if the `header_identifiers` extension is
selected.

In addition, the textile reader now supports id attributes on
headers.

											
										
										
											2012-10-29 22:45:52 -07:00
+								isHeaderBlock (Header _ _ _) = True
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								isHeaderBlock _ = False
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
+								-- | Shift header levels up or down.
 								headerShift :: Int -> Pandoc -> Pandoc
-												Use walk, walkM in place of bottomUp, bottomUpM when possible.

They are significantly faster.

											
										
										
											2013-08-10 18:45:00 -07:00
+								headerShift n = walk shift
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
+								  where shift :: Block -> Block
-												Added Attr field to Header.

Previously header ids were autogenerated by the writers.
Now they are generated (unless supplied explicitly) in the
markdown parser, if the `header_identifiers` extension is
selected.

In addition, the textile reader now supports id attributes on
headers.

											
										
										
											2012-10-29 22:45:52 -07:00
+								        shift (Header level attr inner) = Header (level + n) attr inner
 								        shift x                         = x
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
-												Shared: export isTightList.

											
										
										
											2013-01-07 20:12:05 -08:00
+								-- | Detect if a list is tight.
 								isTightList :: [[Block]] -> Bool
-												HLint: Use all

Replace `and . map` with `all`.

											
										
										
											2013-12-19 17:06:27 -05:00
+								isTightList = all firstIsPlain
-												Shared: export isTightList.

											
										
										
											2013-01-07 20:12:05 -08:00
+								  where firstIsPlain (Plain _ : _) = True
 								        firstIsPlain _             = False
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template.
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								-- | Set a field of a 'Meta' object.  If the field already has a value,
 								-- convert it into a list with the new value appended to the old value(s).
 								addMetaField :: ToMetaValue a
 								             => String
 								             -> a
 								             -> Meta
 								             -> Meta
 								addMetaField key val (Meta meta) =
 								  Meta $ M.insertWith combine key (toMetaValue val) meta
-												Shared addMetaField:  if old and new values both lists, concatenate.

											
										
										
											2014-05-12 13:05:42 -07:00
+								  where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template.
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								        combine newval x             = MetaList [x, newval]
-												Shared addMetaField:  if old and new values both lists, concatenate.

											
										
										
											2014-05-12 13:05:42 -07:00
+								        tolist (MetaList ys)         = ys
 								        tolist y                     = [y]
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template.
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
 								-- | Build a 'Meta' out of an old-style title, author list and date.
 								-- Exists to ease the transition from the old API.
 								makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
 								makeMeta title authors date = (withTitle . withAuthors . withDate) nullMeta
 								  where
 								    -- Applied right to left: date first, then author, then title.
 								    withTitle   = addMetaField "title" (B.fromList title)
 								    withAuthors = addMetaField "author" (map B.fromList authors)
 								    withDate    = addMetaField "date" (B.fromList date)
-												Added eastAsianLineBreakFilter to Shared.

This used to live in the Markdown reader.

											
										
										
											2017-05-30 10:22:48 +02:00
+								-- | Drop a 'SoftBreak' whenever the character just before it and the
 								-- character just after it both have a 'charWidth' of 2 (i.e. between
 								-- East Asian characters).
 								eastAsianLineBreakFilter :: Pandoc -> Pandoc
 								eastAsianLineBreakFilter = bottomUp joinWide
 								  where
 								    joinWide :: [Inline] -> [Inline]
 								    joinWide inlines@(before : SoftBreak : after : rest)
 								      | wideOnBothSides (stringify before) (stringify after) =
 								          before : after : rest
 								      | otherwise = inlines
 								    joinWide inlines = inlines
 								    -- Looks at the last character on the left and the first on the
 								    -- right; an empty string on either side never counts as wide.
 								    wideOnBothSides :: String -> String -> Bool
 								    wideOnBothSides left@(_:_) (firstRight:_) =
 								      charWidth (last left) == 2 && charWidth firstRight == 2
 								    wideOnBothSides _ _ = False
-												Moved renderTags' from HTML reader & SelfContained to Shared.

Improved removal of markdown="1" attribute in Markdown reader.

											
										
										
											2012-08-15 09:42:16 -07:00
+								--
 								-- TagSoup HTML handling
 								--
 								-- | Render HTML tags.
 								renderTags' :: [Tag String] -> String
 								renderTags' = renderTagsOptions
-												HLint: use `elem` and `notElem`

Replaces long conditional chains with calls to `elem` and `notElem`.

											
										
										
											2013-12-19 20:19:24 -05:00
+								               renderOptions{ optMinimize = matchTags ["hr", "br", "img",
 								                                                       "meta", "link"]
 								                            , optRawTag   = matchTags ["script", "style"] }
 								              where matchTags = \tags -> flip elem tags . map toLower
-												Moved renderTags' from HTML reader & SelfContained to Shared.

Improved removal of markdown="1" attribute in Markdown reader.

											
										
										
											2012-08-15 09:42:16 -07:00
-												Improved template handling:

+ Split template haskell functions into new module,
  Text.Pandoc.TH
+ Distinguish contentsOf and binaryContentsOf; the former
  uses text mode in Windows, while the latter uses binary mode


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1368 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-08-02 17:22:55 +00:00
+								--
 								-- File handling
 								--
-												Reworked Text.Pandoc.ODT to use zip-archive instead of calling external 'zip'.

+ Removed utf8-string and xml-light modules, and unneeded content.xml.
+ Removed code for building reference.odt from Setup.hs.
  The ODT is now built using template haskell in Text.Pandoc.ODT.
+ Removed copyright statements for utf8-string and xml modules,
  since they are no longer included in the source.
+ README: Removed claim that 'zip' is needed for ODT production.
+ Removed dependency on 'zip' from debian/control.
+ Text.Pandoc.Shared: Removed withTempDir, added inDirectory.
+ Added makeZip to Text.Pandoc.TH.
+ pandoc.cabal: Added dependencies on old-time, zip-archive, and utf8-string.
  Added markdown2pdf files to extra-sources list.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1417 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-09-04 02:51:28 +00:00
+								-- | Perform an IO action in a directory, returning to starting directory.
 								-- The starting directory is captured with 'getCurrentDirectory' as the
 								-- acquire step of 'E.bracket' and restored with 'setCurrentDirectory' as
 								-- the release step, so it is reset even when the action throws.
 								inDirectory :: FilePath -> IO a -> IO a
-												fix inDirectory to reset to the original directory in case an exception occurs

											
										
										
											2014-10-08 23:25:01 +02:00
+								inDirectory path action = E.bracket
 								                             getCurrentDirectory
 								                             setCurrentDirectory
 								                             (const $ setCurrentDirectory path >> action)
-												Removed TH module; refactored LaTeXMathML not to use TH.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1692 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-12-31 01:11:23 +00:00
-												Complete rewrite of LaTeX reader.

* The new reader is more robust, accurate, and extensible.
  It is still quite incomplete, but it should be easier
  now to add features.

* Text.Pandoc.Parsing: Added withRaw combinator.

* Markdown reader: do escapedChar before raw latex inline.
  Otherwise we capture commands like \{.

* Fixed latex citation tests for new citeproc.

* Handle \include{} commands in latex.
  This is done in pandoc.hs, not the (pure) latex reader.
  But the reader exports the needed function, handleIncludes.

* Moved err and warn from pandoc.hs to Shared.

* Fixed tests - raw tex should sometimes have trailing space.

* Updated lhs-test for highlighting-kate changes.

											
										
										
											2012-01-29 23:54:00 -08:00
+								--
 								-- Error reporting
 								--
-												Move utility error functions to Text.Pandoc.Shared

											
										
										
											2015-02-18 21:05:47 +00:00
+								mapLeft :: (a -> b) -> Either a c -> Either b c
 								-- Apply the function to a 'Left' payload; a 'Right' value is
 								-- passed through unchanged.
 								mapLeft f = either (Left . f) Right
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								-- | Remove intermediate "." and ".." directories from a path.
 								-- The path is split with 'splitDirectories' and the surviving components
 								-- are reassembled with 'Posix.joinPath'.
 								--
-												Fixed haddock comment.

											
										
										
											2014-08-13 13:59:18 -07:00
+								-- > collapseFilePath "./foo" == "foo"
 								-- > collapseFilePath "/bar/../baz" == "/baz"
 								-- > collapseFilePath "/../baz" == "/../baz"
 								-- > collapseFilePath "parent/foo/baz/../bar" ==  "parent/foo/bar"
 								-- > collapseFilePath "parent/foo/baz/../../bar" ==  "parent/bar"
 								-- > collapseFilePath "parent/foo/.." ==  "parent"
 								-- > collapseFilePath "/parent/foo/../../bar" ==  "/bar"
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								collapseFilePath :: FilePath -> FilePath
-												MediaBag:  ensure that / is always used as path separator.

											
										
										
											2015-09-26 22:40:58 -07:00
+								collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								  where
 								    -- The fold keeps the retained components in reverse order (most
 								    -- recent first); the result is reversed before joining.
 								    -- A "." component is dropped.
 								    go rs "." = rs
 								    -- A ".." component removes the previous component, unless that
 								    -- component is itself ".." or a lone path separator, in which case
 								    -- the ".." is kept.
 								    go r@(p:rs) ".." = case p of
 								                            ".." -> ("..":r)
-												Shared: Make collapseFilePath OS-agnostic

											
										
										
											2014-09-25 12:42:53 +01:00
+								                            (checkPathSeperator -> Just True) -> ("..":r)
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								                            _ -> rs
-												MediaBag:  ensure that / is always used as path separator.

											
										
										
											2015-09-26 22:40:58 -07:00
 								    -- A component that is a single path separator discards everything
 								    -- accumulated so far and is replaced by the POSIX separator.
+								    go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
 								    -- Any other component is kept; this also covers ".." when nothing
 								    -- has been accumulated yet.
+								    go rs x = x:rs
-												Shared: Make collapseFilePath OS-agnostic

											
										
										
											2014-09-25 12:42:53 +01:00
 								    -- 'isSingleton' yields the element of a one-element list, so
 								    -- 'checkPathSeperator' is 'Just True' only for a one-character
 								    -- component whose character satisfies 'isPathSeparator'.
+								    isSingleton [] = Nothing
 								    isSingleton [x] = Just x
 								    isSingleton _ = Nothing
 								    checkPathSeperator = fmap isPathSeparator . isSingleton
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid repetition (DRY).

											
										
										
											2016-10-12 17:42:30 +02:00
+								--
 								-- File selection from the archive
 								--
 								-- | Return @(path, contents)@ pairs for the files in the archive whose
 								-- path satisfies the predicate.  The candidate paths come from
 								-- 'filesInArchive'; a path for which 'findEntryByPath' finds no entry
 								-- is omitted from the result.
 								filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)]
 								filteredFilesFromArchive zf f =
 								  mapMaybe (fileAndBinary zf) (filter f (filesInArchive zf))
 								  where
 								    -- Look up one path and pair it with the entry's contents.
 								    fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
-												Use bind function instead of pattern matching

											
										
										
											2016-10-17 16:58:53 +02:00
+								    fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid repetition (DRY).

											
										
										
											2016-10-12 17:42:30 +02:00
-												Shared: Provide custom isURI that rejects unknown schemes [isURI]

We also export the set of known `schemes`.

The new function replaces the function of the same name
from `Network.URI`, as the latter did not check whether a scheme is
well-known.  E.g. MediaWiki wikis frequently feature pages with names
like `User:John`. These links were interpreted as URIs, thus turning
internal links into global links. This is prevented by also checking
whether the scheme of a URI is frequently used (i.e. is IANA registered
or an otherwise well-known scheme).

Fixes: #2713

Update set of well-known URIs from IANA list
All official IANA schemes (as of 2017-05-22) are included in the set of
known schemes.  The four non-official schemes doi, isbn, javascript, and
pmid are kept.

											
										
										
											2017-05-23 09:48:11 +02:00
 								--
 								-- IANA URIs
 								--
 								-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus
 								-- the unofficial schemes doi, javascript, isbn, pmid.
 								-- All entries are written in lowercase.
 								schemes :: Set.Set String
 								schemes = Set.fromList
 								  -- Official IANA schemes
 								  [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs"
 								  , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin"
 								  , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension"
 								  , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs"
 								  , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle"
 								  , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed"
 								  , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg"
 								  , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham"
 								  , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon"
 								  , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6"
 								  , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs"
 								  , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap"
 								  , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market"
 								  , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access"
 								  , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel"
 								  , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath"
 								  , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint"
 								  , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller"
 								  , "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode"
 								  , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular"
 								  , "ms-settings-cloudstorage", "ms-settings-connectabledevices"
 								  , "ms-settings-displays-topology", "ms-settings-emailandaccounts"
 								  , "ms-settings-language", "ms-settings-location", "ms-settings-lock"
 								  , "ms-settings-nfctransactions", "ms-settings-notifications"
 								  , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity"
 								  , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace"
 								  , "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad"
 								  , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word"
 								  , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs"
 								  , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd"
 								  , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop"
 								  , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis"
 								  , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp"
 								  , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn"
 								  , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews"
 								  , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam"
 								  , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid"
 								  , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn"
 								  , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi"
 								  , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid"
 								  , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire"
 								  , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r"
 								  , "z39.50s"
 								  -- Unofficial schemes
 								  , "doi", "isbn", "javascript", "pmid"
 								  ]
 								-- | Check if the string is a valid URL with an IANA or frequently used but
 								-- unofficial scheme (see @schemes@).  Returns 'False' when 'parseURI'
 								-- fails.  The scheme is matched case-insensitively.
 								isURI :: String -> Bool
 								isURI = maybe False hasKnownScheme . parseURI
 								  where
-												Shared.isURI: allow uppercase versions of known schemes.

											
										
										
											2017-05-23 09:49:56 +02:00
 								    -- The ':' characters are removed from 'uriScheme' and the rest is
 								    -- lowercased before the lookup in 'schemes'.
+								    hasKnownScheme = (`Set.member` schemes) . map toLower .
 								                     filter (/= ':') . uriScheme
-												Shared: Provide custom isURI that rejects unknown schemes [isURI]

We also export the set of known `schemes`.

The new function replaces the function of the same name
from `Network.URI`, as the latter did not check whether a scheme is
well-known.  E.g. MediaWiki wikis frequently feature pages with names
like `User:John`. These links were interpreted as URIs, thus turning
internal links into global links. This is prevented by also checking
whether the scheme of a URI is frequently used (i.e. is IANA registered
or an otherwise well-known scheme).

Fixes: #2713

Update set of well-known URIs from IANA list
All official IANA schemes (as of 2017-05-22) are included in the set of
known schemes.  The four non-official schemes doi, isbn, javascript, and
pmid are kept.

											
										
										
											2017-05-23 09:48:11 +02:00
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
+								---
 								--- Squash blocks into inlines
 								---
 								-- | Convert a single 'Block' into a list of inlines.  Blocks that
 								-- contain other blocks are flattened with 'blocksToInlines';
 								-- 'HorizontalRule' and 'Null' produce no inlines.
 								blockToInlines :: Block -> [Inline]
 								blockToInlines (Plain ils) = ils
 								blockToInlines (Para ils) = ils
-												Shared: add function combining lines using LineBreak

The `linesToBlock` function takes a list of lines and combines them by appending
a hard `LineBreak` to each line and concatenating the result, putting the result
it into a `Para`. This is most useful when dealing when converting `LineBlock`
elements.

											
										
										
											2016-10-13 08:46:38 +02:00
+								blockToInlines (LineBlock lns) = combineLines lns
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
 								-- Code and raw blocks become their inline counterparts.
+								blockToInlines (CodeBlock attr str) = [Code attr str]
 								blockToInlines (RawBlock fmt str) = [RawInline fmt str]
-												Shared: Add BlockQuote to blocksToInlines

											
										
										
											2016-06-22 13:41:53 -04:00
+								blockToInlines (BlockQuote blks) = blocksToInlines blks
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
 								-- List items are flattened and concatenated with no separator
 								-- between items; the ordered-list attributes are ignored.
+								blockToInlines (OrderedList _ blkslst) =
 								  concatMap blocksToInlines blkslst
 								blockToInlines (BulletList blkslst) =
 								  concatMap blocksToInlines blkslst
 								blockToInlines (DefinitionList pairslst) =
 								  concatMap f pairslst
 								  where
 								    -- Each entry is the term, then ":" and a space, then the
 								    -- flattened definitions.
 								    f (ils, blkslst) = ils ++
 								      [Str ":", Space] ++
 								      (concatMap blocksToInlines blkslst)
 								blockToInlines (Header _ _  ils) = ils
 								blockToInlines (HorizontalRule) = []
 								-- The header row and the body rows are separated by 'LineBreak';
 								-- the cells within a row are concatenated with no separator.
 								blockToInlines (Table _ _ _ headers rows) =
 								  intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl
 								  where
 								    tbl = headers : rows
 								blockToInlines (Div _ blks) = blocksToInlines blks
 								blockToInlines Null = []
 								-- | Convert every block with 'blockToInlines' and join the results,
 								-- inserting the given separator between consecutive blocks.
 								blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline]
 								blocksToInlinesWithSep sep = intercalate sep . map blockToInlines
 								-- | Convert a list of blocks into inlines, separating consecutive
 								-- blocks with a pilcrow surrounded by spaces.
 								blocksToInlines :: [Block] -> [Inline]
 								blocksToInlines blks = blocksToInlinesWithSep paragraphMark blks
 								  where
 								    paragraphMark = [Space, Str "¶", Space]
-												Rename README to MANUAL.txt

											
										
										
											2016-07-20 14:12:57 +02:00
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
-												Added safeRead to Text.Pandoc.Shared.

											
										
										
											2012-08-09 07:52:39 -07:00
+								--
 								-- Safe read
 								--
-												Make safeRead safe.

Fixes #1801

											
										
										
											2015-02-18 18:40:36 +00:00
 								-- | Parse a value with 'reads'.  Only the first parse is considered:
 								-- it is returned when the remaining input consists solely of
 								-- whitespace; in every other case (no parse, or trailing
 								-- non-whitespace input) the result is 'mzero'.
+								safeRead :: (MonadPlus m, Read a) => String -> m a
-												Added safeRead to Text.Pandoc.Shared.

											
										
										
											2012-08-09 07:52:39 -07:00
+								safeRead s = case reads s of
-												Removed `--strict`, added extensions to writer/reader names.

* The `--strict` option has been removed.
* Instead of using `--strict`, one can now use `strict` instead of
  `markdown` as an input or output format name.
* The `--enable` and `--disable` optinos have been removed.
* It is now possible to enable or disable specific extensions
  by appending them (with '+' or '-') to the writer or reader
  name.  For example `pandoc -f markdown-footnotes+hard_line_breaks`.
* The lhs extensions are now implemented this way, too; you can
  use either `+lhs` or `+literate_haskell`.

											
										
										
											2012-08-09 20:19:06 -07:00
+								                  (d,x):_
 								                    | all isSpace x -> return d
-												Make safeRead safe.

Fixes #1801

											
										
										
											2015-02-18 18:40:36 +00:00
 								                  -- Reached both when 'reads' yields nothing and when the
 								                  -- guard above fails.
+								                  _                 -> mzero
-												Moved withTempDir from PDF to Shared, export from Shared.

API change.

											
										
										
											2014-07-30 12:29:04 -07:00
 								--
 								-- Temp directory
 								--
 								-- | Run an action that receives the path of a temporary directory.
 								-- When @_WINDOWS@ is defined this is 'withTempDirectory' applied to
 								-- @"."@; otherwise it is 'withSystemTempDirectory'.
 								withTempDir :: String -> (FilePath -> IO a) -> IO a
 								#ifdef _WINDOWS
 								withTempDir = withTempDirectory "."
 								#else
 								withTempDir = withSystemTempDirectory
 								#endif