-- pandoc/src/Text/Pandoc/Shared.hs

{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE CPP                   #-}
{-# LANGUAGE DeriveDataTypeable    #-}
{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables   #-}
{-# LANGUAGE ViewPatterns          #-}
{-# LANGUAGE FlexibleInstances  #-}
{-# LANGUAGE TypeSynonymInstances  #-}
{- |
   Module      : Text.Pandoc.Shared
   Copyright   : Copyright (C) 2006-2019 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Utility functions and definitions used by the various Pandoc modules.
-}
module Text.Pandoc.Shared (
                     -- * List processing
                     splitBy,
                     splitByIndices,
                     splitStringByIndices,
                     substitute,
                     ordNub,
                     -- * Text processing
                     ToString (..),
                     backslashEscapes,
                     escapeStringUsing,
                     stripTrailingNewlines,
                     trim,
                     triml,
                     trimr,
                     trimMath,
                     stripFirstAndLast,
                     camelCaseToHyphenated,
                     toRomanNumeral,
                     escapeURI,
                     tabFilter,
                     crFilter,
                     -- * Date/time
                     normalizeDate,
                     -- * Pandoc block and inline list processing
                     orderedListMarkers,
                     extractSpaces,
                     removeFormatting,
                     deNote,
                     stringify,
                     capitalize,
                     compactify,
                     compactifyDL,
                     linesToPara,
                     Element (..),
                     hierarchicalize,
                     uniqueIdent,
                     inlineListToIdentifier,
                     isHeaderBlock,
                     headerShift,
                     stripEmptyParagraphs,
                     onlySimpleTableCells,
                     isTightList,
                     taskListItemFromAscii,
                     taskListItemToAscii,
                     addMetaField,
                     makeMeta,
                     eastAsianLineBreakFilter,
                     underlineSpan,
                     splitSentences,
                     filterIpynbOutput,
                     -- * TagSoup HTML handling
                     renderTags',
                     -- * File handling
                     inDirectory,
                     collapseFilePath,
                     uriPathToPath,
                     filteredFilesFromArchive,
                     -- * URI handling
                     schemes,
                     isURI,
                     -- * Error handling
                     mapLeft,
                     -- * for squashing blocks
                     blocksToInlines,
                     blocksToInlines',
                     blocksToInlinesWithSep,
                     defaultBlocksSeparator,
                     -- * Safe read
                     safeRead,
                     -- * User data directory
                     defaultUserDataDirs,
                     -- * Version
                     pandocVersion
                    ) where

import Prelude
import Codec.Archive.Zip
import qualified Control.Exception as E
import Control.Monad (MonadPlus (..), msum, unless)
import qualified Control.Monad.State.Strict as S
import qualified Data.ByteString.Lazy as BL
import qualified Data.Bifunctor as Bifunctor
import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
                  generalCategory, GeneralCategory(NonSpacingMark,
                  SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
import Data.Data (Data, Typeable)
import Data.List (find, intercalate, intersperse, stripPrefix, sortBy)
import Data.Ord (comparing)
import qualified Data.Map as M
import Data.Maybe (mapMaybe)
import Data.Monoid (Any (..))
import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr)
import qualified Data.Set as Set
import qualified Data.Text as T
import Data.Version (showVersion)
import Network.URI (URI (uriScheme), escapeURIString, parseURI)
import Paths_pandoc (version)
import System.Directory
import System.FilePath (isPathSeparator, splitDirectories)
import qualified System.FilePath.Posix as Posix
import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions,
                          renderTagsOptions)
import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..))
import qualified Text.Pandoc.Builder as B
import Data.Time
import Text.Pandoc.Asciify (toAsciiChar)
import Text.Pandoc.Definition
import Text.Pandoc.Extensions (Extensions, Extension(..), extensionEnabled)
import Text.Pandoc.Generic (bottomUp)
import Text.DocLayout (charWidth)
import Text.Pandoc.Walk

-- | Version number of pandoc library, rendered as a string by
-- applying 'showVersion' to the package's 'version'.
pandocVersion :: String
pandocVersion = showVersion version

--
-- List processing
--

-- | Split a list into the chunks that lie between separators.  A run of
-- consecutive separator elements counts as one split point, so no empty
-- chunk is produced between them.  A leading separator does produce an
-- empty first chunk; a trailing one produces nothing extra.
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy isSep = go
  where
    go []    = []
    go items = chunk : go (dropWhile isSep remainder)
      where (chunk, remainder) = break isSep items

-- | Split a list into chunks at the given indices.  The indices are
-- offsets into the original list: after each cut, the remaining indices
-- are rebased by the amount already consumed.  With no indices the whole
-- list is returned as a single chunk.
splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices indices items =
  case indices of
    []            -> [items]
    (idx : later) ->
      let (chunk, remainder) = splitAt idx items
      in  chunk : splitByIndices (map (subtract idx) later) remainder

-- | Split a string into chunks divided at the specified indices.
-- Works like 'splitByIndices', except that each cut is made with
-- 'splitAt'', which measures by character width rather than by
-- number of characters.
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
splitStringByIndices [] str = [str]
splitStringByIndices (idx : later) str =
  chunk : splitStringByIndices rebased remainder
  where
    (chunk, remainder) = splitAt' idx str
    rebased            = map (subtract idx) later

-- | Variant of 'splitAt' in which every character consumes
-- @charWidth c@ units of the budget instead of one.  Characters are
-- taken while the remaining budget is positive, so the character that
-- exhausts the budget is still included in the first component.
splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' budget str =
  case str of
    [] -> ([], [])
    (c : cs)
      | budget <= 0 -> ([], str)
      | otherwise   ->
          let (taken, left) = splitAt' (budget - charWidth c) cs
          in  (c : taken, left)

-- | Replace every occurrence of the sublist @target@ in a list with
-- @replacement@, scanning left to right.  Matches do not overlap:
-- scanning resumes after the end of each match.  An empty @target@
-- leaves the list unchanged.
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ xs = xs
substitute target replacement lst = go lst
  where
    go [] = []
    go rest@(y : ys) =
      maybe (y : go ys) ((replacement ++) . go) (stripPrefix target rest)

-- | Remove duplicate elements, keeping the first occurrence of each and
-- preserving the original order.  Elements already emitted are tracked
-- in a 'Set.Set', which is why an 'Ord' instance is required.
ordNub :: (Ord a) => [a] -> [a]
ordNub = go Set.empty
  where
    go _ [] = []
    go seen (y : ys)
      | y `Set.member` seen = go seen ys
      | otherwise           = y : go (Set.insert y seen) ys

--
-- Text processing
--

-- | Types that can be converted to a 'String'.
class ToString a where
  -- | Convert a value to a 'String'.
  toString :: a -> String

-- | A 'String' is returned as is.
instance ToString String where
  toString str = str

-- | A 'T.Text' is unpacked into a 'String'.
instance ToString T.Text where
  toString txt = T.unpack txt

-- | Build an escape table that maps each of the given characters to
-- itself preceded by a backslash.
backslashEscapes :: [Char]    -- ^ list of special characters to escape
                 -> [(Char, String)]
backslashEscapes specials = [ (c, ['\\', c]) | c <- specials ]

-- | Escape a string using an association list from characters to their
-- replacement strings.  Characters not present in the table are copied
-- unchanged; for duplicated keys the first entry wins.
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing escapeTable = concatMap escapeChar
  where
    escapeChar c = maybe [c] id (lookup c escapeTable)

-- | Remove all newline characters from the end of a string.  Newlines
-- elsewhere in the string are kept.
stripTrailingNewlines :: String -> String
stripTrailingNewlines = foldr keep []
  where
    -- A newline is dropped only when nothing has been kept after it.
    keep c kept
      | c == '\n' && null kept = []
      | otherwise              = c : kept

-- | Remove leading and trailing space (including newlines) from string,
-- by applying 'trimr' and then 'triml'.
trim :: String -> String
trim str = triml (trimr str)

-- | Remove leading space (including newlines) from string.  Only the
-- characters space, carriage return, newline and tab are stripped.
triml :: String -> String
triml = dropWhile isBlank
  where
    isBlank c = c == ' ' || c == '\r' || c == '\n' || c == '\t'

-- | Remove trailing space (including newlines) from string, by applying
-- 'triml' to the reversed string.
trimr :: String -> String
trimr str = reverse (triml (reverse str))

-- | Trim leading space, and trailing space unless it directly follows a
-- backslash (so that an escaped space ending a formula is kept).
trimMath :: String -> String
trimMath str = triml (reverse (dropTrailing (reverse str)))
  where
    -- Works on the reversed string: @final@ is the last character of
    -- the input and @prev@ the one before it.
    dropTrailing reversed =
      case reversed of
        (final : prev : more)
          | isBlank final
          , prev /= '\\' -> dropTrailing (prev : more)
        _ -> reversed
    isBlank c = c `elem` [' ','\t','\n','\r']

-- | Drop the first and the last character of a string.  Strings of
-- fewer than two characters become empty.
stripFirstAndLast :: String -> String
stripFirstAndLast str = take (length str - 2) (drop 1 str)

-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
-- A hyphen is inserted wherever a lowercase letter is directly followed
-- by an uppercase one; all other characters are simply lowercased.
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated str =
  case str of
    [] -> ""
    (a : b : rest)
      | isLower a && isUpper b ->
          a : '-' : toLower b : camelCaseToHyphenated rest
    (a : rest) -> toLower a : camelCaseToHyphenated rest

-- | Convert number < 4000 to uppercase roman numeral.  Negative numbers
-- and numbers of 4000 or more yield @"?"@; zero yields the empty string.
toRomanNumeral :: Int -> String
toRomanNumeral x
  | x < 0 || x >= 4000 = "?"
  | otherwise          = render x numerals
  where
    -- Values in descending order, including the subtractive forms.
    numerals =
      [ (1000, "M"), (900, "CM"), (500, "D"), (400, "CD")
      , (100, "C"),  (90, "XC"),  (50, "L"),  (40, "XL")
      , (10, "X"),   (9, "IX"),   (5, "V"),   (4, "IV")
      , (1, "I") ]
    -- Emit the largest numeral that still fits, then continue.
    render _ [] = ""
    render n table@((value, glyph) : smaller)
      | n >= value = glyph ++ render (n - value) table
      | otherwise  = render n smaller

-- | Escape whitespace and some punctuation characters in URI.
-- Every other character is passed through to the result unescaped.
escapeURI :: String -> String
escapeURI = escapeURIString isAllowed
  where
    isAllowed c = not (isSpace c) && c `notElem`
                    ['<','>','|','"','{','}','[',']','^', '`']

-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0.
-- Each tab is replaced by as many spaces as are needed to reach the next
-- multiple of the tab stop.  For a non-zero tab stop the input is
-- processed line by line and reassembled with 'T.unlines', so the result
-- always ends with a newline.
tabFilter :: Int       -- ^ Tab stop
          -> T.Text    -- ^ Input
          -> T.Text
tabFilter 0 = id
tabFilter tabStop = T.unlines . map expandLine . T.lines
  where
    expandLine line
      | T.null afterTab = beforeTab
      | otherwise       = beforeTab <> padding <> expandLine (T.drop 1 afterTab)
      where
        (beforeTab, afterTab) = T.break (== '\t') line
        padding = T.replicate
                    (tabStop - (T.length beforeTab `mod` tabStop)) (T.pack " ")

-- | Strip out DOS line endings.  Every carriage return character is
-- removed, whether or not it is followed by a newline.
crFilter :: T.Text -> T.Text
crFilter = T.filter notCarriageReturn
  where
    notCarriageReturn c = c /= '\r'

--
-- Date/time
--

-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
-- limit years to the range 1601-9999 (ISO 8601 accepts greater than
-- or equal to 1583, but MS Word only accepts dates starting 1601).
--
-- The known formats are tried in order; the first one that parses to a
-- date within the accepted range wins.  'Nothing' is returned if none do.
normalizeDate :: String -> Maybe String
normalizeDate s = formatTime defaultTimeLocale "%F" <$> firstValidDay
  where
    firstValidDay :: Maybe Day
    firstValidDay =
      msum [ parseTimeM True defaultTimeLocale fmt s >>= rejectBadYear
           | fmt <- formats ]
    rejectBadYear day
      | year >= 1601 && year <= 9999 = Just day
      | otherwise                    = Nothing
      where (year, _, _) = toGregorian day
    formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
                "%e %B %Y", "%b. %e, %Y", "%B %e, %Y",
                "%Y%m%d", "%Y%m", "%Y"]

--
-- Pandoc block and inline list processing
--

-- | Generate infinite lazy list of markers for an ordered list,
-- depending on list attributes.  Numeric and roman styles count upwards
-- from the start number; alphabetic styles cycle through the 26 letters,
-- wrapping around after the last one.
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
orderedListMarkers (start, numstyle, numdelim) = map decorate labels
  where
    labels =
      case numstyle of
        DefaultStyle -> decimals
        Example      -> decimals
        Decimal      -> decimals
        UpperAlpha   -> alphabetic ['A'..'Z']
        LowerAlpha   -> alphabetic ['a'..'z']
        UpperRoman   -> romans
        LowerRoman   -> map (map toLower) romans
    decimals           = map show [start..]
    romans             = map toRomanNumeral [start..]
    alphabetic letters = drop (start - 1) (cycle (map (: []) letters))
    -- Attach the delimiter to a bare label.
    decorate label =
      case numdelim of
        DefaultDelim -> label ++ "."
        Period       -> label ++ "."
        OneParen     -> label ++ ")"
        TwoParens    -> "(" ++ label ++ ")"

-- | Extract the leading and trailing spaces from inside an inline element
-- and place them outside the element.  SoftBreaks count as Spaces for
-- these purposes.  The function @f@ is applied to the inlines after they
-- have been trimmed with 'B.trimInlines'.
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
extractSpaces f is = leading <> f (B.trimInlines (B.Many contents)) <> trailing
  where
    contents = B.unMany is
    leading =
      case viewl contents of
        (Space :< _)     -> B.space
        (SoftBreak :< _) -> B.softbreak
        _                -> mempty
    trailing =
      case viewr contents of
        (_ :> Space)     -> B.space
        (_ :> SoftBreak) -> B.softbreak
        _                -> mempty

-- | Extract inlines, removing formatting.  Notes are blanked and quotes
-- are replaced by explicit quotation marks first; then only strings,
-- spaces and soft breaks are kept, code and math are turned into plain
-- strings, and hard line breaks become spaces.
removeFormatting :: Walkable Inline a => a -> [Inline]
removeFormatting doc = query keepPlain (walk (deNote . deQuote) doc)
  where
    keepPlain :: Inline -> [Inline]
    keepPlain il =
      case il of
        Str xs    -> [Str xs]
        Space     -> [Space]
        SoftBreak -> [SoftBreak]
        Code _ x  -> [Str x]
        Math _ x  -> [Str x]
        LineBreak -> [Space]
        _         -> []

-- | Replace a footnote by an empty string; leave every other inline
-- untouched.
deNote :: Inline -> Inline
deNote il =
  case il of
    Note _ -> Str ""
    _      -> il

-- | Replace a 'Quoted' inline by a 'Span' without attributes whose
-- contents are wrapped in explicit curly quotation mark strings.
-- Other inlines are returned unchanged.
deQuote :: Inline -> Inline
deQuote (Quoted qtype xs) =
  Span ("",[],[]) (Str opening : xs ++ [Str closing])
  where
    (opening, closing) =
      case qtype of
        SingleQuote -> ("\8216", "\8217")
        DoubleQuote -> ("\8220", "\8221")
deQuote x = x

-- | Convert pandoc structure to a string with formatting removed.
-- Footnotes are skipped (since we don't want their contents in link
-- labels).  Spaces and all kinds of line breaks are rendered as a single
-- space character.
stringify :: Walkable Inline a => a -> String
stringify = query textOf . walk (deNote . deQuote)
  where
    textOf :: Inline -> [Char]
    textOf il =
      case il of
        Space                                       -> " "
        SoftBreak                                   -> " "
        Str x                                       -> x
        Code _ x                                    -> x
        Math _ x                                    -> x
        RawInline (Format "html") ('<':'b':'r':_)   -> " " -- see #2105
        LineBreak                                   -> " "
        _                                           -> ""

-- | Bring all regular text in a pandoc structure to uppercase.
--
-- The conversion goes through 'T.toUpper', so a lowercase character that
-- has no single-character uppercase form is expanded: e.g. the German
-- sharp s is converted to "SS".
capitalize :: Walkable Inline a => a -> a
capitalize = walk upcase
  where
    upcase :: Inline -> Inline
    upcase il =
      case il of
        Str s -> Str (T.unpack (T.toUpper (T.pack s)))
        _     -> il

-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.  Otherwise (if the list items contain @Para@
-- blocks besides possibly at the end), turn any @Plain@s into @Para@s (#5285).
-- A list without any @Para@ blocks is returned unchanged.
compactify :: [Blocks]  -- ^ List of list items (each a list of blocks)
           -> [Blocks]
compactify [] = []
compactify items =
  case reverse (B.toList final) of
    (Para a : xs)
      | noParaIn (xs ++ concatMap B.toList others)
      -> others ++ [B.fromList (reverse (Plain a : xs))]
    _ | noParaIn (concatMap B.toList items) -> items
      | otherwise -> map (fmap plainToPara) items
  where
    -- Safe: this equation is only reached for a non-empty list.
    others   = init items
    final    = last items
    noParaIn = not . any isPara

-- | Turn a @Plain@ block into a @Para@ with the same contents; leave
-- any other block as it is.
plainToPara :: Block -> Block
plainToPara blk =
  case blk of
    Plain ils -> Para ils
    _         -> blk

-- | Like @compactify@, but acts on items of definition lists.
--
-- If the very last block of the last definition of the last item is a
-- @Para@, and no other block in any definition is a @Para@, that block
-- is turned into a @Plain@.  In every other case the items are returned
-- unchanged.
--
-- The function is total: when the last item has no definitions at all,
-- or its last definition is empty, there is no final block to rewrite
-- and the items are returned as they are.
compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
compactifyDL items =
  case reverse items of
    ((term, defs) : precedingRev)
      | (finalDef : earlierDefsRev) <- reverse defs
      , (Para x : restRev) <- reverse (B.toList finalDef)
        -- no other Para inside the final definition ...
      , not (any isPara restRev)
        -- ... nor in any other definition of any item
      , not (any isPara (concatMap B.toList
                           (concatMap snd precedingRev ++ earlierDefsRev)))
      -> reverse precedingRev ++
           [ ( term
             , reverse earlierDefsRev ++
                 [B.fromList (reverse (Plain x : restRev))]
             ) ]
    _ -> items

-- | Join a list of lines into one inline list, separating consecutive
-- lines with a hard 'LineBreak'.
combineLines :: [[Inline]] -> [Inline]
combineLines inlineLines = concat (intersperse [LineBreak] inlineLines)

-- | Convert a list of lines into a single paragraph in which the lines
--   are separated by hard line breaks. This is useful e.g. for
--   rudimentary support of LineBlock elements in writers.
linesToPara :: [[Inline]] -> Block
linesToPara inlineLines = Para (combineLines inlineLines)

-- | True if block is a @Para@ block.
isPara :: Block -> Bool
isPara blk =
  case blk of
    Para _ -> True
    _      -> False

-- | Data structure for defining hierarchical Pandoc documents.
-- An 'Element' is either a single block ('Blk') or a section ('Sec').
-- A section carries, in this order: its level, its section number (a
-- list of 'Int's), its attributes, its label as a list of inlines, and
-- its contents as a list of nested 'Element's.
data Element = Blk Block
             | Sec Int [Int] Attr [Inline] [Element]
             --    lvl  num attributes label    contents
             deriving (Eq, Read, Show, Typeable, Data)

-- | Walk the inlines of an 'Element': inside the block of a 'Blk', and
-- inside both the label and the contents of a 'Sec'.
instance Walkable Inline Element where
  walk f elt =
    case elt of
      Blk x                      -> Blk (walk f x)
      Sec lev nums attr ils elts ->
        Sec lev nums attr (walk f ils) (walk f elts)
  walkM f elt =
    case elt of
      Blk x                      -> Blk <$> walkM f x
      Sec lev nums attr ils elts ->
        Sec lev nums attr <$> walkM f ils <*> walkM f elts
  query f elt =
    case elt of
      Blk x              -> query f x
      Sec _ _ _ ils elts -> mappend (query f ils) (query f elts)

-- | Walk the blocks of an 'Element': inside the block of a 'Blk', and
-- inside both the label and the contents of a 'Sec'.
instance Walkable Block Element where
  walk f elt =
    case elt of
      Blk x                      -> Blk (walk f x)
      Sec lev nums attr ils elts ->
        Sec lev nums attr (walk f ils) (walk f elts)
  walkM f elt =
    case elt of
      Blk x                      -> Blk <$> walkM f x
      Sec lev nums attr ils elts ->
        Sec lev nums attr <$> walkM f ils <*> walkM f elts
  query f elt =
    case elt of
      Blk x              -> query f x
      Sec _ _ _ ils elts -> mappend (query f ils) (query f elts)

-- | Convert Pandoc inline list to plain text identifier.  HTML
-- identifiers must start with a letter, and may contain only
-- letters, digits, and the characters _-.
--
-- The text is stringified, lowercased and stripped of disallowed
-- punctuation.  The details depend on two extensions:
-- @Ext_gfm_auto_identifiers@ changes how spaces and punctuation are
-- treated and keeps leading non-letters, and @Ext_ascii_identifiers@
-- additionally maps the result through 'toAsciiChar'.
inlineListToIdentifier :: Extensions -> [Inline] -> String
inlineListToIdentifier exts =
  dropNonLetter . filterAscii . toIdent . stringify
  where
    gfmStyle  = extensionEnabled Ext_gfm_auto_identifiers exts
    asciiOnly = extensionEnabled Ext_ascii_identifiers exts
    dropNonLetter = if gfmStyle then id else dropWhile (not . isAlpha)
    filterAscii   = if asciiOnly then mapMaybe toAsciiChar else id
    toIdent
      | gfmStyle  = filterPunct . spaceToDash . map toLower
      | otherwise = intercalate "-" . words . filterPunct . map toLower
    filterPunct = filter keepChar
    keepChar c  = isSpace c || isAlphaNum c || isAllowedPunct c
    isAllowedPunct c
      | gfmStyle  = c == '-' || c == '_' ||
                    generalCategory c `elem`
                      [NonSpacingMark, SpacingCombiningMark,
                       EnclosingMark, ConnectorPunctuation]
      | otherwise = c == '_' || c == '-' || c == '.'
    spaceToDash = map (\c -> if isSpace c then '-' else c)

-- | Convert list of Pandoc blocks into (hierarchical) list of Elements.
-- Section numbering starts from an empty counter list.
hierarchicalize :: [Block] -> [Element]
hierarchicalize = flip S.evalState [] . hierarchicalizeWithIds

-- | Worker for 'hierarchicalize'.  The state holds the section number
-- (one counter per level) that was assigned to the most recent numbered
-- header; it is used to compute the number of the next one.
--
-- * A 'Header' opens a 'Sec' that swallows all following blocks up to
--   the next block for which 'headerLtEq' holds at the same level.
-- * A @Div@ with identifier @"refs"@ that starts with a header has that
--   header pulled out in front of it, with the class @"references"@
--   added, and is then processed again.
-- * Any other block is wrapped in 'Blk'.
--
-- NOTE(review): the numbering code uses 'init' and 'last' on the
-- truncated counter list, which is empty when @level <= 0@; this
-- assumes header levels are at least 1 -- confirm against callers.
hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
hierarchicalizeWithIds [] = return []
hierarchicalizeWithIds (Header level attr@(_,classes,_) title':xs) = do
  lastnum <- S.get
  -- Counters of deeper levels are irrelevant for this header.
  let lastnum' = take level lastnum
  -- Unnumbered headers get an empty number.  If a counter already exists
  -- for this level it is incremented; otherwise skipped levels are
  -- filled with 0 and this level starts at 1.
  let newnum = case length lastnum' of
                    x | "unnumbered" `elem` classes -> []
                      | x >= level -> init lastnum' ++ [last lastnum' + 1]
                      | otherwise -> lastnum ++
                           replicate (level - length lastnum - 1) 0 ++ [1]
  -- An unnumbered header leaves the running section number untouched.
  unless (null newnum) $ S.put newnum
  -- The section extends up to the next header of equal or higher rank.
  let (sectionContents, rest) = break (headerLtEq level) xs
  sectionContents' <- hierarchicalizeWithIds sectionContents
  rest' <- hierarchicalizeWithIds rest
  return $ Sec level newnum attr title' sectionContents' : rest'
hierarchicalizeWithIds (Div ("refs",classes',kvs')
                         (Header level (ident,classes,kvs) title' : xs):ys) =
  hierarchicalizeWithIds (Header level (ident,"references":classes,kvs)
                           title' : Div ("refs",classes',kvs') xs : ys)
hierarchicalizeWithIds (x:rest) = do
  rest' <- hierarchicalizeWithIds rest
  return $ Blk x : rest'

-- | True if the block starts a section whose level is less than or
-- equal to the given level, i.e. if it terminates a section of that
-- level.  This holds for a 'Header' itself and for a bibliography @Div@
-- whose first block is such a header.
--
-- Both spellings of the bibliography @Div@ are recognised: the one
-- with the single class @"references"@ and no identifier, and the one
-- with identifier @"refs"@, which is the form 'hierarchicalizeWithIds'
-- pulls the header out of.  Without the latter, a @"refs"@ Div would
-- never end the preceding section and its header would end up nested
-- inside that section.
headerLtEq :: Int -> Block -> Bool
headerLtEq level (Header l _ _)                                  = l <= level
headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level
headerLtEq level (Div ("refs",_,_) (Header l _ _ : _))           = l <= level
headerLtEq _ _                                                   = False

-- | Generate a unique identifier from a list of inlines.
-- Third argument is the set of already used identifiers.
--
-- The base identifier comes from 'inlineListToIdentifier', falling back
-- to @"section"@ when that is empty.  If it is already taken, the first
-- free variant @base-1@, @base-2@, ... is chosen.
uniqueIdent :: Extensions -> [Inline] -> Set.Set String -> String
uniqueIdent exts title' usedIdents
  | baseIdent `Set.notMember` usedIdents = baseIdent
  | otherwise =
      -- if we have more than 60,000, allow repeats
      maybe baseIdent numIdent (find isFree ([1..60000] :: [Int]))
  where
    baseIdent =
      case inlineListToIdentifier exts title' of
        "" -> "section"
        x  -> x
    numIdent n = baseIdent ++ "-" ++ show n
    isFree n   = numIdent n `Set.notMember` usedIdents

-- | True if block is a Header block, false for every other block.
isHeaderBlock :: Block -> Bool
isHeaderBlock blk =
  case blk of
    Header{} -> True
    _        -> False

-- | Shift header levels up or down by adding @n@ to the level of every
-- header in the document.  The resulting level is not clamped.
headerShift :: Int -> Pandoc -> Pandoc
headerShift n = walk shift
  where
    shift :: Block -> Block
    shift blk =
      case blk of
        Header level attr inner -> Header (level + n) attr inner
        _                       -> blk

-- | Remove empty paragraphs, i.e. @Para@ blocks with no inlines at all,
-- from every block list in the document.
stripEmptyParagraphs :: Pandoc -> Pandoc
stripEmptyParagraphs = walk dropEmpty
  where
    dropEmpty :: [Block] -> [Block]
    dropEmpty blocks = [ blk | blk <- blocks, not (isEmptyParagraph blk) ]
    isEmptyParagraph blk =
      case blk of
        Para [] -> True
        _       -> False

-- | Detect if table rows contain only cells consisting of a single
-- paragraph that has no @LineBreak@.  A single @Plain@ block counts as
-- a paragraph here, and empty cells are considered simple as well.
onlySimpleTableCells :: [[TableCell]] -> Bool
onlySimpleTableCells rows = all isSimpleCell (concat rows)
  where
    isSimpleCell cell =
      case cell of
        [Plain ils] -> lineBreakFree ils
        [Para ils ] -> lineBreakFree ils
        []          -> True
        _           -> False
    lineBreakFree ils = not (getAny (query (Any . isLineBreak) ils))
    isLineBreak LineBreak = True
    isLineBreak _         = False

-- | Detect if a list is tight: every item must start with a @Plain@
-- block.  An item with no blocks at all makes the list non-tight.
isTightList :: [[Block]] -> Bool
isTightList items = all startsWithPlain items
  where
    startsWithPlain blocks =
      case blocks of
        (Plain _ : _) -> True
        _             -> False

-- | Convert a list item containing tasklist syntax (e.g. @[x]@)
-- to using @U+2610 BALLOT BOX@ or @U+2612 BALLOT BOX WITH X@.
-- Only the start of the item's first block is inspected, and only when
-- the task lists extension is enabled.
taskListItemFromAscii :: Extensions -> [Block] -> [Block]
taskListItemFromAscii = handleTaskListItem fromMd
  where
    fromMd ils =
      case ils of
        (Str "[" : Space : Str "]" : Space : is) -> Str "☐" : Space : is
        (Str box : Space : is)
          | box == "[x]" || box == "[X]"         -> Str "☒" : Space : is
        _                                        -> ils

-- | Convert a list item containing text starting with @U+2610 BALLOT BOX@
-- or @U+2612 BALLOT BOX WITH X@ to tasklist syntax (e.g. @[x]@).
-- The checkbox is emitted as a raw markdown inline.
taskListItemToAscii :: Extensions -> [Block] -> [Block]
taskListItemToAscii = handleTaskListItem toMd
  where
    toMd ils =
      case ils of
        (Str "☐" : Space : is) -> checkbox "[ ]" : Space : is
        (Str "☒" : Space : is) -> checkbox "[x]" : Space : is
        _                      -> ils
    checkbox = RawInline (Format "markdown")

-- | Apply an inline transformation to the inlines of the first block of
-- a list item, provided that block is a @Plain@ or a @Para@ and the
-- @Ext_task_lists@ extension is enabled.  Otherwise the blocks are
-- returned unchanged.
handleTaskListItem :: ([Inline] -> [Inline]) -> Extensions -> [Block] -> [Block]
handleTaskListItem handleInlines exts bls
  | extensionEnabled Ext_task_lists exts = rewriteFirst bls
  | otherwise                            = bls
  where
    rewriteFirst blocks =
      case blocks of
        (Plain is : bs) -> Plain (handleInlines is) : bs
        (Para is  : bs) -> Para  (handleInlines is) : bs
        _               -> blocks

-- | Set a field of a 'Meta' object.  If the field already has a value,
-- convert it into a list with the new value appended to the old value(s).
-- When the existing value is already a list, a new list value is
-- spliced onto its end rather than nested.
addMetaField :: ToMetaValue a
             => String
             -> a
             -> Meta
             -> Meta
addMetaField key val (Meta meta) =
  Meta (M.insertWith combine key (toMetaValue val) meta)
  where
    -- 'M.insertWith' passes the new value first and the old one second.
    combine newval oldval =
      case oldval of
        MetaList xs -> MetaList (xs ++ tolist newval)
        _           -> MetaList [oldval, newval]
    tolist v =
      case v of
        MetaList ys -> ys
        _           -> [v]

-- | Create 'Meta' from old-style title, authors, date.  This is
-- provided to ease the transition from the old API.  The fields
-- @date@, @author@ and @title@ are added, in that order, to an
-- empty 'Meta'.
makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
makeMeta title authors date =
  addMetaField "title" (B.fromList title) withAuthors
  where
    withDate    = addMetaField "date" (B.fromList date) nullMeta
    withAuthors = addMetaField "author" (map B.fromList authors) withDate

-- | Remove soft breaks between East Asian characters.  A soft break is
-- dropped when the last character of the text before it and the first
-- character of the text after it both have a 'charWidth' of 2.
eastAsianLineBreakFilter :: Pandoc -> Pandoc
eastAsianLineBreakFilter = bottomUp dropBreak
  where
    dropBreak (x : SoftBreak : y : zs)
      | bothWide (stringify x) (stringify y) = x : y : zs
    dropBreak ils = ils
    -- 'last' is safe: the pattern guarantees a non-empty string.
    bothWide before@(_:_) (c : _) =
      charWidth (last before) == 2 && charWidth c == 2
    bothWide _ _ = False

-- | Builder for underline: wraps the inlines in a span that carries the
-- class @underline@.
-- This probably belongs in Builder.hs in pandoc-types.
-- Will be replaced once Underline is an element.
underlineSpan :: Inlines -> Inlines
underlineSpan inlines = B.spanWith ("", ["underline"], []) inlines

-- | Returns the first sentence in a list of inlines, and the rest.
-- A sentence ends at a string whose last character is a period or a
-- question mark, or at a hard line break.  The space or soft break
-- following the sentence end is dropped from the result.
breakSentence :: [Inline] -> ([Inline], [Inline])
breakSentence [] = ([],[])
breakSentence xs =
  case fromEnd of
    []               -> (beforeEnd, [])
    [c]              -> (beforeEnd ++ [c], [])
    (c:Space:cs)     -> (beforeEnd ++ [c], cs)
    (c:SoftBreak:cs) -> (beforeEnd ++ [c], cs)
    (Str ".":Str (')':ys):cs) ->
      (beforeEnd ++ [Str ".", Str (')':ys)], cs)
    (x@(Str ('.':')':_)):cs) -> (beforeEnd ++ [x], cs)
    (LineBreak:x@(Str ('.':_)):cs) -> (beforeEnd ++ [LineBreak], x:cs)
    -- Not a real sentence boundary: keep going after this inline.
    (c:cs) ->
      let (ds, es) = breakSentence cs
      in  (beforeEnd ++ [c] ++ ds, es)
  where
    (beforeEnd, fromEnd) = break endsSentence xs
    endsSentence (Str ys@(_:_)) = last ys `elem` ['.', '?']
    endsSentence LineBreak      = True
    endsSentence _              = False

-- | Split a list of inlines into sentences by repeatedly applying
-- 'breakSentence'.  The result always contains at least one sentence,
-- which is empty for empty input.
splitSentences :: [Inline] -> [[Inline]]
splitSentences xs =
  case breakSentence xs of
    (sent, [])   -> [sent]
    (sent, rest) -> sent : splitSentences rest

-- | Process ipynb output cells.  If mode is Nothing,
-- remove all output.  If mode is Just format, select
-- best output for the format.  If format is not ipynb,
-- strip out ANSI escape sequences from CodeBlocks (see #5633).
--
-- Only @Div@s whose first class is @output@ are touched.  For a
-- target format other than ipynb, the blocks of such a Div are ranked
-- (lower is better) and only the best-ranked block is kept; among
-- blocks of equal rank the first one wins.
filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc
filterIpynbOutput mode = walk go
  where go (Div (ident, ("output":os), kvs) bs) =
          case mode of
            Nothing  -> Div (ident, ("output":os), kvs) []
            -- "best" for ipynb includes all formats:
            Just fmt
              | fmt == Format "ipynb"
                          -> Div (ident, ("output":os), kvs) bs
              | otherwise -> Div (ident, ("output":os), kvs) $
                              walk removeANSI $
                              take 1 $ sortBy (comparing rank) bs
                 where
                  -- Raw HTML and raw LaTeX are best for their own
                  -- format, second best for markdown, and worst
                  -- otherwise.
                  rank (RawBlock (Format "html") _)
                    | fmt == Format "html" = (1 :: Int)
                    | fmt == Format "markdown" = 2
                    | otherwise = 3
                  rank (RawBlock (Format "latex") _)
                    | fmt == Format "latex" = 1
                    | fmt == Format "markdown" = 2
                    | otherwise = 3
                  -- Any other raw block is only useful for its own format.
                  rank (RawBlock f _)
                    | fmt == f = 1
                    | otherwise = 3
                  -- A paragraph holding just an image ranks best; every
                  -- remaining kind of block ranks in the middle.
                  rank (Para [Image{}]) = 1
                  rank _ = 2
                  removeANSI (CodeBlock attr code) =
                    CodeBlock attr (removeANSIEscapes code)
                  removeANSI x = x
                  -- Drops everything from an ESC '[' up to and including
                  -- the next 'm'.  If no 'm' follows, the remainder of
                  -- the string is dropped.
                  removeANSIEscapes [] = []
                  removeANSIEscapes ('\x1b':'[':cs) =
                    removeANSIEscapes (drop 1 $ dropWhile (/='m') cs)
                  removeANSIEscapes (c:cs) = c : removeANSIEscapes cs
        go x = x

--
-- TagSoup HTML handling
--

-- | Render HTML tags.  The tags @hr@, @br@, @img@, @meta@ and @link@ are
-- passed as 'optMinimize', and @script@ and @style@ as 'optRawTag'.
-- Tag names are compared case-insensitively.
renderTags' :: [Tag String] -> String
renderTags' = renderTagsOptions options
  where
    options = renderOptions
      { optMinimize = matchTags ["hr", "br", "img",
                                 "meta", "link"]
      , optRawTag   = matchTags ["script", "style"] }
    matchTags tags name = map toLower name `elem` tags

--
-- File handling
--

-- | Perform an IO action in a directory, returning to starting directory.
-- The starting directory is restored through 'E.bracket', so this also
-- happens when the action throws an exception.
inDirectory :: FilePath -> IO a -> IO a
inDirectory path action =
  E.bracket getCurrentDirectory setCurrentDirectory $ \_ -> do
    setCurrentDirectory path
    action

--
-- Error handling
--

-- | Apply a function to the 'Left' value of an 'Either', leaving a
-- 'Right' value untouched.
mapLeft :: (a -> b) -> Either a c -> Either b c
mapLeft f = either (Left . f) Right

-- | Remove intermediate "." and ".." directories from a path.
-- A ".." that would climb above the root or above a leading ".." is
-- kept as it is.
--
-- > collapseFilePath "./foo" == "foo"
-- > collapseFilePath "/bar/../baz" == "/baz"
-- > collapseFilePath "/../baz" == "/../baz"
-- > collapseFilePath "parent/foo/baz/../bar" ==  "parent/foo/bar"
-- > collapseFilePath "parent/foo/baz/../../bar" ==  "parent/bar"
-- > collapseFilePath "parent/foo/.." ==  "parent"
-- > collapseFilePath "/parent/foo/../../bar" ==  "/bar"
collapseFilePath :: FilePath -> FilePath
collapseFilePath = Posix.joinPath . reverse . foldl step [] . splitDirectories
  where
    -- The accumulator holds the components kept so far, most recent first.
    step acc "." = acc
    step acc@(prev : older) ".."
      | prev == ".." || isSeparatorComponent prev = ".." : acc
      | otherwise                                 = older
    step acc component
      -- a lone separator resets the path to the (posix) root
      | isSeparatorComponent component = [[Posix.pathSeparator]]
      | otherwise                      = component : acc
    -- True for a component that is exactly one path separator character.
    isSeparatorComponent [c] = isPathSeparator c
    isSeparatorComponent _   = False

-- | Convert the path part of a file: URI to a regular path.
-- On windows, @/c:/foo@ should be @c:/foo@.
-- On linux, @/foo@ should be @/foo@.
--
-- When compiled with @_WINDOWS@ defined, one leading @/@ (if present)
-- is dropped from the path; otherwise the path is returned unchanged.
uriPathToPath :: String -> FilePath
uriPathToPath path =
#ifdef _WINDOWS
  case path of
    '/':ps -> ps
    ps     -> ps
#else
  path
#endif

--
-- File selection from the archive
--
-- | Pair every file path in the archive that satisfies the predicate
-- with the contents of its entry. Paths are taken in the order given by
-- 'filesInArchive'; a path for which 'findEntryByPath' finds no entry
-- is left out.
filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)]
filteredFilesFromArchive zf f =
  [ (fp, fromEntry entry)
  | fp <- filesInArchive zf
  , f fp
  , Just entry <- [findEntryByPath fp zf]
  ]


--
-- IANA URIs
--

-- | The set of known URI schemes: the schemes listed at
-- http://www.iana.org/assignments/uri-schemes.html together with the
-- unofficial schemes doi, isbn, javascript and pmid.
schemes :: Set.Set String
schemes = Set.fromList (ianaSchemes ++ unofficialSchemes)
  where
    -- Official IANA schemes
    ianaSchemes =
      [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs"
      , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin"
      , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension"
      , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs"
      , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle"
      , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed"
      , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg"
      , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham"
      , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon"
      , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6"
      , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs"
      , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap"
      , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market"
      , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access"
      , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel"
      , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath"
      , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint"
      , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller"
      , "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode"
      , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular"
      , "ms-settings-cloudstorage", "ms-settings-connectabledevices"
      , "ms-settings-displays-topology", "ms-settings-emailandaccounts"
      , "ms-settings-language", "ms-settings-location", "ms-settings-lock"
      , "ms-settings-nfctransactions", "ms-settings-notifications"
      , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity"
      , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace"
      , "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad"
      , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word"
      , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs"
      , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd"
      , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop"
      , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis"
      , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp"
      , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn"
      , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews"
      , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam"
      , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid"
      , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn"
      , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi"
      , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid"
      , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire"
      , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r"
      , "z39.50s"
      ]
    -- Unofficial schemes
    unofficialSchemes =
      [ "doi", "isbn", "javascript", "pmid" ]

-- | Check whether the string parses as a URI ('parseURI') whose scheme,
-- compared case-insensitively and without the colon, is a member of
-- 'schemes' (IANA schemes plus a few frequently used unofficial ones).
isURI :: String -> Bool
isURI str =
  case parseURI str of
    Nothing  -> False
    Just uri -> isKnown (uriScheme uri)
  where
    -- Drop the colon(s) from the scheme and lower-case it before lookup.
    isKnown scheme =
      Set.member (map toLower (filter (/= ':') scheme)) schemes

---
--- Squash blocks into inlines
---

-- | Squash a single block into inlines. This is lossy: block structure
-- is discarded. Nested lists of blocks are squashed with
-- @blocksToInlines'@; definition-list terms are followed by @":"@ and a
-- space; table rows (header row first) are separated by line breaks;
-- 'HorizontalRule' and 'Null' produce nothing.
blockToInlines :: Block -> Inlines
blockToInlines blk =
  case blk of
    Plain ils                 -> B.fromList ils
    Para ils                  -> B.fromList ils
    LineBlock lns             -> B.fromList (combineLines lns)
    CodeBlock attr str        -> B.codeWith attr str
    RawBlock (Format fmt) str -> B.rawInline fmt str
    BlockQuote blks           -> blocksToInlines' blks
    OrderedList _ items       -> squashEach items
    BulletList items          -> squashEach items
    DefinitionList entries    -> mconcat (map squashEntry entries)
    Header _ _ ils            -> B.fromList ils
    HorizontalRule            -> mempty
    Table _ _ _ headers rows  ->
      mconcat (intersperse B.linebreak (map squashEach (headers:rows)))
    Div _ blks                -> blocksToInlines' blks
    Null                      -> mempty
  where
    -- Squash every list of blocks and concatenate the results.
    squashEach blkss = mconcat (map blocksToInlines' blkss)
    -- A definition-list entry: the term, ": ", then its definitions.
    squashEntry (term, defs) =
      B.fromList term <> B.str ":" <> B.space <> squashEach defs

-- | Squash a list of blocks into inlines, converting each block with
-- 'blockToInlines' and placing @sep@ between consecutive results.
blocksToInlinesWithSep :: Inlines -> [Block] -> Inlines
blocksToInlinesWithSep sep blks =
  mconcat (intersperse sep (map blockToInlines blks))

-- | Squash a list of blocks into inlines, using
-- 'defaultBlocksSeparator' between blocks.
blocksToInlines' :: [Block] -> Inlines
blocksToInlines' blks = blocksToInlinesWithSep defaultBlocksSeparator blks

-- | Like @blocksToInlines'@, but returns a plain list of inlines.
blocksToInlines :: [Block] -> [Inline]
blocksToInlines blks = B.toList (blocksToInlines' blks)

-- | Inline elements placed between blocks when squashing blocks into
-- inlines: a pilcrow sign surrounded by spaces.
defaultBlocksSeparator :: Inlines
defaultBlocksSeparator = B.space <> pilcrow <> B.space
  where
    -- This is used in the pandoc.utils.blocks_to_inlines function. Docs
    -- there should be updated if this is changed.
    pilcrow = B.str "¶"


--
-- Safe read
--

-- | Read a value without risking a runtime error. Only the first parse
-- returned by 'reads' is considered: if whatever follows it is all
-- whitespace the value is returned, otherwise the result is 'mzero'.
safeRead :: (MonadPlus m, Read a) => String -> m a
safeRead s
  | (val, rest):_ <- reads s
  , all isSpace rest = return val
  | otherwise        = mzero

--
-- User data directory
--

-- | Return the candidate user data directories for this platform: the
-- XDG data directory for "pandoc" (XDG_DATA_HOME or its default value)
-- followed by the legacy user data directory ($HOME/.pandoc on *nix),
-- passed through 'ordNub'. If looking up either directory throws any
-- exception, the result is the empty list.
defaultUserDataDirs :: IO [FilePath]
defaultUserDataDirs = E.handle recover $ do
    xdgDir <- getXdgDirectory XdgData "pandoc"
    legacyDir <- getAppUserDataDirectory "pandoc"
    return (ordNub [xdgDir, legacyDir])
  where
    -- Best effort: any failure just means there are no directories.
    recover :: E.SomeException -> IO [FilePath]
    recover _ = return []
-												Use NoImplicitPrelude and explicitly import Prelude.

This seems to be necessary if we are to use our custom Prelude
with ghci.

Closes #4464.

											
										
										
											2018-03-18 10:46:28 -07:00
+								{-# LANGUAGE NoImplicitPrelude #-}
-												Automatic reformating by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								{-# LANGUAGE CPP                   #-}
 								{-# LANGUAGE DeriveDataTypeable    #-}
 								{-# LANGUAGE FlexibleContexts      #-}
 								{-# LANGUAGE MultiParamTypeClasses #-}
 								{-# LANGUAGE ScopedTypeVariables   #-}
 								{-# LANGUAGE ViewPatterns          #-}
-												Add Text.Pandoc.Shared.ToString typeclass (API change)

											
										
										
											2018-11-01 14:09:11 +03:00
+								{-# LANGUAGE FlexibleInstances  #-}
 								{-# LANGUAGE TypeSynonymInstances  #-}
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								{- |
 								   Module      : Text.Pandoc.Shared
-												Add missing copyright notices and remove license boilerplate (#5112)

Quite a few modules were missing copyright notices.

This commit adds copyright notices everywhere via haddock module
headers.  The old license boilerplate comment is redundant with this and has
been removed.

Update copyright years to 2019.

Closes #4592.

											
										
										
											2019-02-04 22:52:31 +01:00
+								   Copyright   : Copyright (C) 2006-2019 John MacFarlane
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								   License     : GNU GPL, version 2 or above
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								   Maintainer  : John MacFarlane <jgm@berkeley.edu>
 								   Stability   : alpha
 								   Portability : portable
 								Utility functions and definitions used by the various Pandoc modules.
 								-}
-												Shared: enamed stringToURI -> escapeURI.

											
										
										
											2010-03-23 15:05:33 -07:00
+								module Text.Pandoc.Shared (
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     -- * List processing
 								                     splitBy,
 								                     splitByIndices,
-												Shared: Added splitStringWithIndices.

This is like splitWithIndices, but it is sensitive to distinctions
between wide, combining, and regular characters.

											
										
										
											2012-01-27 00:37:46 -08:00
+								                     splitStringByIndices,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     substitute,
-												Shared:  Added ordNub.

API change (adds export).

											
										
										
											2014-06-03 11:00:54 -07:00
+								                     ordNub,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     -- * Text processing
-												Add Text.Pandoc.Shared.ToString typeclass (API change)

											
										
										
											2018-11-01 14:09:11 +03:00
+								                     ToString (..),
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     backslashEscapes,
 								                     escapeStringUsing,
 								                     stripTrailingNewlines,
-												Renamed removedLeadingTrailingSpace to trim.

Also removeLeadingSpace to triml,
removeTrailingSpace to trimr.

											
										
										
											2012-09-29 17:09:34 -04:00
+								                     trim,
 								                     triml,
 								                     trimr,
-												LaTeX reader:  allow space at end of math after `\`.

Closes #5010.

Expose trimMath from T.P.Shared.

											
										
										
											2018-10-29 22:20:14 -07:00
+								                     trimMath,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     stripFirstAndLast,
 								                     camelCaseToHyphenated,
 								                     toRomanNumeral,
-												Shared: enamed stringToURI -> escapeURI.

											
										
										
											2010-03-23 15:05:33 -07:00
+								                     escapeURI,
-												Changed order of functions in Shared.

											
										
										
											2010-07-06 23:17:06 -07:00
+								                     tabFilter,
-												Move CR filtering from tabFilter to the readers.

The readers previously assumed that CRs had been filtered
from the input.  Now we strip the CRs in the readers themselves,
before parsing.  (The point of this is just to simplify the
parsers.)

Shared now exports a new function `crFilter`. [API change]
And `tabFilter` no longer filters CRs.

											
										
										
											2017-06-20 21:52:13 +02:00
+								                     crFilter,
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
+								                     -- * Date/time
 								                     normalizeDate,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     -- * Pandoc block and inline list processing
 								                     orderedListMarkers,
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								                     extractSpaces,
-												Shared:  Added removeFormatting.

API change (addition of exported function).

											
										
										
											2014-07-13 10:13:22 -07:00
+								                     removeFormatting,
-												Removed writerIgnoreNotes.

Instead, just temporarily remove notes when generating
TOC lists in HTML and Markdown (as we already did in LaTeX).

Also export deNote from Text.Pandoc.Shared.

API change in Shared and Options.WriterOptions.

											
										
										
											2017-01-15 22:34:14 +01:00
+								                     deNote,
-												Added 'stringify' to Text.Pandoc.Shared.

											
										
										
											2010-11-27 07:08:06 -08:00
+								                     stringify,
-												Correctly implement capitalisation.

Using `map toUpper` to capitalise text is wrong, as e.g.
“Straße” should be converted to “STRASSE”, which is 1 character
longer. This commit adds a `capitalize` function and replaces
2 identical implementations in different modules (`toCaps` and
`capitalize`) with it.

											
										
										
											2014-08-03 16:48:55 +04:00
+								                     capitalize,
-												Shared: rename compactify', compactify'DL -> compactify, compactifyDL.

											
										
										
											2017-01-27 21:36:45 +01:00
+								                     compactify,
 								                     compactifyDL,
-												Shared: add function combining lines using LineBreak

The `linesToBlock` function takes a list of lines and combines them by appending
a hard `LineBreak` to each line and concatenating the result, putting the result
into a `Para`. This is most useful when converting `LineBlock`
elements.

											
										
										
											2016-10-13 08:46:38 +02:00
+								                     linesToPara,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     Element (..),
 								                     hierarchicalize,
-												Shared: Export uniqueIdent, don't allow tilde in identifier.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1894 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2010-03-16 06:45:52 +00:00
+								                     uniqueIdent,
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid DRY.

											
										
										
											2016-10-12 17:42:30 +02:00
+								                     inlineListToIdentifier,
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     isHeaderBlock,
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
+								                     headerShift,
-												Add --strip-empty-paragraphs option.

This works for any input format.

											
										
										
											2017-12-02 15:21:59 -08:00
+								                     stripEmptyParagraphs,
-												consolidate simple-table detection (#5524)

add `onlySimpleTableCells` to `Text.Pandoc.Shared`

[API change]

This fixes an inconsistency in the HTML reader, which did not treat tables with `<p>` inside cells as simple.

											
										
										
											2019-05-27 19:53:19 +02:00
+								                     onlySimpleTableCells,
-												Shared: export isTightList.

											
										
										
											2013-01-07 20:12:05 -08:00
+								                     isTightList,
-												Implement task lists (#5139)

Closes #3051

											
										
										
											2019-01-02 20:36:37 +01:00
+								                     taskListItemFromAscii,
 								                     taskListItemToAscii,
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template..
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								                     addMetaField,
 								                     makeMeta,
-												Added eastAsianLineBreakFilter to Shared.

This used to live in the Markdown reader.

											
										
										
											2017-05-30 10:22:48 +02:00
+								                     eastAsianLineBreakFilter,
-												Consistent underline for Readers (#2270)

* Added underlineSpan builder function.  This can be easily updated if needed. The purpose is for Readers to transform underlines consistently.

* Docx Reader: Use underlineSpan and update test

* Org Reader: Use underlineSpan and add test

* Textile Reader: Use underlineSpan and add test case

* Txt2Tags Reader: Use underlineSpan and update test

* HTML Reader: Use underlineSpan and add test case

											
										
										
											2017-10-27 18:45:00 -04:00
+								                     underlineSpan,
-												Shared: new export `splitSentences` [API change].

This was duplicated in the Man and Ms writers, and really
belongs in Shared.

											
										
										
											2018-10-01 22:47:01 -07:00
+								                     splitSentences,
-												Shared: add filterIpynbOutput. [API change]

Add command line option `--ipynb-output=all|none|best`.

Closes #5339.

											
										
										
											2019-02-28 20:28:16 -08:00
+								                     filterIpynbOutput,
-												Moved renderTags' from HTML reader & SelfContained to Shared.

Improved removal of markdown="1" attribute in Markdown reader.

											
										
										
											2012-08-15 09:42:16 -07:00
+								                     -- * TagSoup HTML handling
 								                     renderTags',
-												Added 'odt' output option to pandoc:
Not a writer, but a module that inserts the output of the OpenDocument
writer into an ODT archive.  This replaces markdown2odt.

+ Added odt output option to Main.hs.
+ Added default for .odt output file.
+ Changed defaults so that .xml and .sgml aren't automatically DocBook.
+ Added odt writer to Text.Pandoc exports.
+ Added Text.Pandoc.ODT and included in pandoc.cabal.
+ Added reference.odt as data-file in pandoc.cabal.
+ Handle picture links in OpenDocument files using xml library.
+ Removed markdown2odt and references from Makefile, README, man.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1345 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-07-31 23:16:02 +00:00
+								                     -- * File handling
-												Removed TH module; refactored LaTeXMathML not to use TH.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1692 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-12-31 01:11:23 +00:00
+								                     inDirectory,
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								                     collapseFilePath,
-												Shared: add uriPathToPath.

This adjusts the path from a file: URI in a way that is sensitive
to Windows/Linux differences.  Thus, on Windows,
`/c:/foo` gets interpreted as `c:/foo`, but on Linux,
`/c:/foo` gets interpreted as `/c:/foo`.

See #4613.

											
										
										
											2018-05-08 09:54:19 -07:00
+								                     uriPathToPath,
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid DRY.

											
										
										
											2016-10-12 17:42:30 +02:00
+								                     filteredFilesFromArchive,
-												Shared: Provide custom isURI that rejects unknown schemes [isURI]

We also export the set of known `schemes`.

The new function replaces the function of the same name
from `Network.URI`, as the latter did not check whether a scheme is
well-known.  E.g. MediaWiki wikis frequently feature pages with names
like `User:John`. These links were interpreted as URIs, thus turning
internal links into global links. This is prevented by also checking
whether the scheme of a URI is frequently used (i.e. is IANA registered
or an otherwise well-known scheme).

Fixes: #2713

Update set of well-known URIs from IANA list
All official IANA schemes (as of 2017-05-22) are included in the set of
known schemes.  The four non-official schemes doi, isbn, javascript, and
pmid are kept.

											
										
										
											2017-05-23 09:48:11 +02:00
+								                     -- * URI handling
 								                     schemes,
 								                     isURI,
-												Complete rewrite of LaTeX reader.

* The new reader is more robust, accurate, and extensible.
  It is still quite incomplete, but it should be easier
  now to add features.

* Text.Pandoc.Parsing: Added withRaw combinator.

* Markdown reader: do escapedChar before raw latex inline.
  Otherwise we capture commands like \{.

* Fixed latex citation tests for new citeproc.

* Handle \include{} commands in latex.
  This is done in pandoc.hs, not the (pure) latex reader.
  But the reader exports the needed function, handleIncludes.

* Moved err and warn from pandoc.hs to Shared.

* Fixed tests - raw tex should sometimes have trailing space.

* Updated lhs-test for highlighting-kate changes.

											
										
										
											2012-01-29 23:54:00 -08:00
+								                     -- * Error handling
-												Move utility error functions to Text.Pandoc.Shared

											
										
										
											2015-02-18 21:05:47 +00:00
+								                     mapLeft,
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
+								                     -- * for squashing blocks
 								                     blocksToInlines,
-												API change: export blocksToInlines' from Text.Pandoc.Shared

											
										
										
											2017-12-22 12:26:06 +01:00
+								                     blocksToInlines',
-												Lua Utils module: add function blocks_to_inlines (#4799)

Exposes a function which flattens a list of blocks into a
list of inlines. An example use case would be the conversion of Note
elements into other inlines.
											
										
										
											2018-07-30 19:55:25 +02:00
+								                     blocksToInlinesWithSep,
 								                     defaultBlocksSeparator,
-												Added safeRead to Text.Pandoc.Shared.

											
										
										
											2012-08-09 07:52:39 -07:00
+								                     -- * Safe read
-												Moved withTempDir from PDF to Shared, export from Shared.

API change.

											
										
										
											2014-07-30 12:29:04 -07:00
+								                     safeRead,
-												Add new exported function defaultUserDataDirs

											
										
										
											2019-03-02 15:03:51 -08:00
+								                     -- * User data directory
 								                     defaultUserDataDirs,
-												Move the variable pandocVersion from `src/Text/Pandoc.hs` to
`src/Text/Pandoc/Shared.hs`, so that all Writers can access this variable
without importing `src/Text/Pandoc.hs`, preventing circular import.

* pandoc.hs: Import pandocVersion from `Text.Pandoc.Shared`.
* src/Text/Pandoc.hs: Remove the definition of pandocVersion
 and relevant import.
* src/Text/Pandoc/Shared.hs: Add the definition of pandocVersion
 and relevant import.

											
										
										
											2015-09-25 03:54:41 +08:00
+								                     -- * Version
 								                     pandocVersion
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                    ) where
-												Use NoImplicitPrelude and explicitly import Prelude.

This seems to be necessary if we are to use our custom Prelude
with ghci.

Closes #4464.

											
										
										
											2018-03-18 10:46:28 -07:00
+								import Prelude
-												Automatic reformating by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								import Codec.Archive.Zip
 								import qualified Control.Exception as E
 								import Control.Monad (MonadPlus (..), msum, unless)
 								import qualified Control.Monad.State.Strict as S
 								import qualified Data.ByteString.Lazy as BL
-												Reimplement mapLeft using Bifunctor.first

											
										
										
											2018-10-10 01:26:50 +03:00
+								import qualified Data.Bifunctor as Bifunctor
-												Exactly match GitHub's identifier generating algorithm.

See #5057.

											
										
										
											2018-11-11 20:45:38 -08:00
+								import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
 								                  generalCategory, GeneralCategory(NonSpacingMark,
 								                  SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
-												Don't rely on syb when we don't need to.

											
										
										
											2017-10-27 21:44:22 -07:00
+								import Data.Data (Data, Typeable)
-												filterIpynbOutput - go back to just including one block per format.

In the end we need a 1-1 map of mime types to output blocks.

											
										
										
											2019-03-06 11:09:15 -08:00
+								import Data.List (find, intercalate, intersperse, stripPrefix, sortBy)
 								import Data.Ord (comparing)
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template..
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								import qualified Data.Map as M
-												Automatic reformating by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								import Data.Maybe (mapMaybe)
-												consolidate simple-table detection (#5524)

add `onlySimpleTableCells` to `Text.Pandoc.Shared`

[API change]

This fixes an inconsistency in the HTML reader, which did not treat tables with `<p>` inside cells as simple.

											
										
										
											2019-05-27 19:53:19 +02:00
+								import Data.Monoid (Any (..))
-												Automatic reformating by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr)
-												Shared:  Added ordNub.

API change (adds export).

											
										
										
											2014-06-03 11:00:54 -07:00
+								import qualified Data.Set as Set
-												Automatic reformating by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								import qualified Data.Text as T
 								import Data.Version (showVersion)
 								import Network.URI (URI (uriScheme), escapeURIString, parseURI)
 								import Paths_pandoc (version)
-												Added 'odt' output option to pandoc:
Not a writer, but a module that inserts the output of the OpenDocument
writer into an ODT archive.  This replaces markdown2odt.

+ Added odt output option to Main.hs.
+ Added default for .odt output file.
+ Changed defaults so that .xml and .sgml aren't automatically DocBook.
+ Added odt writer to Text.Pandoc exports.
+ Added Text.Pandoc.ODT and included in pandoc.cabal.
+ Added reference.odt as data-file in pandoc.cabal.
+ Handle picture links in OpenDocument files using xml library.
+ Removed markdown2odt and references from Makefile, README, man.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1345 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-07-31 23:16:02 +00:00
+								import System.Directory
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								import System.FilePath (isPathSeparator, splitDirectories)
-												MediaBag:  ensure that / is always used as path separator.

											
										
										
											2015-09-26 22:40:58 -07:00
+								import qualified System.FilePath.Posix as Posix
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions,
 								                          renderTagsOptions)
 								import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..))
 								import qualified Text.Pandoc.Builder as B
-												Removed old-locale flag and Text.Pandoc.Compat.Time.

This is no longer necessary since we no longer support ghc 7.8.

											
										
										
											2018-03-18 11:24:29 -07:00
+								import Data.Time
-												Text.Pandoc.Shared: add parameter to uniqueIdent, inlineListToIdentifier.

The parameter is Extensions. This allows these functions to
be sensitive to the settings of `Ext_gfm_auto_identifiers` and
`Ext_ascii_identifiers`.

This allows us to use `uniqueIdent` in the CommonMark reader,
replacing some custom code.

It also means that `gfm_auto_identifiers` can now be used
in all formats.

Semantically, `gfm_auto_identifiers` is now a modifier of
`auto_identifiers`; for identifiers to be set, `auto_identifiers`
must be turned on, and then the type of identifier produced
depends on whether `gfm_auto_identifiers` and `ascii_identifiers` are set.

Closes #5057.

											
										
										
											2018-11-11 13:27:25 -08:00
+								import Text.Pandoc.Asciify (toAsciiChar)
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								import Text.Pandoc.Definition
-												Text.Pandoc.Shared: add parameter to uniqueIdent, inlineListToIdentifier.

The parameter is Extensions. This allows these functions to
be sensitive to the settings of `Ext_gfm_auto_identifiers` and
`Ext_ascii_identifiers`.

This allows us to use `uniqueIdent` in the CommonMark reader,
replacing some custom code.

It also means that `gfm_auto_identifiers` can now be used
in all formats.

Semantically, `gfm_auto_identifiers` is now a modifier of
`auto_identifiers`; for identifiers to be set, `auto_identifiers`
must be turned on, and then the type of identifier produced
depends on whether `gfm_auto_identifiers` and `ascii_identifiers` are set.

Closes #5057.

											
										
										
											2018-11-11 13:27:25 -08:00
+								import Text.Pandoc.Extensions (Extensions, Extension(..), extensionEnabled)
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								import Text.Pandoc.Generic (bottomUp)
-												Use new doctemplates, doclayout.

+ Remove Text.Pandoc.Pretty; use doclayout instead. [API change]
+ Text.Pandoc.Writers.Shared: remove metaToJSON, metaToJSON'
  [API change].
+ Text.Pandoc.Writers.Shared: modify `addVariablesToContext`,
  `defField`, `setField`, `getField`, `resetField` to work with
  Context rather than JSON values. [API change]
+ Text.Pandoc.Writers.Shared: export new function `endsWithPlain` [API
  change].
+ Use new templates and doclayout in writers.
+ Use Doc-based templates in all writers.
+ Adjust three tests for minor template rendering differences.
+ Added indentation to body in docbook4, docbook5 templates.

The main impact of this change is better reflowing of content
interpolated into templates.  Previously, interpolated variables
were rendered independently and interpolated as strings, which could lead
to overly long lines.  Now the templates are interpolated as Doc values
which may include breaking spaces, and reflowing occurs
after template interpolation rather than before.

											
										
										
											2019-08-14 22:11:05 -07:00
+								import Text.DocLayout (charWidth)
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								import Text.Pandoc.Walk
-												fix build failure with --flags=-https

The issue was originally reported by CasperVector as
    https://github.com/gentoo-haskell/gentoo-haskell/issues/427

Manifests itself as a build failure full of missing zip-archive
names:

    src/Text/Pandoc/Shared.hs:756:49:
        Not in scope: type constructor or class ‘Archive’
    src/Text/Pandoc/Shared.hs:777:38: Not in scope: ‘toEntry’
    src/Text/Pandoc/Shared.hs:786:19:
        Not in scope: ‘toArchive’
        Perhaps you meant ‘mbArchive’ (line 778)

Included Codec.Archive.Zip unconditionally.

Signed-off-by: Sergei Trofimovich <siarheit@google.com>

											
										
										
											2015-07-30 22:39:25 +01:00
-												Move the variable pandocVersion from `src/Text/Pandoc.hs` to
`src/Text/Pandoc/Shared.hs`, so that all Writers can access this variable
without importing `src/Text/Pandoc.hs`, preventing circular import.

* pandoc.hs: Import pandocVersion from `Text.Pandoc.Shared`.
* src/Text/Pandoc.hs: Remove the definition of pandocVersion
 and relevant import.
* src/Text/Pandoc/Shared.hs: Add the definition of pandocVersion
 and relevant import.

											
										
										
											2015-09-25 03:54:41 +08:00
+								-- | Version number of pandoc library, rendered as a string by
 								-- applying 'showVersion' to the 'version' imported from @Paths_pandoc@.
 								pandocVersion :: String
 								pandocVersion = showVersion version
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								--
 								-- List processing
 								--
 								-- | Split list by groups of one or more sep.
-												Shared: Made splitBy take a test instead of an element.

											
										
										
											2010-12-21 08:41:24 -08:00
+								splitBy :: (a -> Bool) -> [a] -> [[a]]
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								splitBy _ [] = []
-												Shared: Made splitBy take a test instead of an element.

											
										
										
											2010-12-21 08:41:24 -08:00
+								splitBy isSep lst =
 								  let (first, rest) = break isSep lst
 								      rest'         = dropWhile isSep rest
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								  in  first:splitBy isSep rest'
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								splitByIndices :: [Int] -> [a] -> [[a]]
 								splitByIndices [] lst = [lst]
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x)  xs) rest
-												Shared: Added splitStringWithIndices.

This is like splitWithIndices, but it is sensitive to distinctions
between wide, combining, and regular characters.

											
										
										
											2012-01-27 00:37:46 -08:00
+								  where (first, rest) = splitAt x lst
 								-- | Split string into chunks divided at specified indices.
 								splitStringByIndices :: [Int] -> [Char] -> [[Char]]
 								splitStringByIndices [] lst = [lst]
 								splitStringByIndices (x:xs) lst =
 								  let (first, rest) = splitAt' x lst in
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								  first : splitStringByIndices (map (\y -> y - x) xs) rest
-												Shared: Added splitStringWithIndices.

This is like splitWithIndices, but it is sensitive to distinctions
between wide, combining, and regular characters.

											
										
										
											2012-01-27 00:37:46 -08:00
 								-- | Variant of 'splitAt' whose budget is consumed by 'charWidth' of each
 								-- character rather than by one per character.  The prefix keeps taking
 								-- characters while the remaining budget is still positive.
 								splitAt' :: Int -> [Char] -> ([Char],[Char])
 								splitAt' budget str =
 								  case str of
 								    []               -> ([], [])
 								    _ | budget <= 0  -> ([], str)
 								    (c:cs)           ->
 								      -- The tuple is bound lazily so the prefix can be consumed
 								      -- incrementally.
 								      let (taken, remaining) = splitAt' (budget - charWidth c) cs
 								      in  (c : taken, remaining)
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Replace each occurrence of one sublist in a list with another.
 								substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
 								substitute _ _ [] = []
-												Slight code cleanup on substitute function.

											
										
										
											2010-07-11 12:22:18 -07:00
+								substitute [] _ xs = xs
 								substitute target replacement lst@(x:xs) =
-												Use `stripPrefix` where appropriate.

											
										
										
											2014-08-03 14:44:39 +04:00
+								    case stripPrefix target lst of
 								      Just lst' -> replacement ++ substitute target replacement lst'
 								      Nothing   -> x : substitute target replacement xs
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
-												Shared:  Added ordNub.

API change (adds export).

											
										
										
											2014-06-03 11:00:54 -07:00
+								-- | Remove duplicate elements, keeping the first occurrence of each and
 								-- preserving the original order.  Membership is tracked in a 'Set.Set'.
 								ordNub :: (Ord a) => [a] -> [a]
 								ordNub items = dedup items Set.empty
 								  where
 								    -- @seen@ holds every element already emitted.
 								    dedup [] _ = []
 								    dedup (y:ys) seen
 								      | Set.member y seen = dedup ys seen
 								      | otherwise         = y : dedup ys (Set.insert y seen)
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								--
 								-- Text processing
 								--
-												Add Text.Pandoc.Shared.ToString typeclass (API change)

											
										
										
											2018-11-01 14:09:11 +03:00
+								class ToString a where
 								  -- | Convert the value to a 'String'.
 								  toString :: a -> String
 								-- 'String' is already the target type, so conversion is the identity.
 								instance ToString String where
 								  toString = id
 								-- 'T.Text' values are converted with 'T.unpack'.
 								instance ToString T.Text where
 								  toString = T.unpack
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								-- | Build an association list that maps each of the given characters
 								-- to that same character preceded by a backslash.
 								backslashEscapes :: [Char]    -- ^ list of special characters to escape
 								                 -> [(Char, String)]
 								backslashEscapes specials = [ (c, '\\' : [c]) | c <- specials ]
 								-- | Escape a string of characters, using an association list of
 								-- characters and strings.
 								escapeStringUsing :: [(Char, String)] -> String -> String
 								escapeStringUsing _ [] = ""
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								escapeStringUsing escapeTable (x:xs) =
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								  case lookup x escapeTable of
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								       Just str -> str ++ rest
 								       Nothing  -> x:rest
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								  where rest = escapeStringUsing escapeTable xs
 								-- | Drop every @\'\\n\'@ character at the end of the string; newlines
 								-- elsewhere are left untouched.
 								stripTrailingNewlines :: String -> String
 								stripTrailingNewlines str = reverse (dropWhile isNewline (reverse str))
 								  where isNewline c = c == '\n'
 								-- | Remove leading and trailing space (including newlines) from string.
-												Renamed removedLeadingTrailingSpace to trim.

Also removeLeadingSpace to triml,
removeTrailingSpace to trimr.

											
										
										
											2012-09-29 17:09:34 -04:00
+								trim :: String -> String
 								-- Trailing space is removed first, then leading space.
 								trim str = triml (trimr str)
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Remove leading space (including newlines) from string.
-												Renamed removedLeadingTrailingSpace to trim.

Also removeLeadingSpace to triml,
removeTrailingSpace to trimr.

											
										
										
											2012-09-29 17:09:34 -04:00
+								triml :: String -> String
-												Revert "Use -XNoImplicitPrelude and 'import Prelude' explicitly."

This reverts commit c423dbb5a34c2d1195020e0f0ca3aae883d0749b.

											
										
										
											2015-11-09 10:08:22 -08:00
+								triml = dropWhile (`elem` " \r\n\t")
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Remove trailing space (including newlines) from string.
-												Renamed removedLeadingTrailingSpace to trim.

Also removeLeadingSpace to triml,
removeTrailingSpace to trimr.

											
										
										
											2012-09-29 17:09:34 -04:00
+								trimr :: String -> String
 								-- Reverse, strip what is now the leading space, and reverse back.
 								trimr str = reverse (triml (reverse str))
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
-												LaTeX reader:  allow space at end of math after `\`.

Closes #5010.

Expose trimMath from T.P.Shared.

											
										
										
											2018-10-29 22:20:14 -07:00
+								-- | Trim leading whitespace, and trailing whitespace except for a
 								-- whitespace character that directly follows a backslash.
 								trimMath :: String -> String
 								trimMath str = triml (reverse (dropTrailing (reverse str)))
 								  where
 								    -- Operates on the reversed string: a leading blank is dropped only
 								    -- when the character after it (the one before it in the original
 								    -- order) is not a backslash.
 								    dropTrailing revd =
 								      case revd of
 								        (blank:prev:more)
 								          | isBlank blank && prev /= '\\' -> dropTrailing (prev:more)
 								        _ -> revd
 								    isBlank ch = ch `elem` [' ','\t','\n','\r']
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								-- | Strip leading and trailing characters from string
 								stripFirstAndLast :: String -> String
 								stripFirstAndLast str =
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								  drop 1 $ take (length str - 1) str
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								camelCaseToHyphenated :: String -> String
 								camelCaseToHyphenated [] = ""
 								camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								  a:'-':toLower b:camelCaseToHyphenated rest
 								camelCaseToHyphenated (a:rest) = toLower a:camelCaseToHyphenated rest
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Convert number < 4000 to uppercase roman numeral.
 								toRomanNumeral :: Int -> String
-												Simplify toRomanNumeral using guards (#3445)


											
										
										
											2017-02-15 02:00:23 +04:00
+								toRomanNumeral x
 								  | x >= 4000 || x < 0 = "?"
 								  | otherwise          = render x numerals
 								  where
 								    -- Greedily emit the largest numeral that still fits, then continue
 								    -- with the remainder from the same position in the table.
 								    render _ [] = ""
 								    render n table@((value, glyph) : smaller)
 								      | n >= value = glyph ++ render (n - value) table
 								      | otherwise  = render n smaller
 								    -- Values in descending order, including the subtractive pairs.
 								    numerals =
 								      [ (1000, "M"), (900, "CM"), (500, "D"), (400, "CD")
 								      , (100, "C"), (90, "XC"), (50, "L"), (40, "XL")
 								      , (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I") ]
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
-												Percent-encode more special characters in URLs.

HTML, LaTeX writers adjusted.
The special characters are '<','>','|','"','{','}','[',']','^', '`'.

Closes #1640, #2377.

											
										
										
											2015-10-11 17:06:26 -07:00
+								-- | Escape whitespace and some punctuation characters in URI.
-												Shared: renamed stringToURI -> escapeURI.

											
										
										
											2010-03-23 15:05:33 -07:00
+								escapeURI :: String -> String
-												Percent-encode more special characters in URLs.

HTML, LaTeX writers adjusted.
The special characters are '<','>','|','"','{','}','[',']','^', '`'.

Closes #1640, #2377.

											
										
										
											2015-10-11 17:06:26 -07:00
+								escapeURI = escapeURIString (not . needsEscaping)
 								  where needsEscaping c = isSpace c || c `elem`
 								                           ['<','>','|','"','{','}','[',']','^', '`']
-												Move CR filtering from tabFilter to the readers.

The readers previously assumed that CRs had been filtered
from the input.  Now we strip the CRs in the readers themselves,
before parsing.  (The point of this is just to simplify the
parsers.)

Shared now exports a new function `crFilter`. [API change]
And `tabFilter` no longer filters CRs.

											
										
										
											2017-06-20 21:52:13 +02:00
+								-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0.
-												Changed order of functions in Shared.

											
										
										
											2010-07-06 23:17:06 -07:00
+								tabFilter :: Int       -- ^ Tab stop
-												Rewrote convertTabs to use Text not String.

											
										
										
											2017-06-10 15:22:25 +02:00
+								          -> T.Text    -- ^ Input
 								          -> T.Text
-												Move CR filtering from tabFilter to the readers.

The readers previously assumed that CRs had been filtered
from the input.  Now we strip the CRs in the readers themselves,
before parsing.  (The point of this is just to simplify the
parsers.)

Shared now exports a new function `crFilter`. [API change]
And `tabFilter` no longer filters CRs.

											
										
										
											2017-06-20 21:52:13 +02:00
+								tabFilter 0 = id
 								tabFilter tabStop = T.unlines . map go . T.lines
-												Rewrote convertTabs to use Text not String.

											
										
										
											2017-06-10 15:22:25 +02:00
+								  where go s =
 								         let (s1, s2) = T.break (== '\t') s
 								         in  if T.null s2
 								                then s1
 								                else s1 <> T.replicate
 								                       (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
 								                       <> go (T.drop 1 s2)
-												Changed order of functions in Shared.

											
										
										
											2010-07-06 23:17:06 -07:00
-												Move CR filtering from tabFilter to the readers.

The readers previously assumed that CRs had been filtered
from the input.  Now we strip the CRs in the readers themselves,
before parsing.  (The point of this is just to simplify the
parsers.)

Shared now exports a new function `crFilter`. [API change]
And `tabFilter` no longer filters CRs.

											
										
										
											2017-06-20 21:52:13 +02:00
+								-- | Remove every carriage-return character from the input, so that
 								-- DOS-style @\\r\\n@ line endings become plain @\\n@.
 								crFilter :: T.Text -> T.Text
 								crFilter txt = T.filter notCR txt
 								  where notCR ch = ch /= '\r'
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
+								--
 								-- Date/time
 								--
-												Shared: normalizeDate should reject illegal years.

We only allow years between 1601 and 9999, inclusive. The ISO 8601
actually says that years are supposed to start with 1583, but MS Word
only allows 1601-9999. This should stop corrupted word files if the date
is out of that range, or is parsed incorrectly.

											
										
										
											2016-07-09 15:37:47 -04:00
+								-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
 								-- limit years to the range 1601-9999 (ISO 8601 accepts greater than
-												Shared: improve year sanity check in normalizeDate

Previously we parsed a list of dates, took the first one, and then
tested its year range. That meant that if the first one failed, we
returned nothing, regardless of what the others did. Now we test for
sanity before running `msum` over the list of Maybe values. Anything
failing the test will be Nothing, so will not be a candidate.

											
										
										
											2016-07-09 17:03:39 -04:00
+								-- or equal to 1583, but MS Word only accepts dates starting 1601).
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
+								normalizeDate :: String -> Maybe String
-												Shared: improve year sanity check in normalizeDate

Previously we parsed a list of dates, took the first one, and then
tested its year range. That meant that if the first one failed, we
returned nothing, regardless of what the others did. Now we test for
sanity before running `msum` over the list of Maybe values. Anything
failing the test will be Nothing, so will not be a candidate.

											
										
										
											2016-07-09 17:03:39 -04:00
+								normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
 								  (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
 								  where rejectBadYear day = case toGregorian day of
 								          (y, _, _) | y >= 1601 && y <= 9999 -> Just day
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								          _         -> Nothing
-												Removed old-locale flag and Text.Pandoc.Compat.Time.

This is no longer necessary since we no longer support ghc 7.8.

											
										
										
											2018-03-18 11:24:29 -07:00
+								        parsetimeWith = parseTimeM True defaultTimeLocale
-												Shared: normalizeDate should reject illegal years.

We only allow years between 1601 and 9999, inclusive. The ISO 8601
actually says that years are supposed to start with 1583, but MS Word
only allows 1601-9999. This should stop corrupted word files if the date
is out of that range, or is parsed incorrectly.

											
										
										
											2016-07-09 15:37:47 -04:00
+								        formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
-												make normalizeDate more forgiving (#4101)

also parse two-digit days, e.g. "April 20, 2017"
											
										
										
											2017-11-28 19:15:35 +01:00
+								                    "%e %B %Y", "%b. %e, %Y", "%B %e, %Y",
-												Shared: Add further formats for `normalizeDate`

We want to avoid illegal dates -- in particular years with greater than
four digits. We attempt to parse series of digits first as `%Y%m%d`, then
`%Y%m`, and finally `%Y`.

											
										
										
											2016-07-09 11:13:25 -04:00
+								                    "%Y%m%d", "%Y%m", "%Y"]
-												Put date in YYYY-MM-DD format if possible for HTML, docx metadata.

Added normalizeDate to Text.Pandoc.Shared.

											
										
										
											2012-01-28 15:54:05 -08:00
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								--
 								-- Pandoc block and inline list processing
 								--
 								-- | Generate infinite lazy list of markers for an ordered list,
 								-- depending on list attributes.
 								orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								orderedListMarkers (start, numstyle, numdelim) =
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								  let singleton c = [c]
 								      nums = case numstyle of
 								                     DefaultStyle -> map show [start..]
-												Merge branch 'atlists'.  Added auto-numbered example lists.

											
										
										
											2010-07-11 22:47:52 -07:00
+								                     Example      -> map show [start..]
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                     Decimal      -> map show [start..]
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								                     UpperAlpha   -> drop (start - 1) $ cycle $
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
+								                                     map singleton ['A'..'Z']
 								                     LowerAlpha   -> drop (start - 1) $ cycle $
 								                                     map singleton ['a'..'z']
 								                     UpperRoman   -> map toRomanNumeral [start..]
 								                     LowerRoman   -> map (map toLower . toRomanNumeral) [start..]
 								      inDelim str = case numdelim of
 								                            DefaultDelim -> str ++ "."
 								                            Period       -> str ++ "."
 								                            OneParen     -> str ++ ")"
 								                            TwoParens    -> "(" ++ str ++ ")"
 								  in  map inDelim nums
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								-- | Extract the leading and trailing spaces from inside an inline element
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								-- and place them outside the element.  SoftBreaks count as Spaces for
 								-- these purposes.
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
-												Removed space at ends of lines in source.

											
										
										
											2014-07-12 22:57:22 -07:00
+								extractSpaces f is =
-												Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								  let contents = B.unMany is
 								      left  = case viewl contents of
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								                    (Space :< _)     -> B.space
 								                    (SoftBreak :< _) -> B.softbreak
 								                    _                -> mempty
-												Moved extractSpaces to Shared.hs

Generalised and moved the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								      right = case viewr contents of
-												Implemented SoftBreak and new `--wrap` option.

Added threefold wrapping option.

* Command line option: deprecated `--no-wrap`, added
  `--wrap=[auto|none|preserve]`
* Added WrapOption, exported from Text.Pandoc.Options
* Changed type of writerWrapText in WriterOptions from
  Bool to WrapOption.
* Modified Text.Pandoc.Shared functions for SoftBreak.
* Supported SoftBreak in writers.
* Updated tests.
* Updated README.

Closes #1701.

											
										
										
											2015-12-11 15:58:11 -08:00
+								                    (_ :> Space)     -> B.space
 								                    (_ :> SoftBreak) -> B.softbreak
 								                    _                -> mempty in
-												Moved extractSpaces to Shared.hs

Generalised and moved the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.

											
										
										
											2014-06-16 20:45:54 +01:00
+								  (left <> f (B.trimInlines . B.Many $ contents) <> right)
-												Better comment on removeFormatting.

											
										
										
											2014-07-13 15:10:27 -07:00
+								-- | Extract inlines, removing formatting.
-												Shared: Generalized type of removeFormatting.

											
										
										
											2014-07-13 14:56:20 -07:00
+								removeFormatting :: Walkable Inline a => a -> [Inline]
-												Shared.stringify, removeFormatting: handle Quoted better.

Previously we were losing the quotation marks in Quoted
elements.  See #3958.

											
										
										
											2017-10-08 21:55:57 -07:00
+								removeFormatting = query go . walk (deNote . deQuote)
-												Shared:  Added removeFormatting.

API change (addition of exported function).

											
										
										
											2014-07-13 10:13:22 -07:00
+								  where go :: Inline -> [Inline]
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								        go (Str xs)   = [Str xs]
 								        go Space      = [Space]
 								        go SoftBreak  = [SoftBreak]
 								        go (Code _ x) = [Str x]
 								        go (Math _ x) = [Str x]
 								        go LineBreak  = [Space]
 								        go _          = []
-												Factored out deNote in Shared.

											
										
										
											2017-01-15 22:15:35 +01:00
 								-- | Replace a footnote with an empty 'Str'; every other inline is
 								-- returned unchanged.
 								deNote :: Inline -> Inline
 								deNote inline = case inline of
 								  Note _ -> Str ""
 								  _      -> inline
-												Shared:  Added removeFormatting.

API change (addition of exported function).

											
										
										
											2014-07-13 10:13:22 -07:00
-												Shared.stringify, removeFormatting: handle Quoted better.

Previously we were losing the quotation marks in Quoted
elements.  See #3958.

											
										
										
											2017-10-08 21:55:57 -07:00
+								-- | Rewrite a 'Quoted' inline as an attribute-less 'Span' whose contents
 								-- are bracketed by literal curly quotation marks (single or double,
 								-- according to the quote type).  Any other inline is returned unchanged.
 								deQuote :: Inline -> Inline
 								deQuote inline = case inline of
 								  Quoted SingleQuote xs -> bracketed "\8216" "\8217" xs
 								  Quoted DoubleQuote xs -> bracketed "\8220" "\8221" xs
 								  _                     -> inline
 								  where
 								    -- Put the opening and closing marks around the quoted inlines.
 								    bracketed open close xs =
 								      Span ("",[],[]) (Str open : xs ++ [Str close])
-												Generalized type of stringify.

											
										
										
											2013-08-28 08:43:51 -07:00
+								-- | Convert pandoc structure to a string with formatting removed.
-												Shared:  Changed stringify so it ignores notes.

Also documented this in README.

											
										
										
											2013-08-16 13:22:27 -07:00
+								-- Footnotes are skipped (since we don't want their contents in link
 								-- labels).
-												Generalized type of stringify.

											
										
										
											2013-08-28 08:43:51 -07:00
+								stringify :: Walkable Inline a => a -> String
-												Shared.stringify, removeFormatting: handle Quoted better.

Previously we were losing the quotation marks in Quoted
elements.  See #3958.

											
										
										
											2017-10-08 21:55:57 -07:00
+								stringify = query go . walk (deNote . deQuote)
-												Added 'stringify' to Text.Pandoc.Shared.

											
										
										
											2010-11-27 07:08:06 -08:00
+								  where go :: Inline -> [Char]
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								        go Space                                       = " "
 								        go SoftBreak                                   = " "
 								        go (Str x)                                     = x
 								        go (Code _ x)                                  = x
 								        go (Math _ x)                                  = x
-												EPUB TOC: replace literal "<br/>" with space.

Closes #2105.

											
										
										
											2015-08-10 16:58:47 -07:00
+								        go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								        go LineBreak                                   = " "
 								        go _                                           = ""
-												Added 'stringify' to Text.Pandoc.Shared.

											
										
										
											2010-11-27 07:08:06 -08:00
-												Correctly implement capitalisation.

Using `map toUpper` to capitalise text is wrong, as e.g.
“Straße” should be converted to “STRASSE”, which is 1 character
longer. This commit adds a `capitalize` function and replaces
2 identical implementations in different modules (`toCaps` and
`capitalize`) with it.

											
										
										
											2014-08-03 16:48:55 +04:00
+								-- | Bring all regular text in a pandoc structure to uppercase.
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								--
-												Correctly implement capitalisation.

Using `map toUpper` to capitalise text is wrong, as e.g.
“Straße” should be converted to “STRASSE”, which is 1 character
longer. This commit adds a `capitalize` function and replaces
2 identical implementations in different modules (`toCaps` and
`capitalize`) with it.

											
										
										
											2014-08-03 16:48:55 +04:00
+								-- This function correctly handles cases where a lowercase character doesn't
 								-- match to a single uppercase character – e.g. “Straße” would be converted
 								-- to “STRASSE”, not “STRAßE”.
 								capitalize :: Walkable Inline a => a -> a
 								capitalize = walk upcase
 								  where
 								    -- Only 'Str' inlines are touched.  The string is converted to
 								    -- 'T.Text' and back so that 'T.toUpper' performs the case mapping.
 								    upcase :: Inline -> Inline
 								    upcase inline = case inline of
 								      Str s -> Str ((T.unpack . T.toUpper . T.pack) s)
 								      _     -> inline
-												Shared: Export compactify', formerly in Markdown reader.

											
										
										
											2012-09-27 17:22:17 -07:00
+								-- | Change final list item from @Para@ to @Plain@ if the list contains
-												Improve tight/loose list handling.

Closes #5285. Previously the algorithm allowed list items
with a mix of Para and Plain, which is never wanted.

compactify in T.P.Shared has been modified so that, if
a list's items contain (at the top level) Para elements
(aside from perhaps at the very end), ALL Plains are
converted to Paras.

											
										
										
											2019-02-08 23:16:01 -08:00
+								-- no other @Para@ blocks.  Otherwise (if the list items contain @Para@
 								-- blocks besides possibly at the end), turn any @Plain@s into @Para@s (#5285).
-												Shared: rename compactify', compactify'DL -> compactify, compactifyDL.

											
										
										
											2017-01-27 21:36:45 +01:00
+								compactify :: [Blocks]  -- ^ List of list items (each a list of blocks)
-												Shared: Export compactify', formerly in Markdown reader.

											
										
										
											2012-09-27 17:22:17 -07:00
+								           -> [Blocks]
-												Shared: rename compactify', compactify'DL -> compactify, compactifyDL.

											
										
										
											2017-01-27 21:36:45 +01:00
+								compactify [] = []
 								compactify items =
-												Shared: Export compactify', formerly in Markdown reader.

											
										
										
											2012-09-27 17:22:17 -07:00
+								  let (others, final) = (init items, last items)
 								  in  case reverse (B.toList final) of
-												Shared.compactify: Avoid mixed lists.

This improves on the original fix to #5285 by preventing
other mixed lists (lists with a mix of Plain and Para
elements) that were allowed given the original fix.

											
										
										
											2019-02-23 15:40:06 -07:00
+								           (Para a:xs)
 								             | null [Para x | Para x <- (xs ++ concatMap B.toList others)]
 								             -> others ++ [B.fromList (reverse (Plain a : xs))]
 								           _ | null [Para x | Para x <- concatMap B.toList items]
 								             -> items
 								           _ -> map (fmap plainToPara) items
-												Shared: Export compactify', formerly in Markdown reader.

											
										
										
											2012-09-27 17:22:17 -07:00
-												Improve tight/loose list handling.

Closes #5285. Previously the algorithm allowed list items
with a mix of Para and Plain, which is never wanted.

compactify in T.P.Shared has been modified so that, if
a list's items contain (at the top level) Para elements
(aside from perhaps at the very end), ALL Plains are
converted to Paras.

											
										
										
											2019-02-08 23:16:01 -08:00
+								-- | Promote a 'Plain' block to a 'Para' holding the same inlines; any
 								-- other block is returned as is.
 								plainToPara :: Block -> Block
 								plainToPara blk = case blk of
 								  Plain ils -> Para ils
 								  _         -> blk
-												Shared: rename compactify', compactify'DL -> compactify, compactifyDL.

											
										
										
											2017-01-27 21:36:45 +01:00
+								-- | Like @compactify@, but acts on items of definition lists.
 								compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
 								compactifyDL items =
-												Move `compactify'DL` from Markdown reader into Shared

The function `compactify'DL`, used to change the final definition item of a
definition list into a `Plain` iff all other items are `Plain`s as well, is
useful in many parsers and hence moved into Text.Pandoc.Shared.

											
										
										
											2014-04-19 14:48:35 +02:00
+								  let defs = concatMap snd items
-												Fixed runtime error with compactify'DL on certain lists.

Closes #1452.  Added test.

											
										
										
											2014-07-25 10:53:04 -07:00
+								  in  case reverse (concatMap B.toList defs) of
 								           (Para x:xs)
 								             | not (any isPara xs) ->
 								                   let (t,ds) = last items
 								                       lastDef = B.toList $ last ds
 								                       ds' = init ds ++
 								                             if null lastDef
 								                                then [B.fromList lastDef]
 								                                else [B.fromList $ init lastDef ++ [Plain x]]
 								                    in init items ++ [(t, ds')]
 								             | otherwise           -> items
 								           _                       -> items
-												Move `compactify'DL` from Markdown reader into Shared

The function `compactify'DL`, used to change the final definition item of a
definition list into a `Plain` iff all other items are `Plain`s as well, is
useful in many parsers and hence moved into Text.Pandoc.Shared.

											
										
										
											2014-04-19 14:48:35 +02:00
-												Shared: add function combining lines using LineBreak

The `linesToBlock` function takes a list of lines and combines them by appending
a hard `LineBreak` to each line and concatenating the result, putting the result
into a `Para`. This is most useful when converting `LineBlock`
elements.

											
										
										
											2016-10-13 08:46:38 +02:00
+								-- | Join a list of lines into one inline list, placing a hard
 								-- 'LineBreak' between each pair of consecutive lines.
 								combineLines :: [[Inline]] -> [Inline]
 								combineLines []             = []
 								combineLines (first : rest) = first ++ concatMap (LineBreak :) rest
 								-- | Build a single 'Para' from a list of lines, separating the lines
 								--   with hard line breaks (see 'combineLines').  Handy as a simple
 								--   fallback rendering of LineBlock elements in writers.
 								linesToPara :: [[Inline]] -> Block
 								linesToPara ls = Para (combineLines ls)
-												Changed heuristic in compactify.

compactify has to decide whether a Para that ends a list is a Para
intentionally, or just because of the blank lines at the end of
every list.  In the latter case the Para is turned to a Plain.

The old heuristic was:  change final Para to Plain iff the other
items all end in Plain.  This produces bad results when, for example,
an item contains just a Plain and an HTML comment, as in

- a
<!--
- b
-->
-c

The new heuristic:  change final Para to Plain iff the other items
don't contain a Para.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1616 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-11-01 02:38:18 +00:00
+								-- | True if the block is a 'Para'.
 								isPara :: Block -> Bool
 								isPara blk = case blk of
 								  Para{} -> True
 								  _      -> False
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Data structure for defining hierarchical Pandoc documents
-												Fixed whitespace errors.

											
										
										
											2012-07-26 22:32:53 -07:00
+								data Element = Blk Block
-												Shared:  Changed type of Element.

Sec now includes a field for Attr rather than just String
(the identifier).

Note, this is an API change.

											
										
										
											2013-02-12 20:13:23 -08:00
+								             | Sec Int [Int] Attr [Inline] [Element]
 								             --    lvl  num attributes label    contents
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								             deriving (Eq, Read, Show, Typeable, Data)
-												Use query instead of queryWith.

											
										
										
											2013-08-10 18:13:38 -07:00
+								instance Walkable Inline Element where
 								  walk f (Blk x) = Blk (walk f x)
 								  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
 								  walkM f (Blk x) = Blk `fmap` walkM f x
 								  walkM f (Sec lev nums attr ils elts) = do
 								    ils' <- walkM f ils
 								    elts' <- walkM f elts
 								    return $ Sec lev nums attr ils' elts'
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								  query f (Blk x)              = query f x
-												Monoid/Semigroup cleanup relying on custom Prelude.

											
										
										
											2018-03-16 12:11:51 -07:00
+								  query f (Sec _ _ _ ils elts) = query f ils `mappend` query f elts
-												Use query instead of queryWith.

											
										
										
											2013-08-10 18:13:38 -07:00
 								instance Walkable Block Element where
 								  walk f (Blk x) = Blk (walk f x)
 								  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
 								  walkM f (Blk x) = Blk `fmap` walkM f x
 								  walkM f (Sec lev nums attr ils elts) = do
 								    ils' <- walkM f ils
 								    elts' <- walkM f elts
 								    return $ Sec lev nums attr ils' elts'
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								  query f (Blk x)              = query f x
-												Monoid/Semigroup cleanup relying on custom Prelude.

											
										
										
											2018-03-16 12:11:51 -07:00
+								  query f (Sec _ _ _ ils elts) = query f ils `mappend` query f elts
-												Use query instead of queryWith.

											
										
										
											2013-08-10 18:13:38 -07:00
-												Shared:  Fixed uniqueIdent so it behaves as described in README.

Previously some characters that are illegal in HTML identifiers,
such as '<', were being allowed in header identifiers.  The logic
has now been fixed. Thanks to Xyne for reporting.

											
										
										
											2010-03-28 22:29:31 -07:00
+								-- | Convert Pandoc inline list to plain text identifier.  HTML
 								-- identifiers must start with a letter, and may contain only
-												Don't allow colon in autogenerated HTML identifiers.

They have a special meaning in XML (e.g. in EPUB).

											
										
										
											2010-07-04 23:26:04 -07:00
+								-- letters, digits, and the characters _-.
-												Text.Pandoc.Shared: add parameter to uniqueIdent, inlineListToIdentifier.

The parameter is Extensions. This allows these functions to
be sensitive to the settings of `Ext_gfm_auto_identifiers` and
`Ext_ascii_identifiers`.

This allows us to use `uniqueIdent` in the CommonMark reader,
replacing some custom code.

It also means that `gfm_auto_identifiers` can now be used
in all formats.

Semantically, `gfm_auto_identifiers` is now a modifier of
`auto_identifiers`; for identifiers to be set, `auto_identifiers`
must be turned on, and then the type of identifier produced
depends on whether `gfm_auto_identifiers` and `ascii_identifiers` are set.

Closes #5057.

											
										
										
											2018-11-11 13:27:25 -08:00
+								inlineListToIdentifier :: Extensions -> [Inline] -> String
 								inlineListToIdentifier exts =
 								  dropNonLetter . filterAscii . toIdent . stringify
 								  where
 								    dropNonLetter
 								      | extensionEnabled Ext_gfm_auto_identifiers exts = id
 								      | otherwise = dropWhile (not . isAlpha)
 								    filterAscii
 								      | extensionEnabled Ext_ascii_identifiers exts
 								        = mapMaybe toAsciiChar
 								      | otherwise = id
 								    toIdent
 								      | extensionEnabled Ext_gfm_auto_identifiers exts =
 								        filterPunct . spaceToDash . map toLower
 								      | otherwise = intercalate "-" . words . filterPunct . map toLower
 								    filterPunct = filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c)
 								    isAllowedPunct c
-												Exactly match GitHub's identifier generating algorithm.

See #5057.

											
										
										
											2018-11-11 20:45:38 -08:00
+								      | extensionEnabled Ext_gfm_auto_identifiers exts
 								        = c == '-' || c == '_' ||
 								          generalCategory c `elem` [NonSpacingMark, SpacingCombiningMark,
 								                                    EnclosingMark, ConnectorPunctuation]
-												Text.Pandoc.Shared: add parameter to uniqueIdent, inlineListToIdentifier.

The parameter is Extensions. This allows these functions to
be sensitive to the settings of `Ext_gfm_auto_identifiers` and
`Ext_ascii_identifiers`.

This allows us to use `uniqueIdent` in the CommonMark reader,
replacing some custom code.

It also means that `gfm_auto_identifiers` can now be used
in all formats.

Semantically, `gfm_auto_identifiers` is now a modifier of
`auto_identifiers`; for identifiers to be set, `auto_identifiers`
must be turned on, and then the type of identifier produced
depends on whether `gfm_auto_identifiers` and `ascii_identifiers` are set.

Closes #5057.

											
										
										
											2018-11-11 13:27:25 -08:00
+								      | otherwise = c == '_' || c == '-' || c == '.'
 								    spaceToDash = map (\c -> if isSpace c then '-' else c)
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
 								hierarchicalize :: [Block] -> [Element]
-												Revert "Shared.hierarchicalize: Don't number subsections of unnumbered sections."

This reverts commit 2a46042661a088096ac54097db5cd3674438bb63.

											
										
										
											2014-07-21 20:47:18 -07:00
+								hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
 								hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
 								hierarchicalizeWithIds [] = return []
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								hierarchicalizeWithIds (Header level attr@(_,classes,_) title':xs) = do
-												Revert "Shared.hierarchicalize: Don't number subsections of unnumbered sections."

This reverts commit 2a46042661a088096ac54097db5cd3674438bb63.

											
										
										
											2014-07-21 20:47:18 -07:00
+								  lastnum <- S.get
 								  let lastnum' = take level lastnum
 								  let newnum = case length lastnum' of
 								                    x | "unnumbered" `elem` classes -> []
 								                      | x >= level -> init lastnum' ++ [last lastnum' + 1]
 								                      | otherwise -> lastnum ++
 								                           replicate (level - length lastnum - 1) 0 ++ [1]
 								  unless (null newnum) $ S.put newnum
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
+								  let (sectionContents, rest) = break (headerLtEq level) xs
-												Revert "Shared.hierarchicalize: Don't number subsections of unnumbered sections."

This reverts commit 2a46042661a088096ac54097db5cd3674438bb63.

											
										
										
											2014-07-21 20:47:18 -07:00
+								  sectionContents' <- hierarchicalizeWithIds sectionContents
 								  rest' <- hierarchicalizeWithIds rest
-												Shared:  Changed type of Element.

Sec now includes a field for Attr rather than just String
(the identifier).

Note, this is an API change.

											
										
										
											2013-02-12 20:13:23 -08:00
+								  return $ Sec level newnum attr title' sectionContents' : rest'
-												For bibliography match Div with id 'refs', not class 'references'.

This was a mismatch between pandoc's docx, epub, latex, and markdown
writers and the behavior of pandoc-citeproc, which actually looks
for a div with id 'refs' rather than one with class 'references'.

											
										
										
											2018-11-19 11:08:09 -08:00
+								hierarchicalizeWithIds (Div ("refs",classes',kvs')
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								                         (Header level (ident,classes,kvs) title' : xs):ys) =
-												hlint code improvements.

											
										
										
											2018-01-19 21:25:24 -08:00
+								  hierarchicalizeWithIds (Header level (ident,"references":classes,kvs)
-												For bibliography match Div with id 'refs', not class 'references'.

This was a mismatch between pandoc's docx, epub, latex, and markdown
writers and the behavior of pandoc-citeproc, which actually looks
for a div with id 'refs' rather than one with class 'references'.

											
										
										
											2018-11-19 11:08:09 -08:00
+								                           title' : Div ("refs",classes',kvs') xs : ys)
-												Revert "Shared.hierarchicalize: Don't number subsections of unnumbered sections."

This reverts commit 2a46042661a088096ac54097db5cd3674438bb63.

											
										
										
											2014-07-21 20:47:18 -07:00
+								hierarchicalizeWithIds (x:rest) = do
 								  rest' <- hierarchicalizeWithIds rest
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								  return $ Blk x : rest'
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
 								headerLtEq :: Int -> Block -> Bool
-												Automatic reformatting by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								headerLtEq level (Header l _ _) = l <= level
 								-- A Div whose first block is a header also ends the current section when
 								-- it is the bibliography container.  hierarchicalizeWithIds recognises
 								-- that container by the identifier "refs", so the same test is applied
 								-- here; without it a "refs" Div would be swallowed into the preceding
 								-- section.  The older class-based form is still accepted.
 								headerLtEq level (Div ("refs",_,_) (Header l _ _ : _)) = l <= level
 								headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level
 								headerLtEq _ _ = False
-												HTML writer:  wrap sections in divs.  Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-04-25 00:29:58 +00:00
-												Shared: Export uniqueIdent, don't allow tilde in identifier.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1894 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2010-03-16 06:45:52 +00:00
+								-- | Generate a unique identifier from a list of inlines.
 								-- Second argument is a list of already used identifiers.
-												Text.Pandoc.Shared: add parameter to uniqueIdent, inlineListToIdentifier.

The parameter is Extensions. This allows these functions to
be sensitive to the settings of `Ext_gfm_auto_identifiers` and
`Ext_ascii_identifiers`.

This allows us to use `uniqueIdent` in the CommonMark reader,
replacing some custom code.

It also means that `gfm_auto_identifiers` can now be used
in all formats.

Semantically, `gfm_auto_identifiers` is now a modifier of
`auto_identifiers`; for identifiers to be set, `auto_identifiers`
must be turned on, and then the type of identifier produced
depends on whether `gfm_auto_identifiers` and `ascii_identifiers` are set.

Closes #5057.

											
										
										
											2018-11-11 13:27:25 -08:00
+								uniqueIdent :: Extensions -> [Inline] -> Set.Set String -> String
 								uniqueIdent exts title' usedIdents
 								  | unused stem = stem
 								  | otherwise   =
 								      case find unused numbered of
 								        Just fresh -> fresh
 								        -- all 60,000 numbered variants are taken: allow a repeat
 								        Nothing    -> stem
 								  where
 								    -- An identifier is usable when it is not in the set of used ones.
 								    unused ident = not (ident `Set.member` usedIdents)
 								    -- Identifier derived from the title; "section" when that is empty.
 								    stem = case inlineListToIdentifier exts title' of
 								             "" -> "section"
 								             ident -> ident
 								    -- Candidates "stem-1", "stem-2", ... tried in order.
 								    numbered = [stem ++ "-" ++ show k | k <- ([1..60000] :: [Int])]
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
 								-- | True if block is a Header block.
 								isHeaderBlock :: Block -> Bool
-												hlint

											
										
										
											2017-11-01 14:20:03 +03:00
+								isHeaderBlock blk = case blk of
 								  Header{} -> True
 								  _        -> False
-												Reverted back to state as of r1062.  The template haskell changes
are more trouble than they're worth.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1064 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2007-11-03 23:27:58 +00:00
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
+								-- | Shift header levels up or down.
 								headerShift :: Int -> Pandoc -> Pandoc
-												Use walk, walkM in place of bottomUp, bottomUpM when possible.

They are significantly faster.

											
										
										
											2013-08-10 18:45:00 -07:00
+								headerShift n = walk shift
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
+								  where shift :: Block -> Block
-												Added Attr field to Header.

Previously header ids were autogenerated by the writers.
Now they are generated (unless supplied explicitly) in the
markdown parser, if the `header_identifiers` extension is
selected.

In addition, the textile reader now supports id attributes on
headers.

											
										
										
											2012-10-29 22:45:52 -07:00
+								        shift (Header level attr inner) = Header (level + n) attr inner
 								        shift x                         = x
-												Moved headerShift from pandoc.hs to Shared.

											
										
										
											2010-07-11 20:03:55 -07:00
-												Add --strip-empty-paragraphs option.

This works for any input format.

											
										
										
											2017-12-02 15:21:59 -08:00
+								-- | Remove empty paragraphs.
 								stripEmptyParagraphs :: Pandoc -> Pandoc
 								stripEmptyParagraphs = walk dropEmpty
 								  where
 								    -- applied to every block list in the document
 								    dropEmpty :: [Block] -> [Block]
 								    dropEmpty blocks = [blk | blk <- blocks, not (isEmptyPara blk)]
 								    -- only a 'Para' with no inlines counts as empty
 								    isEmptyPara (Para []) = True
 								    isEmptyPara _         = False
-												consolidate simple-table detection (#5524)

add `onlySimpleTableCells` to `Text.Pandoc.Shared`

[API change]

This fixes an inconsistency in the HTML reader, which did not treat tables with `<p>` inside cells as simple.

											
										
										
											2019-05-27 19:53:19 +02:00
+								-- | Detect if table rows contain only cells consisting of a single
 								-- paragraph that has no @LineBreak@.
 								onlySimpleTableCells :: [[TableCell]] -> Bool
 								onlySimpleTableCells rows = all isSimpleCell (concat rows)
 								  where
 								    -- a cell is simple when it is empty, or holds exactly one
 								    -- Plain/Para block without any LineBreak inside it
 								    isSimpleCell cell = case cell of
 								      []          -> True
 								      [Plain ils] -> lacksLineBreak ils
 								      [Para ils]  -> lacksLineBreak ils
 								      _           -> False
 								    lacksLineBreak ils = not (getAny (query lineBreakFound ils))
 								    lineBreakFound LineBreak = Any True
 								    lineBreakFound _         = Any False
-												Shared: export isTightList.

											
										
										
											2013-01-07 20:12:05 -08:00
+								-- | Detect if a list is tight.
 								isTightList :: [[Block]] -> Bool
-												HLint: Use all

Replace `and . map` with `all`.

											
										
										
											2013-12-19 17:06:27 -05:00
+								isTightList = all firstIsPlain
-												Shared: export isTightList.

											
										
										
											2013-01-07 20:12:05 -08:00
+								  where firstIsPlain (Plain _ : _) = True
 								        firstIsPlain _             = False
-												Implement task lists (#5139)

Closes #3051

											
										
										
											2019-01-02 20:36:37 +01:00
+								-- | Convert a list item containing tasklist syntax (e.g. @[x]@)
 								-- to using @U+2610 BALLOT BOX@ or @U+2612 BALLOT BOX WITH X@.
 								taskListItemFromAscii :: Extensions -> [Block] -> [Block]
 								taskListItemFromAscii = handleTaskListItem fromMd
 								  where
 								    -- only a marker at the very start of the inlines, followed by a
 								    -- Space, is rewritten; anything else is returned untouched
 								    fromMd inlines = case inlines of
 								      Str "[" : Space : Str "]" : Space : rest -> unchecked rest
 								      Str "[x]" : Space : rest                 -> checked rest
 								      Str "[X]" : Space : rest                 -> checked rest
 								      _                                        -> inlines
 								    unchecked rest = Str "☐" : Space : rest
 								    checked rest   = Str "☒" : Space : rest
 								-- | Turn a list item whose text starts with @U+2610 BALLOT BOX@ or
 								-- @U+2612 BALLOT BOX WITH X@ back into tasklist syntax (@[ ]@ or @[x]@),
 								-- emitted as a raw markdown inline.
 								taskListItemToAscii :: Extensions -> [Block] -> [Block]
 								taskListItemToAscii = handleTaskListItem toMd
 								  where
 								    toMd inlines = case inlines of
 								      Str "☐" : Space : rest -> marker "[ ]" rest
 								      Str "☒" : Space : rest -> marker "[x]" rest
 								      _                      -> inlines
 								    marker box rest = RawInline (Format "markdown") box : Space : rest
 								-- | Apply an inline transformation to the first block of a list item,
 								-- provided that block is a 'Plain' or 'Para' and @Ext_task_lists@ is
 								-- enabled in the given extensions; otherwise return the blocks unchanged.
 								handleTaskListItem :: ([Inline] -> [Inline]) -> Extensions -> [Block] -> [Block]
 								handleTaskListItem handleInlines exts bls
 								  | Ext_task_lists `extensionEnabled` exts = rewriteFirst bls
 								  | otherwise                              = bls
 								  where
 								    rewriteFirst (Plain is : rest) = Plain (handleInlines is) : rest
 								    rewriteFirst (Para is  : rest) = Para  (handleInlines is) : rest
 								    rewriteFirst other             = other
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template.
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								-- | Set a field of a 'Meta' object.  If the field already has a value,
 								-- convert it into a list with the new value appended to the old value(s).
 								addMetaField :: ToMetaValue a
 								             => String
 								             -> a
 								             -> Meta
 								             -> Meta
 								addMetaField key val (Meta meta) =
 								  Meta $ M.insertWith combine key (toMetaValue val) meta
-												Shared addMetaField:  if old and new values both lists, concatenate.

											
										
										
											2014-05-12 13:05:42 -07:00
+								  where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template.
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
+								        combine newval x             = MetaList [x, newval]
-												Automatic reformating by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								        tolist (MetaList ys) = ys
 								        tolist y             = [y]
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template.
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
 								-- | Create 'Meta' from old-style title, authors, date.  This is
 								-- provided to ease the transition from the old API.
 								makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
 								makeMeta title authors date =
 								      addMetaField "title" (B.fromList title)
 								    $ addMetaField "author" (map B.fromList authors)
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								    $ addMetaField "date" (B.fromList date) nullMeta
-												Use new flexible metadata type.

* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`.  (API change.)
* `Text.Pandoc.Shared`:  Added `metaToJSON`.
  This will be used in writers to create a JSON object for use
  in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader:  Added support for YAML metadata block.
  Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`:  Replace `stateTitle`,
  `stateAuthors`, `stateDate` with `stateMeta`.
* RST reader:  Improved metadata.
  Treat initial field list as metadata when standalone specified.
  Previously ALL fields "title", "author", "date" in field lists
  were treated as metadata, even if not at the beginning.
  Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`:  Export `renderTemplate'` that takes a string
  instead of a compiled template.
* OPML template:  Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
  Previously the writer did this.

											
										
										
											2013-05-10 22:53:35 -07:00
-												Added eastAsianLineBreakFilter to Shared.

This used to live in the Markdown reader.

											
										
										
											2017-05-30 10:22:48 +02:00
+								-- | Remove soft breaks between East Asian characters.
 								eastAsianLineBreakFilter :: Pandoc -> Pandoc
 								eastAsianLineBreakFilter = bottomUp go
 								  where go (x:SoftBreak:y:zs) =
 								         case (stringify x, stringify y) of
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								               (xs@(_:_), c:_)
-												Added eastAsianLineBreakFilter to Shared.

This used to live in the Markdown reader.

											
										
										
											2017-05-30 10:22:48 +02:00
+								                 | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
 								               _ -> x:SoftBreak:y:zs
 								        go xs = xs
-												Consistent underline for Readers (#2270)

* Added underlineSpan builder function.  This can be easily updated if needed. The purpose is for Readers to transform underlines consistently.

* Docx Reader: Use underlineSpan and update test

* Org Reader: Use underlineSpan and add test

* Textile Reader: Use underlineSpan and add test case

* Txt2Tags Reader: Use underlineSpan and update test

* HTML Reader: Use underlineSpan and add test case

											
										
										
											2017-10-27 18:45:00 -04:00
+								-- | Builder for underline.
 								-- This probably belongs in Builder.hs in pandoc-types.
 								-- Will be replaced once Underline is an element.
 								underlineSpan :: Inlines -> Inlines
 								underlineSpan contents = B.spanWith underlineAttr contents
 								  where
 								    -- empty identifier, single class "underline", no key-value pairs
 								    underlineAttr = ("", ["underline"], [])
-												Shared: new export `splitSentences` [API change].

This was duplicated in the Man and Ms writers, and really
belongs in Shared.

											
										
										
											2018-10-01 22:47:01 -07:00
+								-- | Returns the first sentence in a list of inlines, and the rest.
 								breakSentence :: [Inline] -> ([Inline], [Inline])
 								breakSentence [] = ([],[])
 								breakSentence xs =
 								  case rest of
 								    []                             -> (before, [])
 								    [c]                            -> (before ++ [c], [])
 								    -- a following Space or SoftBreak is dropped from both halves
 								    (c:Space:cs)                   -> (before ++ [c], cs)
 								    (c:SoftBreak:cs)               -> (before ++ [c], cs)
 								    -- keep a closing parenthesis attached to the sentence it ends
 								    (Str ".":Str (')':ys):cs)      -> (before ++ [Str ".", Str (')':ys)], cs)
 								    (x@(Str ('.':')':_)):cs)       -> (before ++ [x], cs)
 								    (LineBreak:x@(Str ('.':_)):cs) -> (before ++ [LineBreak], x:cs)
 								    -- no break here after all: keep the candidate and keep scanning
 								    (c:cs)                         ->
 								      let (ds, es) = breakSentence cs
 								      in  (before ++ [c] ++ ds, es)
 								  where
 								    (before, rest) = break endsSentence xs
 								    -- candidates: a non-empty Str ending in '.' or '?', or a LineBreak
 								    endsSentence (Str ys@(_:_)) = last ys `elem` ".?"
 								    endsSentence LineBreak      = True
 								    endsSentence _              = False
 								-- | Break a list of inlines into its sentences by applying
 								-- 'breakSentence' repeatedly until nothing is left over.
 								splitSentences :: [Inline] -> [[Inline]]
 								splitSentences xs =
 								  case breakSentence xs of
 								    (sent, [])   -> [sent]
 								    (sent, rest) -> sent : splitSentences rest
-												Consistent underline for Readers (#2270)

* Added underlineSpan builder function.  This can be easily updated if needed. The purpose is for Readers to transform underlines consistently.

* Docx Reader: Use underlineSpan and update test

* Org Reader: Use underlineSpan and add test

* Textile Reader: Use underlineSpan and add test case

* Txt2Tags Reader: Use underlineSpan and update test

* HTML Reader: Use underlineSpan and add test case

											
										
										
											2017-10-27 18:45:00 -04:00
-												Shared: add filterIpynbOutput. [API change]

Add command line option `--ipynb-output=all|none|best`.

Closes #5339.

											
										
										
											2019-02-28 20:28:16 -08:00
+								-- | Process ipynb output cells.  If mode is Nothing,
 								-- remove all output.  If mode is Just format, select
-												Make filterIpynbOutput strip ANSI escapes from code in output...

for non-ipynb formats, when the default "best" option is used with
--ipynb-output.  The escape sequences cause problems in many formats,
including LaTeX.  Closes #5633.

											
										
										
											2019-07-16 09:27:51 -07:00
+								-- best output for the format.  If format is not ipynb,
 								-- strip out ANSI escape sequences from CodeBlocks (see #5633).
-												Shared: add filterIpynbOutput. [API change]

Add command line option `--ipynb-output=all|none|best`.

Closes #5339.

											
										
										
											2019-02-28 20:28:16 -08:00
+								filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc
 								filterIpynbOutput mode = walk go
 								  where go (Div (ident, ("output":os), kvs) bs) =
 								          case mode of
 								            Nothing  -> Div (ident, ("output":os), kvs) []
-												Shared.filterIpynbOutput: 'best' should include everything for ipynb.

											
										
										
											2019-03-06 10:00:18 -08:00
+								            -- "best" for ipynb includes all formats:
 								            Just fmt
 								              | fmt == Format "ipynb"
 								                          -> Div (ident, ("output":os), kvs) bs
 								              | otherwise -> Div (ident, ("output":os), kvs) $
-												Make filterIpynbOutput strip ANSI escapes from code in output...

for non-ipynb formats, when the default "best" option is used with
--ipynb-output.  The escape sequences cause problems in many formats,
including LaTeX.  Closes #5633.

											
										
										
											2019-07-16 09:27:51 -07:00
+								                              walk removeANSI $
-												filterIpynbOutput - go back to just including one block per format.

In the end we need a 1-1 map of mime types to output blocks.

											
										
										
											2019-03-06 11:09:15 -08:00
+								                              take 1 $ sortBy (comparing rank) bs
-												Improve filterIpynbOutput.

- Ensure that images are prioritized over text.
- Allow multiple RawBlocks for same format.

											
										
										
											2019-03-06 10:36:03 -08:00
+								                 where
-												Shared: add filterIpynbOutput. [API change]

Add command line option `--ipynb-output=all|none|best`.

Closes #5339.

											
										
										
											2019-02-28 20:28:16 -08:00
+								                  rank (RawBlock (Format "html") _)
 								                    | fmt == Format "html" = (1 :: Int)
 								                    | fmt == Format "markdown" = 2
 								                    | otherwise = 3
 								                  rank (RawBlock (Format "latex") _)
 								                    | fmt == Format "latex" = 1
 								                    | fmt == Format "markdown" = 2
 								                    | otherwise = 3
 								                  rank (RawBlock f _)
 								                    | fmt == f = 1
 								                    | otherwise = 3
-												Improve filterIpynbOutput.

- Ensure that images are prioritized over text.
- Allow multiple RawBlocks for same format.

											
										
										
											2019-03-06 10:36:03 -08:00
+								                  rank (Para [Image{}]) = 1
-												Shared: add filterIpynbOutput. [API change]

Add command line option `--ipynb-output=all|none|best`.

Closes #5339.

											
										
										
											2019-02-28 20:28:16 -08:00
+								                  rank _ = 2
-												Make filterIpynbOutput strip ANSI escapes from code in output...

for non-ipynb formats, when the default "best" option is used with
--ipynb-output.  The escape sequences cause problems in many formats,
including LaTeX.  Closes #5633.

											
										
										
											2019-07-16 09:27:51 -07:00
+								                  removeANSI (CodeBlock attr code) =
 								                    CodeBlock attr (removeANSIEscapes code)
 								                  removeANSI x = x
 								                  removeANSIEscapes [] = []
 								                  removeANSIEscapes ('\x1b':'[':cs) =
 								                    removeANSIEscapes (drop 1 $ dropWhile (/='m') cs)
 								                  removeANSIEscapes (c:cs) = c : removeANSIEscapes cs
-												Shared: add filterIpynbOutput. [API change]

Add command line option `--ipynb-output=all|none|best`.

Closes #5339.

											
										
										
											2019-02-28 20:28:16 -08:00
+								        go x = x
-												Moved renderTags' from HTML reader & SelfContained to Shared.

Improved removal of markdown="1" attribute in Markdown reader.

											
										
										
											2012-08-15 09:42:16 -07:00
+								--
 								-- TagSoup HTML handling
 								--
 								-- | Render HTML tags.
 								renderTags' :: [Tag String] -> String
 								renderTags' = renderTagsOptions
-												HLint: use `elem` and `notElem`

Replaces long conditional chains with calls to `elem` and `notElem`.

											
										
										
											2013-12-19 20:19:24 -05:00
+								               renderOptions{ optMinimize = matchTags ["hr", "br", "img",
 								                                                       "meta", "link"]
 								                            , optRawTag   = matchTags ["script", "style"] }
-												hlint

											
										
										
											2017-11-01 14:20:03 +03:00
+								              where matchTags tags = flip elem tags . map toLower
-												Moved renderTags' from HTML reader & SelfContained to Shared.

Improved removal of markdown="1" attribute in Markdown reader.

											
										
										
											2012-08-15 09:42:16 -07:00
-												Improved template handling:

+ Split template haskell functions into new module,
  Text.Pandoc.TH
+ Distinguish contentsOf and binaryContentsOf; the former
  uses text mode in Windows, while the latter uses binary mode


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1368 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-08-02 17:22:55 +00:00
+								--
 								-- File handling
 								--
-												Reworked Text.Pandoc.ODT to use zip-archive instead of calling external 'zip'.

+ Removed utf8-string and xml-light modules, and unneeded content.xml.
+ Removed code for building reference.odt from Setup.hs.
  The ODT is now built using template haskell in Text.Pandoc.ODT.
+ Removed copyright statements for utf8-string and xml modules,
  since they are no longer included in the source.
+ README: Removed claim that 'zip' is needed for ODT production.
+ Removed dependency on 'zip' from debian/control.
+ Text.Pandoc.Shared: Removed withTempDir, added inDirectory.
+ Added makeZip to Text.Pandoc.TH.
+ pandoc.cabal: Added dependencies on old-time, zip-archive, and utf8-string.
  Added markdown2pdf files to extra-sources list.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1417 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2008-09-04 02:51:28 +00:00
+								-- | Perform an IO action in a directory, returning to starting directory.
 								inDirectory :: FilePath -> IO a -> IO a
-												fix inDirectory to reset to the original directory in case an exception occurs

											
										
										
											2014-10-08 23:25:01 +02:00
+								inDirectory path action = E.bracket
 								                             getCurrentDirectory
 								                             setCurrentDirectory
 								                             (const $ setCurrentDirectory path >> action)
-												Removed TH module; refactored LaTeXMathML not to use TH.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1692 788f1e2b-df1e-0410-8736-df70ead52e1b

											
										
										
											2009-12-31 01:11:23 +00:00
-												Complete rewrite of LaTeX reader.

* The new reader is more robust, accurate, and extensible.
  It is still quite incomplete, but it should be easier
  now to add features.

* Text.Pandoc.Parsing: Added withRaw combinator.

* Markdown reader: do escapedChar before raw latex inline.
  Otherwise we capture commands like \{.

* Fixed latex citation tests for new citeproc.

* Handle \include{} commands in latex.
  This is done in pandoc.hs, not the (pure) latex reader.
  But the reader exports the needed function, handleIncludes.

* Moved err and warn from pandoc.hs to Shared.

* Fixed tests - raw tex should sometimes have trailing space.

* Updated lhs-test for highlighting-kate changes.

											
										
										
											2012-01-29 23:54:00 -08:00
+								--
 								-- Error reporting
 								--
-												Move utility error functions to Text.Pandoc.Shared

											
										
										
											2015-02-18 21:05:47 +00:00
+								mapLeft :: (a -> b) -> Either a c -> Either b c
-												Reimplement mapLeft using Bifunctor.first

											
										
										
											2018-10-10 01:26:50 +03:00
+								mapLeft = Bifunctor.first
-												Move utility error functions to Text.Pandoc.Shared

											
										
										
											2015-02-18 21:05:47 +00:00
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								-- | Remove intermediate "." and ".." directories from a path.
 								--
-												Fixed haddock comment.

											
										
										
											2014-08-13 13:59:18 -07:00
+								-- > collapseFilePath "./foo" == "foo"
 								-- > collapseFilePath "/bar/../baz" == "/baz"
 								-- > collapseFilePath "/../baz" == "/../baz"
 								-- > collapseFilePath "parent/foo/baz/../bar" ==  "parent/foo/bar"
 								-- > collapseFilePath "parent/foo/baz/../../bar" ==  "parent/bar"
 								-- > collapseFilePath "parent/foo/.." ==  "parent"
 								-- > collapseFilePath "/parent/foo/../../bar" ==  "/bar"
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								collapseFilePath :: FilePath -> FilePath
-												MediaBag:  ensure that / is always used as path separator.

											
										
										
											2015-09-26 22:40:58 -07:00
+								collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								  where
 								    go rs "." = rs
 								    go r@(p:rs) ".." = case p of
-												hlint suggestions.

											
										
										
											2017-10-27 23:13:55 -07:00
+								                            ".."                              -> "..":r
 								                            (checkPathSeperator -> Just True) -> "..":r
-												Automatic reformating by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								                            _                                 -> rs
-												MediaBag:  ensure that / is always used as path separator.

											
										
										
											2015-09-26 22:40:58 -07:00
+								    go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
+								    go rs x = x:rs
-												Automatic reformating by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								    isSingleton []  = Nothing
-												Shared: Make collapseFilePath OS-agnostic

											
										
										
											2014-09-25 12:42:53 +01:00
+								    isSingleton [x] = Just x
-												Automatic reformating by stylish-haskell.

											
										
										
											2017-10-27 20:28:29 -07:00
+								    isSingleton _   = Nothing
-												Shared: Make collapseFilePath OS-agnostic

											
										
										
											2014-09-25 12:42:53 +01:00
+								    checkPathSeperator = fmap isPathSeparator . isSingleton
-												Shared: Added collapseFilePath function

This function removes intermediate "." and ".." from a path.

											
										
										
											2014-08-08 20:10:58 +01:00
-												Shared: add uriPathToPath.

This adjusts the path from a file: URI in a way that is sensitive
to Windows/Linux differences.  Thus, on Windows,
`/c:/foo` gets interpreted as `c:/foo`, but on Linux,
`/c:/foo` gets interpreted as `/c:/foo`.

See #4613.

											
										
										
											2018-05-08 09:54:19 -07:00
+								-- Convert the path part of a file: URI to a regular path.
 								-- On windows, @/c:/foo@ should be @c:/foo@.
 								-- On linux, @/foo@ should be @/foo@.
 								uriPathToPath :: String -> FilePath
 								uriPathToPath path =
 								#ifdef _WINDOWS
 								  case path of
 								    '/':ps -> ps
-												Fixed bug in uriPathToPath for Windows.

											
										
										
											2018-05-08 11:31:00 -07:00
+								    ps     -> ps
-												Shared: add uriPathToPath.

This adjusts the path from a file: URI in a way that is sensitive
to Windows/Linux differences.  Thus, on Windows,
`/c:/foo` gets interpreted as `c:/foo`, but on Linux,
`/c:/foo` gets interpreted as `/c:/foo`.

See #4613.

											
										
										
											2018-05-08 09:54:19 -07:00
+								#else
 								  path
 								#endif
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid duplication (DRY).

											
										
										
											2016-10-12 17:42:30 +02:00
+								--
 								-- File selection from the archive
 								--
 								filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)]
 								filteredFilesFromArchive zf f =
 								  mapMaybe (fileAndBinary zf) (filter f (filesInArchive zf))
 								  where
 								    fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
-												Use bind function instead of pattern matching

											
										
										
											2016-10-17 16:58:53 +02:00
+								    fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
-												Basic support for images in ODT documents

Highly influenced by the docx support, refactored
some code to avoid duplication (DRY).

											
										
										
											2016-10-12 17:42:30 +02:00
-												Shared: Provide custom isURI that rejects unknown schemes [isURI]

We also export the set of known `schemes`.

The new function replaces the function of the same name
from `Network.URI`, as the latter did not check whether a scheme is
well-known.  E.g. MediaWiki wikis frequently feature pages with names
like `User:John`. These links were interpreted as URIs, thus turning
internal links into global links. This is prevented by also checking
whether the scheme of a URI is frequently used (i.e. is IANA registered
or an otherwise well-known scheme).

Fixes: #2713

Update set of well-known URIs from IANA list
All official IANA schemes (as of 2017-05-22) are included in the set of
known schemes.  The four non-official schemes doi, isbn, javascript, and
pmid are kept.

											
										
										
											2017-05-23 09:48:11 +02:00
 								--
 								-- IANA URIs
 								--
 								-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus
 								-- the unofficial schemes doi, javascript, isbn, pmid.  All entries are
 								-- lower-case.
 								schemes :: Set.Set String
 								schemes = Set.fromList (ianaSchemes <> unofficialSchemes)
 								  where
 								    -- Official IANA schemes
 								    ianaSchemes :: [String]
 								    ianaSchemes =
 								      [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs"
 								      , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin"
 								      , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension"
 								      , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs"
 								      , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle"
 								      , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed"
 								      , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg"
 								      , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham"
 								      , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon"
 								      , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6"
 								      , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs"
 								      , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap"
 								      , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market"
 								      , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access"
 								      , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel"
 								      , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath"
 								      , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint"
 								      , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller"
 								      , "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode"
 								      , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular"
 								      , "ms-settings-cloudstorage", "ms-settings-connectabledevices"
 								      , "ms-settings-displays-topology", "ms-settings-emailandaccounts"
 								      , "ms-settings-language", "ms-settings-location", "ms-settings-lock"
 								      , "ms-settings-nfctransactions", "ms-settings-notifications"
 								      , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity"
 								      , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace"
 								      , "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad"
 								      , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word"
 								      , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs"
 								      , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd"
 								      , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop"
 								      , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis"
 								      , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp"
 								      , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn"
 								      , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews"
 								      , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam"
 								      , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid"
 								      , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn"
 								      , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi"
 								      , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid"
 								      , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire"
 								      , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r"
 								      , "z39.50s"
 								      ]
 								    -- Unofficial schemes
 								    unofficialSchemes :: [String]
 								    unofficialSchemes = [ "doi", "isbn", "javascript", "pmid" ]
 								-- | Check whether the string parses as a URI (via 'parseURI') whose
 								-- scheme, compared case-insensitively, is one of the IANA or frequently
 								-- used unofficial schemes listed in @schemes@.
 								isURI :: String -> Bool
 								isURI str =
 								  case parseURI str of
 								    Nothing  -> False
 								    Just uri -> schemeName uri `Set.member` schemes
 								  where
 								    -- Remove every ':' from the scheme and lower-case what is left, so
 								    -- that upper-case spellings of known schemes are accepted too.
 								    schemeName uri = [toLower c | c <- uriScheme uri, c /= ':']
-												Shared: Provide custom isURI that rejects unknown schemes [isURI]

We also export the set of known `schemes`.

The new function replaces the function of the same name
from `Network.URI`, as the latter did not check whether a scheme is
well-known.  E.g. MediaWiki wikis frequently feature pages with names
like `User:John`. These links were interpreted as URIs, thus turning
internal links into global links. This is prevented by also checking
whether the scheme of a URI is frequently used (i.e. is IANA registered
or an otherwise well-known scheme).

Fixes: #2713

Update set of well-known URIs from IANA list
All official IANA schemes (as of 2017-05-22) are included in the set of
known schemes.  The four non-official schemes doi, isbn, javascript, and
pmid are kept.

											
										
										
											2017-05-23 09:48:11 +02:00
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
+								---
 								--- Squash blocks into inlines
 								---
 								-- | Flatten a single block into inlines.  The conversion is lossy:
 								-- attributes, list numbering and table layout are discarded, and
 								-- 'HorizontalRule' and 'Null' produce nothing.  Nested block lists are
 								-- flattened with @blocksToInlines'@.
 								blockToInlines :: Block -> Inlines
 								blockToInlines blk =
 								  case blk of
 								    Plain ils                 -> B.fromList ils
 								    Para ils                  -> B.fromList ils
 								    LineBlock lns             -> B.fromList (combineLines lns)
 								    CodeBlock attr str        -> B.codeWith attr str
 								    RawBlock (Format fmt) str -> B.rawInline fmt str
 								    BlockQuote blks           -> blocksToInlines' blks
 								    OrderedList _ items       -> squashEach items
 								    BulletList items          -> squashEach items
 								    -- Each definition is rendered as "term: definitions".
 								    DefinitionList pairs      ->
 								      mconcat [ B.fromList term <> B.str ":" <> B.space <> squashEach defs
 								              | (term, defs) <- pairs ]
 								    Header _ _ ils            -> B.fromList ils
 								    HorizontalRule            -> mempty
 								    -- The header row and every body row become one run of inlines each,
 								    -- separated by line breaks.
 								    Table _ _ _ headers rows  ->
 								      mconcat (intersperse B.linebreak (map squashEach (headers : rows)))
 								    Div _ blks                -> blocksToInlines' blks
 								    Null                      -> mempty
 								  where
 								    -- Flatten every block list and concatenate the results.
 								    squashEach :: [[Block]] -> Inlines
 								    squashEach = mconcat . map blocksToInlines'
 								-- | Convert each block with 'blockToInlines' and concatenate the results,
 								-- placing the given separator between consecutive blocks.
 								blocksToInlinesWithSep :: Inlines -> [Block] -> Inlines
 								blocksToInlinesWithSep sep blks =
 								  mconcat (intersperse sep (map blockToInlines blks))

 								-- | 'blocksToInlinesWithSep' with 'defaultBlocksSeparator' as the separator.
 								blocksToInlines' :: [Block] -> Inlines
 								blocksToInlines' blks = blocksToInlinesWithSep defaultBlocksSeparator blks
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
 								-- | Flatten a list of blocks into a plain list of inlines: the builder
 								-- value produced by @blocksToInlines'@ is converted with 'B.toList'.
 								blocksToInlines :: [Block] -> [Inline]
 								blocksToInlines blks = B.toList (blocksToInlines' blks)
-												Rename README to MANUAL.txt

											
										
										
											2016-07-20 14:12:57 +02:00
-												Lua Utils module: add function blocks_to_inlines (#4799)

Exposes a function which flattens a list of blocks into a
list of inlines. An example use case would be the conversion of Note
elements into other inlines.
											
										
										
											2018-07-30 19:55:25 +02:00
+								-- | Inline elements used to separate blocks when squashing blocks into
 								-- inlines.
 								defaultBlocksSeparator :: Inlines
 								defaultBlocksSeparator =
 								  -- A pilcrow with a space on either side.  This is used in the
 								  -- pandoc.utils.blocks_to_inlines function. Docs there should be
 								  -- updated if this is changed.
 								  mconcat [B.space, B.str "¶", B.space]
-												Shared: introduce blocksToInlines function

This is a lossy function for converting `[Block] -> [Inline]`. Its main
use, at the moment, is for docx comments, which can contain arbitrary
blocks (except for footnotes), but which will be converted to spans.

This is, at the moment, pretty useless for everything but the basic
`Para` and `Plain` comments. It can be improved, but the docx reader
should probably emit a warning if the comment contains more than this.

											
										
										
											2016-06-22 13:04:25 -04:00
-												Added safeRead to Text.Pandoc.Shared.

											
										
										
											2012-08-09 07:52:39 -07:00
+								--
 								-- Safe read
 								--
 								-- | Parse a value with 'reads'.  The value of the first parse is yielded
 								-- with 'return' only when that parse leaves nothing but whitespace
 								-- unconsumed; in every other case (no parse, or trailing non-space
 								-- input) the result is 'mzero'.
 								safeRead :: (MonadPlus m, Read a) => String -> m a
 								safeRead s =
 								  case reads s of
 								    []              -> mzero
 								    (val, rest) : _ ->
 								      if all isSpace rest
 								         then return val
 								         else mzero
-												Moved withTempDir from PDF to Shared, export from Shared.

API change.

											
										
										
											2014-07-30 12:29:04 -07:00
-												Add new exported function defaultUserDataDirs

											
										
										
											2019-03-02 15:03:51 -08:00
+								--
 								-- User data directory
 								--
 								-- | Return appropriate user data directories for the platform: the
 								-- XDG data directory for @pandoc@ (XDG_DATA_HOME or its default value)
 								-- followed by the legacy user data directory ($HOME/.pandoc on *nix),
 								-- with duplicates removed by 'ordNub'.  If either lookup throws any
 								-- exception, the result is the empty list.
 								defaultUserDataDirs :: IO [FilePath]
 								defaultUserDataDirs = E.catch lookupDirs giveUp
 								  where
 								    lookupDirs :: IO [FilePath]
 								    lookupDirs = do
 								      xdgDir <- getXdgDirectory XdgData "pandoc"
 								      legacyDir <- getAppUserDataDirectory "pandoc"
 								      return $ ordNub [xdgDir, legacyDir]
 								    -- Best effort: any failure simply means no user data directories.
 								    giveUp :: E.SomeException -> IO [FilePath]
 								    giveUp _ = return []