2008-03-19 18:46:01 +00:00
|
|
|
{-
|
2017-05-13 23:30:13 +02:00
|
|
|
Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
|
2008-03-19 18:46:01 +00:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.XML
|
2017-05-13 23:30:13 +02:00
|
|
|
Copyright : Copyright (C) 2006-2017 John MacFarlane
|
2008-03-19 18:46:01 +00:00
|
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
|
|
Stability : alpha
|
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
Functions for escaping and formatting XML.
|
|
|
|
-}
|
2013-05-10 22:53:35 -07:00
|
|
|
module Text.Pandoc.XML ( escapeCharForXML,
|
2008-03-19 18:46:01 +00:00
|
|
|
escapeStringForXML,
|
|
|
|
inTags,
|
|
|
|
selfClosingTag,
|
|
|
|
inTagsSimple,
|
2012-01-28 09:30:31 -08:00
|
|
|
inTagsIndented,
|
2012-02-05 14:37:33 -08:00
|
|
|
toEntities,
|
|
|
|
fromEntities ) where
|
2010-12-21 16:46:21 -08:00
|
|
|
|
2017-03-04 13:03:41 +01:00
|
|
|
import Data.Char (isAscii, isSpace, ord)
|
2017-06-10 23:39:49 +02:00
|
|
|
import Data.Text (Text)
|
|
|
|
import qualified Data.Text as T
|
2016-09-02 11:35:28 -04:00
|
|
|
import Text.HTML.TagSoup.Entity (lookupEntity)
|
2017-03-04 13:03:41 +01:00
|
|
|
import Text.Pandoc.Pretty
|
2008-03-19 18:46:01 +00:00
|
|
|
|
|
|
|
-- | Escape one character as needed for XML.
|
|
|
|
escapeCharForXML :: Char -> String
|
|
|
|
escapeCharForXML x = case x of
|
2017-03-04 13:03:41 +01:00
|
|
|
'&' -> "&"
|
|
|
|
'<' -> "<"
|
|
|
|
'>' -> ">"
|
|
|
|
'"' -> """
|
|
|
|
c -> [c]
|
2008-03-19 18:46:01 +00:00
|
|
|
|
|
|
|
-- | Escape string as needed for XML. Entity references are not preserved.
|
|
|
|
escapeStringForXML :: String -> String
|
2010-12-21 08:22:08 -08:00
|
|
|
escapeStringForXML = concatMap escapeCharForXML
|
2008-03-19 18:46:01 +00:00
|
|
|
|
2013-03-19 20:35:14 -07:00
|
|
|
-- | Escape newline characters as
|
|
|
|
escapeNls :: String -> String
|
|
|
|
escapeNls (x:xs)
|
|
|
|
| x == '\n' = " " ++ escapeNls xs
|
|
|
|
| otherwise = x : escapeNls xs
|
|
|
|
escapeNls [] = []
|
|
|
|
|
2008-03-19 18:46:01 +00:00
|
|
|
-- | Return a text object with a string of formatted XML attributes.
|
|
|
|
attributeList :: [(String, String)] -> Doc
|
2010-12-21 16:46:21 -08:00
|
|
|
attributeList = hcat . map
|
|
|
|
(\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++
|
2013-03-19 20:35:14 -07:00
|
|
|
escapeNls (escapeStringForXML b) ++ "\""))
|
2008-03-19 18:46:01 +00:00
|
|
|
|
|
|
|
-- | Put the supplied contents between start and end tags of tagType,
|
|
|
|
-- with specified attributes and (if specified) indentation.
|
|
|
|
inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
|
|
|
|
inTags isIndented tagType attribs contents =
|
|
|
|
let openTag = char '<' <> text tagType <> attributeList attribs <>
|
|
|
|
char '>'
|
|
|
|
closeTag = text "</" <> text tagType <> char '>'
|
|
|
|
in if isIndented
|
|
|
|
then openTag $$ nest 2 contents $$ closeTag
|
|
|
|
else openTag <> contents <> closeTag
|
|
|
|
|
|
|
|
-- | Return a self-closing tag of tagType with specified attributes
|
|
|
|
selfClosingTag :: String -> [(String, String)] -> Doc
|
|
|
|
selfClosingTag tagType attribs =
|
|
|
|
char '<' <> text tagType <> attributeList attribs <> text " />"
|
|
|
|
|
|
|
|
-- | Put the supplied contents between start and end tags of tagType.
|
|
|
|
inTagsSimple :: String -> Doc -> Doc
|
|
|
|
inTagsSimple tagType = inTags False tagType []
|
|
|
|
|
|
|
|
-- | Put the supplied contents in indented block btw start and end tags.
|
|
|
|
inTagsIndented :: String -> Doc -> Doc
|
|
|
|
inTagsIndented tagType = inTags True tagType []
|
2012-01-28 09:30:31 -08:00
|
|
|
|
|
|
|
-- | Escape all non-ascii characters using numerical entities.
|
2017-06-10 23:39:49 +02:00
|
|
|
toEntities :: Text -> Text
|
|
|
|
toEntities = T.concatMap go
|
|
|
|
where go c | isAscii c = T.singleton c
|
|
|
|
| otherwise = T.pack ("&#" ++ show (ord c) ++ ";")
|
2012-02-05 14:37:33 -08:00
|
|
|
|
|
|
|
-- Unescapes XML entities
|
|
|
|
fromEntities :: String -> String
|
|
|
|
fromEntities ('&':xs) =
|
2016-01-08 17:08:01 -08:00
|
|
|
case lookupEntity ent' of
|
2016-09-02 11:35:28 -04:00
|
|
|
Just c -> c ++ fromEntities rest
|
2012-04-14 22:52:14 -07:00
|
|
|
Nothing -> '&' : fromEntities xs
|
|
|
|
where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of
|
2012-02-05 14:37:33 -08:00
|
|
|
(zs,';':ys) -> (zs,ys)
|
2016-01-08 17:08:01 -08:00
|
|
|
(zs, ys) -> (zs,ys)
|
|
|
|
ent' = case ent of
|
2016-01-08 20:20:37 -08:00
|
|
|
'#':'X':ys -> '#':'x':ys -- workaround tagsoup bug
|
2017-03-04 13:03:41 +01:00
|
|
|
'#':_ -> ent
|
|
|
|
_ -> ent ++ ";"
|
2016-01-08 17:08:01 -08:00
|
|
|
|
2012-02-05 14:37:33 -08:00
|
|
|
fromEntities (x:xs) = x : fromEntities xs
|
|
|
|
fromEntities [] = []
|