2008-03-19 18:46:01 +00:00
|
|
|
{-
|
2010-03-23 13:31:09 -07:00
|
|
|
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
|
2008-03-19 18:46:01 +00:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.XML
|
2010-03-23 13:31:09 -07:00
|
|
|
Copyright : Copyright (C) 2006-2010 John MacFarlane
|
2008-03-19 18:46:01 +00:00
|
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
|
|
Stability : alpha
|
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
Functions for escaping and formatting XML.
|
|
|
|
-}
|
2009-12-05 07:28:45 +00:00
|
|
|
module Text.Pandoc.XML ( stripTags,
|
|
|
|
escapeCharForXML,
|
2008-03-19 18:46:01 +00:00
|
|
|
escapeStringForXML,
|
|
|
|
inTags,
|
|
|
|
selfClosingTag,
|
|
|
|
inTagsSimple,
|
2012-01-28 09:30:31 -08:00
|
|
|
inTagsIndented,
|
2012-02-05 14:37:33 -08:00
|
|
|
toEntities,
|
|
|
|
fromEntities ) where
|
2010-12-21 16:46:21 -08:00
|
|
|
|
|
|
|
import Text.Pandoc.Pretty
|
2012-04-14 22:52:14 -07:00
|
|
|
import Data.Char (ord, isAscii, isSpace)
|
2012-02-05 14:37:33 -08:00
|
|
|
import Text.HTML.TagSoup.Entity (lookupEntity)
|
2008-03-19 18:46:01 +00:00
|
|
|
|
2009-12-05 07:28:45 +00:00
|
|
|
-- | Remove everything between <...>
|
|
|
|
stripTags :: String -> String
|
|
|
|
stripTags ('<':xs) =
|
|
|
|
let (_,rest) = break (=='>') xs
|
|
|
|
in if null rest
|
|
|
|
then ""
|
|
|
|
else stripTags (tail rest) -- leave off >
|
|
|
|
stripTags (x:xs) = x : stripTags xs
|
|
|
|
stripTags [] = []
|
|
|
|
|
2008-03-19 18:46:01 +00:00
|
|
|
-- | Escape one character as needed for XML.
|
|
|
|
escapeCharForXML :: Char -> String
|
|
|
|
escapeCharForXML x = case x of
|
2008-07-13 16:31:34 +00:00
|
|
|
'&' -> "&"
|
|
|
|
'<' -> "<"
|
|
|
|
'>' -> ">"
|
|
|
|
'"' -> """
|
|
|
|
c -> [c]
|
2008-03-19 18:46:01 +00:00
|
|
|
|
|
|
|
-- | Escape string as needed for XML. Entity references are not preserved.
|
|
|
|
escapeStringForXML :: String -> String
|
2010-12-21 08:22:08 -08:00
|
|
|
escapeStringForXML = concatMap escapeCharForXML
|
2008-03-19 18:46:01 +00:00
|
|
|
|
|
|
|
-- | Return a text object with a string of formatted XML attributes.
|
|
|
|
attributeList :: [(String, String)] -> Doc
|
2010-12-21 16:46:21 -08:00
|
|
|
attributeList = hcat . map
|
|
|
|
(\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++
|
|
|
|
escapeStringForXML b ++ "\""))
|
2008-03-19 18:46:01 +00:00
|
|
|
|
|
|
|
-- | Put the supplied contents between start and end tags of tagType,
|
|
|
|
-- with specified attributes and (if specified) indentation.
|
|
|
|
inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
|
|
|
|
inTags isIndented tagType attribs contents =
|
|
|
|
let openTag = char '<' <> text tagType <> attributeList attribs <>
|
|
|
|
char '>'
|
|
|
|
closeTag = text "</" <> text tagType <> char '>'
|
|
|
|
in if isIndented
|
|
|
|
then openTag $$ nest 2 contents $$ closeTag
|
|
|
|
else openTag <> contents <> closeTag
|
|
|
|
|
|
|
|
-- | Return a self-closing tag of tagType with specified attributes
|
|
|
|
selfClosingTag :: String -> [(String, String)] -> Doc
|
|
|
|
selfClosingTag tagType attribs =
|
|
|
|
char '<' <> text tagType <> attributeList attribs <> text " />"
|
|
|
|
|
|
|
|
-- | Put the supplied contents between start and end tags of tagType.
|
|
|
|
inTagsSimple :: String -> Doc -> Doc
|
|
|
|
inTagsSimple tagType = inTags False tagType []
|
|
|
|
|
|
|
|
-- | Put the supplied contents in indented block btw start and end tags.
|
|
|
|
inTagsIndented :: String -> Doc -> Doc
|
|
|
|
inTagsIndented tagType = inTags True tagType []
|
2012-01-28 09:30:31 -08:00
|
|
|
|
|
|
|
-- | Escape all non-ascii characters using numerical entities.
|
|
|
|
toEntities :: String -> String
|
|
|
|
toEntities [] = ""
|
|
|
|
toEntities (c:cs)
|
|
|
|
| isAscii c = c : toEntities cs
|
|
|
|
| otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
|
2012-02-05 14:37:33 -08:00
|
|
|
|
|
|
|
-- Unescapes XML entities
|
|
|
|
fromEntities :: String -> String
|
|
|
|
fromEntities ('&':xs) =
|
|
|
|
case lookupEntity ent of
|
|
|
|
Just c -> c : fromEntities rest
|
2012-04-14 22:52:14 -07:00
|
|
|
Nothing -> '&' : fromEntities xs
|
|
|
|
where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of
|
2012-02-05 14:37:33 -08:00
|
|
|
(zs,';':ys) -> (zs,ys)
|
2012-02-05 23:02:01 -08:00
|
|
|
_ -> ("",xs)
|
2012-02-05 14:37:33 -08:00
|
|
|
fromEntities (x:xs) = x : fromEntities xs
|
|
|
|
fromEntities [] = []
|