Added code for xml library (Text.XML.Light) to source tree,

since there is currently no debian package.  Removed
dependency on xml library.  Added license to debian/copyright.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1351 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2008-08-01 00:45:07 +00:00
parent 8440385f45
commit 0c6dd105b7
8 changed files with 1116 additions and 4 deletions

96
Text/XML/Light.hs Normal file
View file

@ -0,0 +1,96 @@
{-# LANGUAGE FlexibleInstances #-}
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light
-- Copyright : (c) Galois, Inc. 2007
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability: portability
--
-- A lightweight XML parsing, filtering and generating library.
--
-- This module reexports functions from:
--
-- * "Text.XML.Light.Types"
--
-- * "Text.XML.Light.Proc"
--
-- * "Text.XML.Light.Input"
--
-- * "Text.XML.Light.Output"
--
module Text.XML.Light (
module Text.XML.Light,
module Text.XML.Light.Types,
module Text.XML.Light.Proc,
module Text.XML.Light.Input,
module Text.XML.Light.Output
) where
import Text.XML.Light.Types
import Text.XML.Light.Proc
import Text.XML.Light.Input
import Text.XML.Light.Output
-- | Add an attribute to an element.
add_attr :: Attr -> Element -> Element
add_attr a e = add_attrs [a] e
-- | Add some attributes to an element.
add_attrs :: [Attr] -> Element -> Element
add_attrs as e = e { elAttribs = as ++ elAttribs e }
-- | Create an unqualified name.
unqual :: String -> QName
unqual x = blank_name { qName = x }
-- | A smart element constructor which uses the type of its argument
-- to determine what sort of element to make.
class Node t where
node :: QName -> t -> Element
instance Node ([Attr],[Content]) where
node n (attrs,cont) = blank_element { elName = n
, elAttribs = attrs
, elContent = cont
}
instance Node [Attr] where node n as = node n (as,[]::[Content])
instance Node Attr where node n a = node n [a]
instance Node () where node n () = node n ([]::[Attr])
instance Node [Content] where node n cs = node n ([]::[Attr],cs)
instance Node Content where node n c = node n [c]
instance Node ([Attr],Content) where node n (as,c) = node n (as,[c])
instance Node (Attr,Content) where node n (a,c) = node n ([a],[c])
instance Node ([Attr],[Element]) where
node n (as,cs) = node n (as,map Elem cs)
instance Node ([Attr],Element) where node n (as,c) = node n (as,[c])
instance Node (Attr,Element) where node n (a,c) = node n ([a],c)
instance Node ([Element]) where node n es = node n ([]::[Attr],es)
instance Node (Element) where node n e = node n [e]
instance Node ([Attr],[CData]) where
node n (as,cs) = node n (as,map Text cs)
instance Node ([Attr],CData) where node n (as,c) = node n (as,[c])
instance Node (Attr,CData) where node n (a,c) = node n ([a],c)
instance Node [CData] where node n es = node n ([]::[Attr],es)
instance Node CData where node n e = node n [e]
instance Node ([Attr],String) where
node n (as,t) = node n (as,blank_cdata { cdData = t })
instance Node (Attr,String) where node n (a,t) = node n ([a],t)
instance Node [Char] where node n t = node n ([]::[Attr],t)
-- | Create node with unqualified name
unode :: Node t => String -> t -> Element
unode = node . unqual

327
Text/XML/Light/Cursor.hs Normal file
View file

@ -0,0 +1,327 @@
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light.Cursor
-- Copyright : (c) Galois, Inc. 2008
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
-- XML cursors for working XML content withing the context of
-- an XML document. This implemntation is based on the general
-- tree zipper written by Krasimir Angelov and Iavor S. Diatchki.
--
module Text.XML.Light.Cursor
( Tag(..), getTag, setTag, fromTag
, Cursor(..), Path
-- * Conversions
, fromContent
, fromElement
, fromForest
, toForest
, toTree
-- * Moving around
, parent
, root
, getChild
, firstChild
, lastChild
, left
, right
-- ** Searching
, findChild
, findLeft
, findRight
-- * Node classification
, isRoot
, isFirst
, isLast
, isLeaf
, isChild
, hasChildren
, getNodeIndex
-- * Updates
, setContent
, modifyContent
, modifyContentM
-- ** Inserting content
, insertLeft
, insertRight
, insertGoLeft
, insertGoRight
-- ** Removing content
, removeLeft
, removeRight
, removeGoLeft
, removeGoRight
, removeGoUp
) where
import Text.XML.Light.Types
import Data.Maybe(isNothing)
data Tag = Tag { tagName :: QName
, tagAttribs :: [Attr]
, tagLine :: Maybe Line
} deriving (Show)
getTag :: Element -> Tag
getTag e = Tag { tagName = elName e
, tagAttribs = elAttribs e
, tagLine = elLine e
}
setTag :: Tag -> Element -> Element
setTag t e = fromTag t (elContent e)
fromTag :: Tag -> [Content] -> Element
fromTag t cs = Element { elName = tagName t
, elAttribs = tagAttribs t
, elLine = tagLine t
, elContent = cs
}
type Path = [([Content],Tag,[Content])]
-- | The position of a piece of content in an XML document.
data Cursor = Cur
{ current :: Content -- ^ The currently selected content.
, lefts :: [Content] -- ^ Siblings on the left, closest first.
, rights :: [Content] -- ^ Siblings on the right, closest first.
, parents :: Path -- ^ The contexts of the parent elements of this location.
} deriving (Show)
-- Moving around ---------------------------------------------------------------
-- | The parent of the given location.
parent :: Cursor -> Maybe Cursor
parent loc =
case parents loc of
(pls,v,prs) : ps -> Just
Cur { current = Elem
(fromTag v
(combChildren (lefts loc) (current loc) (rights loc)))
, lefts = pls, rights = prs, parents = ps
}
[] -> Nothing
-- | The top-most parent of the given location.
root :: Cursor -> Cursor
root loc = maybe loc root (parent loc)
-- | The left sibling of the given location.
left :: Cursor -> Maybe Cursor
left loc =
case lefts loc of
t : ts -> Just loc { current = t, lefts = ts
, rights = current loc : rights loc }
[] -> Nothing
-- | The right sibling of the given location.
right :: Cursor -> Maybe Cursor
right loc =
case rights loc of
t : ts -> Just loc { current = t, lefts = current loc : lefts loc
, rights = ts }
[] -> Nothing
-- | The first child of the given location.
firstChild :: Cursor -> Maybe Cursor
firstChild loc =
do (t : ts, ps) <- downParents loc
return Cur { current = t, lefts = [], rights = ts , parents = ps }
-- | The last child of the given location.
lastChild :: Cursor -> Maybe Cursor
lastChild loc =
do (ts, ps) <- downParents loc
case reverse ts of
l : ls -> return Cur { current = l, lefts = ls, rights = []
, parents = ps }
[] -> Nothing
-- | Find the next left sibling that satisfies a predicate.
findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
findLeft p loc = do loc1 <- left loc
if p loc1 then return loc1 else findLeft p loc1
-- | Find the next right sibling that satisfies a predicate.
findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
findRight p loc = do loc1 <- right loc
if p loc1 then return loc1 else findRight p loc1
-- | The first child that satisfies a predicate.
findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
findChild p loc =
do loc1 <- firstChild loc
if p loc1 then return loc1 else findRight p loc1
-- | The child with the given index (starting from 0).
getChild :: Int -> Cursor -> Maybe Cursor
getChild n loc =
do (ts,ps) <- downParents loc
(ls,t,rs) <- splitChildren ts n
return Cur { current = t, lefts = ls, rights = rs, parents = ps }
-- | private: computes the parent for "down" operations.
downParents :: Cursor -> Maybe ([Content], Path)
downParents loc =
case current loc of
Elem e -> Just ( elContent e
, (lefts loc, getTag e, rights loc) : parents loc
)
_ -> Nothing
-- Conversions -----------------------------------------------------------------
-- | A cursor for the guven content.
fromContent :: Content -> Cursor
fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] }
-- | A cursor for the guven element.
fromElement :: Element -> Cursor
fromElement e = fromContent (Elem e)
-- | The location of the first tree in a forest.
fromForest :: [Content] -> Maybe Cursor
fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts
, parents = [] }
fromForest [] = Nothing
-- | Computes the tree containing this location.
toTree :: Cursor -> Content
toTree loc = current (root loc)
-- | Computes the forest containing this location.
toForest :: Cursor -> [Content]
toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r)
-- Queries ---------------------------------------------------------------------
-- | Are we at the top of the document?
isRoot :: Cursor -> Bool
isRoot loc = null (parents loc)
-- | Are we at the left end of the the document?
isFirst :: Cursor -> Bool
isFirst loc = null (lefts loc)
-- | Are we at the right end of the document?
isLast :: Cursor -> Bool
isLast loc = null (rights loc)
-- | Are we at the bottom of the document?
isLeaf :: Cursor -> Bool
isLeaf loc = isNothing (downParents loc)
-- | Do we have a parent?
isChild :: Cursor -> Bool
isChild loc = not (isRoot loc)
-- | Get the node index inside the sequence of children
getNodeIndex :: Cursor -> Int
getNodeIndex loc = length (lefts loc)
-- | Do we have children?
hasChildren :: Cursor -> Bool
hasChildren loc = not (isLeaf loc)
-- Updates ---------------------------------------------------------------------
-- | Change the current content.
setContent :: Content -> Cursor -> Cursor
setContent t loc = loc { current = t }
-- | Modify the current content.
modifyContent :: (Content -> Content) -> Cursor -> Cursor
modifyContent f loc = setContent (f (current loc)) loc
-- | Modify the current content, allowing for an effect.
modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor
modifyContentM f loc = do x <- f (current loc)
return (setContent x loc)
-- | Insert content to the left of the current position.
insertLeft :: Content -> Cursor -> Cursor
insertLeft t loc = loc { lefts = t : lefts loc }
-- | Insert content to the right of the current position.
insertRight :: Content -> Cursor -> Cursor
insertRight t loc = loc { rights = t : rights loc }
-- | Remove the conent on the left of the current position, if any.
removeLeft :: Cursor -> Maybe (Content,Cursor)
removeLeft loc = case lefts loc of
l : ls -> return (l,loc { lefts = ls })
[] -> Nothing
-- | Remove the conent on the right of the current position, if any.
removeRight :: Cursor -> Maybe (Content,Cursor)
removeRight loc = case rights loc of
l : ls -> return (l,loc { rights = ls })
[] -> Nothing
-- | Insert content to the left of the current position.
-- The new content becomes the current position.
insertGoLeft :: Content -> Cursor -> Cursor
insertGoLeft t loc = loc { current = t, rights = current loc : rights loc }
-- | Insert content to the right of the current position.
-- The new content becomes the current position.
insertGoRight :: Content -> Cursor -> Cursor
insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc }
-- | Remove the current element.
-- The new position is the one on the left.
removeGoLeft :: Cursor -> Maybe Cursor
removeGoLeft loc = case lefts loc of
l : ls -> Just loc { current = l, lefts = ls }
[] -> Nothing
-- | Remove the current element.
-- The new position is the one on the right.
removeGoRight :: Cursor -> Maybe Cursor
removeGoRight loc = case rights loc of
l : ls -> Just loc { current = l, rights = ls }
[] -> Nothing
-- | Remove the current element.
-- The new position is the parent of the old position.
removeGoUp :: Cursor -> Maybe Cursor
removeGoUp loc =
case parents loc of
(pls,v,prs) : ps -> Just
Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc))
, lefts = pls, rights = prs, parents = ps
}
[] -> Nothing
-- | private: Gets the given element of a list.
-- Also returns the preceeding elements (reversed) and the folloing elements.
splitChildren :: [a] -> Int -> Maybe ([a],a,[a])
splitChildren _ n | n < 0 = Nothing
splitChildren cs pos = loop [] cs pos
where loop acc (x:xs) 0 = Just (acc,x,xs)
loop acc (x:xs) n = loop (x:acc) xs $! n-1
loop _ _ _ = Nothing
-- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys
combChildren :: [a] -> a -> [a] -> [a]
combChildren ls t rs = foldl (flip (:)) (t:rs) ls

307
Text/XML/Light/Input.hs Normal file
View file

@ -0,0 +1,307 @@
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light.Input
-- Copyright : (c) Galois, Inc. 2007
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability: portable
--
-- Lightweight XML parsing
--
module Text.XML.Light.Input (parseXML,parseXMLDoc) where
import Text.XML.Light.Types
import Text.XML.Light.Proc
import Text.XML.Light.Output(tagEnd)
import Data.Char(isSpace)
import Data.List(isPrefixOf)
import Numeric(readHex)
-- | parseXMLDoc, parse a XMLl document to maybe an element
parseXMLDoc :: String -> Maybe Element
parseXMLDoc xs = strip (parseXML xs)
where strip cs = case onlyElems cs of
e : es
| "?xml" `isPrefixOf` qName (elName e)
-> strip (map Elem es)
| otherwise -> Just e
_ -> Nothing
-- | parseXML to a list of content chunks
parseXML :: String -> [Content]
parseXML xs = parse $ tokens $ preprocess xs
------------------------------------------------------------------------
parse :: [Token] -> [Content]
parse [] = []
parse ts = let (es,_,ts1) = nodes ([],Nothing) [] ts
in es ++ parse ts1
-- Information about namespaces.
-- The first component is a map that associates prefixes to URIs,
-- the second is the URI for the default namespace, if one was provided.
type NSInfo = ([(String,String)],Maybe String)
nodes :: NSInfo -> [QName] -> [Token] -> ([Content], [QName], [Token])
nodes ns ps (TokCRef ref : ts) =
let (es,qs,ts1) = nodes ns ps ts
in (CRef ref : es, qs, ts1)
nodes ns ps (TokText txt : ts) =
let (es,qs,ts1) = nodes ns ps ts
(more,es1) = case es of
Text cd : es1'
| cdVerbatim cd == cdVerbatim txt -> (cdData cd,es1')
_ -> ([],es)
in (Text txt { cdData = cdData txt ++ more } : es1, qs, ts1)
nodes cur_info ps (TokStart p t as empty : ts) = (node : siblings, open, toks)
where
new_name = annotName new_info t
new_info = foldr addNS cur_info as
node = Elem Element { elLine = Just p
, elName = new_name
, elAttribs = map (annotAttr new_info) as
, elContent = children
}
(children,(siblings,open,toks))
| empty = ([], nodes cur_info ps ts)
| otherwise = let (es1,qs1,ts1) = nodes new_info (new_name:ps) ts
in (es1,
case qs1 of
[] -> nodes cur_info ps ts1
_ : qs3 -> ([],qs3,ts1))
nodes ns ps (TokEnd p t : ts) = let t1 = annotName ns t
in case break (t1 ==) ps of
(as,_:_) -> ([],as,ts)
-- Unknown closing tag. Insert as text.
(_,[]) ->
let (es,qs,ts1) = nodes ns ps ts
in (Text CData {
cdLine = Just p,
cdVerbatim = CDataText,
cdData = tagEnd t ""
} : es,qs, ts1)
nodes _ ps [] = ([],ps,[])
annotName :: NSInfo -> QName -> QName
annotName (namespaces,def_ns) n =
n { qURI = maybe def_ns (`lookup` namespaces) (qPrefix n) }
annotAttr :: NSInfo -> Attr -> Attr
annotAttr ns a@(Attr { attrKey = k}) =
case (qPrefix k, qName k) of
(Nothing,"xmlns") -> a
_ -> a { attrKey = annotName ns k }
addNS :: Attr -> NSInfo -> NSInfo
addNS (Attr { attrKey = key, attrVal = val }) (ns,def) =
case (qPrefix key, qName key) of
(Nothing,"xmlns") -> (ns, if null val then Nothing else Just val)
(Just "xmlns", k) -> ((k, val) : ns, def)
_ -> (ns,def)
-- Lexer -----------------------------------------------------------------------
type LChar = (Line,Char)
type LString = [LChar]
data Token = TokStart Line QName [Attr] Bool -- is empty?
| TokEnd Line QName
| TokCRef String
| TokText CData
deriving Show
tokens :: String -> [Token]
tokens = tokens' . linenumber 1
tokens' :: LString -> [Token]
tokens' ((_,'<') : c@(_,'!') : cs) = special c cs
tokens' ((_,'<') : cs) = tag (dropSpace cs) -- we are being nice here
tokens' [] = []
tokens' cs@((l,_):_) = let (as,bs) = breakn ('<' ==) cs
in map cvt (decode_text as) ++ tokens' bs
-- XXX: Note, some of the lines might be a bit inacuarate
where cvt (TxtBit x) = TokText CData { cdLine = Just l
, cdVerbatim = CDataText
, cdData = x
}
cvt (CRefBit x) = case cref_to_char x of
Just c -> TokText CData { cdLine = Just l
, cdVerbatim = CDataText
, cdData = [c]
}
Nothing -> TokCRef x
special :: LChar -> LString -> [Token]
special _ ((_,'-') : (_,'-') : cs) = skip cs
where skip ((_,'-') : (_,'-') : (_,'>') : ds) = tokens' ds
skip (_ : ds) = skip ds
skip [] = [] -- unterminated comment
special c ((_,'[') : (_,'C') : (_,'D') : (_,'A') : (_,'T') : (_,'A') : (_,'[')
: cs) =
let (xs,ts) = cdata cs
in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataVerbatim, cdData = xs }
: tokens' ts
where cdata ((_,']') : (_,']') : (_,'>') : ds) = ([],ds)
cdata ((_,d) : ds) = let (xs,ys) = cdata ds in (d:xs,ys)
cdata [] = ([],[])
special c cs =
let (xs,ts) = munch "" 0 cs
in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataRaw, cdData = '<':'!':(reverse xs) } : tokens' ts
where munch acc nesting ((_,'>') : ds)
| nesting == (0::Int) = ('>':acc,ds)
| otherwise = munch ('>':acc) (nesting-1) ds
munch acc nesting ((_,'<') : ds)
= munch ('<':acc) (nesting+1) ds
munch acc n ((_,x) : ds) = munch (x:acc) n ds
munch acc _ [] = (acc,[]) -- unterminated DTD markup
--special c cs = tag (c : cs) -- invalid specials are processed as tags
qualName :: LString -> (QName,LString)
qualName xs = let (as,bs) = breakn endName xs
(q,n) = case break (':'==) as of
(q1,_:n1) -> (Just q1, n1)
_ -> (Nothing, as)
in (QName { qURI = Nothing, qPrefix = q, qName = n }, bs)
where endName x = isSpace x || x == '=' || x == '>' || x == '/'
tag :: LString -> [Token]
tag ((p,'/') : cs) = let (n,ds) = qualName (dropSpace cs)
in TokEnd p n : case ds of
(_,'>') : es -> tokens' es
-- tag was not properly closed...
_ -> tokens' ds
tag [] = []
tag cs = let (n,ds) = qualName cs
(as,b,ts) = attribs (dropSpace ds)
in TokStart (fst (head cs)) n as b : ts
attribs :: LString -> ([Attr], Bool, [Token])
attribs cs = case cs of
(_,'>') : ds -> ([], False, tokens' ds)
(_,'/') : ds -> ([], True, case ds of
(_,'>') : es -> tokens' es
-- insert missing > ...
_ -> tokens' ds)
(_,'?') : (_,'>') : ds -> ([], True, tokens' ds)
-- doc ended within a tag..
[] -> ([],False,[])
_ -> let (a,cs1) = attrib cs
(as,b,ts) = attribs cs1
in (a:as,b,ts)
attrib :: LString -> (Attr,LString)
attrib cs = let (ks,cs1) = qualName cs
(vs,cs2) = attr_val (dropSpace cs1)
in ((Attr ks (decode_attr vs)),dropSpace cs2)
attr_val :: LString -> (String,LString)
attr_val ((_,'=') : cs) = string (dropSpace cs)
attr_val cs = ("",cs)
dropSpace :: LString -> LString
dropSpace = dropWhile (isSpace . snd)
-- | Match the value for an attribute. For malformed XML we do
-- our best to guess the programmer's intention.
string :: LString -> (String,LString)
string ((_,'"') : cs) = break' ('"' ==) cs
-- Allow attributes to be enclosed between ' '.
string ((_,'\'') : cs) = break' ('\'' ==) cs
-- Allow attributes that are not enclosed by anything.
string cs = breakn eos cs
where eos x = isSpace x || x == '>' || x == '/'
break' :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
break' p xs = let (as,bs) = breakn p xs
in (as, case bs of
[] -> []
_ : cs -> cs)
breakn :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
breakn p l = (map snd as,bs) where (as,bs) = break (p . snd) l
decode_attr :: String -> String
decode_attr cs = concatMap cvt (decode_text cs)
where cvt (TxtBit x) = x
cvt (CRefBit x) = case cref_to_char x of
Just c -> [c]
Nothing -> '&' : x ++ ";"
data Txt = TxtBit String | CRefBit String deriving Show
decode_text :: [Char] -> [Txt]
decode_text xs@('&' : cs) = case break (';' ==) cs of
(as,_:bs) -> CRefBit as : decode_text bs
_ -> [TxtBit xs]
decode_text [] = []
decode_text cs = let (as,bs) = break ('&' ==) cs
in TxtBit as : decode_text bs
cref_to_char :: [Char] -> Maybe Char
cref_to_char cs = case cs of
'#' : ds -> num_esc ds
"lt" -> Just '<'
"gt" -> Just '>'
"amp" -> Just '&'
"apos" -> Just '\''
"quot" -> Just '"'
_ -> Nothing
num_esc :: String -> Maybe Char
num_esc cs = case cs of
'x' : ds -> check (readHex ds)
_ -> check (reads cs)
where check [(n,"")] = cvt_char n
check _ = Nothing
cvt_char :: Int -> Maybe Char
cvt_char x
| fromEnum (minBound :: Char) <= x && x <= fromEnum (maxBound::Char)
= Just (toEnum x)
| otherwise = Nothing
preprocess :: String -> String
preprocess ('\r' : '\n' : cs) = '\n' : preprocess cs
preprocess ('\r' : cs) = '\n' : preprocess cs
preprocess (c : cs) = c : preprocess cs
preprocess [] = []
linenumber :: Line -> String -> LString
linenumber _ [] = []
linenumber n ('\n':s) = n' `seq` ((n,'\n'):linenumber n' s) where n' = n + 1
linenumber n (c:s) = (n,c) : linenumber n s

150
Text/XML/Light/Output.hs Normal file
View file

@ -0,0 +1,150 @@
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light.Output
-- Copyright : (c) Galois, Inc. 2007
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
-- Output handling for the lightweight XML lib.
--
module Text.XML.Light.Output
( showTopElement, showContent, showElement, showCData, showQName, showAttr
, ppTopElement, ppContent, ppElement
, tagEnd, xml_header
) where
import Text.XML.Light.Types
import Data.Char
import Data.List ( isPrefixOf )
-- | The XML 1.0 header
xml_header :: String
xml_header = "<?xml version='1.0' ?>"
-- | Pretty printing renders XML documents faithfully,
-- with the exception that whitespace may be added\/removed
-- in non-verbatim character data.
ppTopElement :: Element -> String
ppTopElement e = unlines [xml_header,ppElement e]
-- | Pretty printing elements
ppElement :: Element -> String
ppElement e = ppElementS "" e ""
-- | Pretty printing content
ppContent :: Content -> String
ppContent x = ppContentS "" x ""
-- | Pretty printing content using ShowS
ppContentS :: String -> Content -> ShowS
ppContentS i x xs = case x of
Elem e -> ppElementS i e xs
Text c -> ppCData i c xs
CRef r -> showCRefS r xs
ppElementS :: String -> Element -> ShowS
ppElementS i e xs = i ++ (tagStart (elName e) (elAttribs e) $
case elContent e of
[]
| not ("?xml" `isPrefixOf` (qName $ elName e)) -> " />" ++ xs
| otherwise -> " ?>" ++ xs
[Text t] -> ">" ++ ppCData "" t (tagEnd (elName e) xs)
cs -> ">\n" ++ foldr ppSub (i ++ tagEnd (elName e) xs) cs
where ppSub e1 = ppContentS (" " ++ i) e1 . showChar '\n'
)
ppCData :: String -> CData -> ShowS
ppCData i c xs = i ++ if (cdVerbatim c /= CDataText )
then showCDataS c xs
else foldr cons xs (showCData c)
where cons :: Char -> String -> String
cons '\n' ys = "\n" ++ i ++ ys
cons y ys = y : ys
--------------------------------------------------------------------------------
-- | Adds the <?xml?> header.
showTopElement :: Element -> String
showTopElement c = xml_header ++ showElement c
showContent :: Content -> String
showContent c = showContentS c ""
showElement :: Element -> String
showElement c = showElementS c ""
showCData :: CData -> String
showCData c = showCDataS c ""
-- Note: crefs should not contain '&', ';', etc.
showCRefS :: String -> ShowS
showCRefS r xs = '&' : r ++ ';' : xs
-- | Good for transmition (no extra white space etc.) but less readable.
showContentS :: Content -> ShowS
showContentS (Elem e) = showElementS e
showContentS (Text cs) = showCDataS cs
showContentS (CRef cs) = showCRefS cs
-- | Good for transmition (no extra white space etc.) but less readable.
showElementS :: Element -> ShowS
showElementS e xs =
tagStart (elName e) (elAttribs e)
$ case elContent e of
[] -> " />" ++ xs
ch -> '>' : foldr showContentS (tagEnd (elName e) xs) ch
-- | Convert a text element to characters.
showCDataS :: CData -> ShowS
showCDataS cd =
case cdVerbatim cd of
CDataText -> escStr (cdData cd)
CDataVerbatim -> showString "<![CDATA[" . escCData (cdData cd) . showString "]]>"
CDataRaw -> \ xs -> cdData cd ++ xs
--------------------------------------------------------------------------------
escCData :: String -> ShowS
escCData (']' : ']' : '>' : cs) = showString "]]]]><![CDATA[>" . escCData cs
escCData (c : cs) = showChar c . escCData cs
escCData [] = id
escChar :: Char -> ShowS
escChar c = case c of
'<' -> showString "&lt;"
'>' -> showString "&gt;"
'&' -> showString "&amp;"
'"' -> showString "&quot;"
'\'' -> showString "&apos;"
-- XXX: Is this really wortherd?
-- We could deal with these issues when we convert characters to bytes.
_ | (oc <= 0x7f && isPrint c) || c == '\n' || c == '\r' -> showChar c
| otherwise -> showString "&#" . shows oc . showChar ';'
where oc = ord c
escStr :: String -> ShowS
escStr cs rs = foldr escChar rs cs
tagEnd :: QName -> ShowS
tagEnd qn rs = '<':'/':showQName qn ++ '>':rs
tagStart :: QName -> [Attr] -> ShowS
tagStart qn as rs = '<':showQName qn ++ as_str ++ rs
where as_str = if null as then "" else ' ' : unwords (map showAttr as)
showAttr :: Attr -> String
showAttr (Attr qn v) = showQName qn ++ '=' : '"' : escStr v "\""
showQName :: QName -> String
showQName q = pre ++ qName q
where pre = case qPrefix q of
Nothing -> ""
Just p -> p ++ ":"

103
Text/XML/Light/Proc.hs Normal file
View file

@ -0,0 +1,103 @@
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light.Proc
-- Copyright : (c) Galois, Inc. 2007
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
--------------------------------------------------------------------
module Text.XML.Light.Proc where
import Text.XML.Light.Types
import Data.Maybe(listToMaybe)
import Data.List(find)
-- | Get the text value of an XML element. This function
-- ignores non-text elements, and concatenates all text elements.
strContent :: Element -> String
strContent e = concatMap cdData $ onlyText $ elContent e
-- | Select only the elements from a list of XML content.
onlyElems :: [Content] -> [Element]
onlyElems xs = [ x | Elem x <- xs ]
-- | Select only the elements from a parent.
elChildren :: Element -> [Element]
elChildren e = [ x | Elem x <- elContent e ]
-- | Select only the text from a list of XML content.
onlyText :: [Content] -> [CData]
onlyText xs = [ x | Text x <- xs ]
-- | Find all immediate children with the given name.
findChildren :: QName -> Element -> [Element]
findChildren q e = filterChildren ((q ==) . elName) e
-- | Filter all immediate children wrt a given predicate.
filterChildren :: (Element -> Bool) -> Element -> [Element]
filterChildren p e = filter p (onlyElems (elContent e))
-- | Filter all immediate children wrt a given predicate over their names.
filterChildrenName :: (QName -> Bool) -> Element -> [Element]
filterChildrenName p e = filter (p.elName) (onlyElems (elContent e))
-- | Find an immediate child with the given name.
findChild :: QName -> Element -> Maybe Element
findChild q e = listToMaybe (findChildren q e)
-- | Find an immediate child with the given name.
filterChild :: (Element -> Bool) -> Element -> Maybe Element
filterChild p e = listToMaybe (filterChildren p e)
-- | Find an immediate child with name matching a predicate.
filterChildName :: (QName -> Bool) -> Element -> Maybe Element
filterChildName p e = listToMaybe (filterChildrenName p e)
-- | Find the left-most occurrence of an element matching given name.
findElement :: QName -> Element -> Maybe Element
findElement q e = listToMaybe (findElements q e)
-- | Filter the left-most occurrence of an element wrt. given predicate.
filterElement :: (Element -> Bool) -> Element -> Maybe Element
filterElement p e = listToMaybe (filterElements p e)
-- | Filter the left-most occurrence of an element wrt. given predicate.
filterElementName :: (QName -> Bool) -> Element -> Maybe Element
filterElementName p e = listToMaybe (filterElementsName p e)
-- | Find all non-nested occurances of an element.
-- (i.e., once we have found an element, we do not search
-- for more occurances among the element's children).
findElements :: QName -> Element -> [Element]
findElements qn e = filterElementsName (qn==) e
-- | Find all non-nested occurrences of an element wrt. given predicate.
-- (i.e., once we have found an element, we do not search
-- for more occurances among the element's children).
filterElements :: (Element -> Bool) -> Element -> [Element]
filterElements p e
| p e = [e]
| otherwise = concatMap (filterElements p) $ onlyElems $ elContent e
-- | Find all non-nested occurences of an element wrt a predicate over element names.
-- (i.e., once we have found an element, we do not search
-- for more occurances among the element's children).
filterElementsName :: (QName -> Bool) -> Element -> [Element]
filterElementsName p e = filterElements (p.elName) e
-- | Lookup the value of an attribute.
findAttr :: QName -> Element -> Maybe String
findAttr x e = attrVal `fmap` find ((x ==) . attrKey) (elAttribs e)
-- | Lookup attribute name from list.
lookupAttr :: QName -> [Attr] -> Maybe String
lookupAttr x as = attrVal `fmap` find ((x ==) . attrKey) as

91
Text/XML/Light/Types.hs Normal file
View file

@ -0,0 +1,91 @@
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light.Types
-- Copyright : (c) Galois, Inc. 2007
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
-- Basic XML types.
--
module Text.XML.Light.Types where
-- | A line is an Integer
type Line = Integer
-- | XML content
data Content = Elem Element
| Text CData
| CRef String
deriving Show
-- | XML elements
data Element = Element {
elName :: QName,
elAttribs :: [Attr],
elContent :: [Content],
elLine :: Maybe Line
} deriving Show
-- | XML attributes
data Attr = Attr {
attrKey :: QName,
attrVal :: String
} deriving (Eq,Ord,Show)
-- | XML CData
data CData = CData {
cdVerbatim :: CDataKind,
cdData :: String,
cdLine :: Maybe Line
} deriving Show
data CDataKind
= CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc.
| CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
| CDataRaw -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
deriving ( Eq, Show )
-- | XML qualified names
data QName = QName {
qName :: String,
qURI :: Maybe String,
qPrefix :: Maybe String
} deriving Show
instance Eq QName where
q1 == q2 = compare q1 q2 == EQ
instance Ord QName where
compare q1 q2 =
case compare (qName q1) (qName q2) of
EQ -> case (qURI q1, qURI q2) of
(Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
(u1,u2) -> compare u1 u2
x -> x
-- blank elements --------------------------------------------------------------
-- | Blank names
blank_name :: QName
blank_name = QName { qName = "", qURI = Nothing, qPrefix = Nothing }
-- | Blank cdata
blank_cdata :: CData
blank_cdata = CData { cdVerbatim = CDataText, cdData = "", cdLine = Nothing }
-- | Blank elements
blank_element :: Element
blank_element = Element
{ elName = blank_name
, elAttribs = []
, elContent = []
, elLine = Nothing
}

33
debian/copyright vendored
View file

@ -50,3 +50,36 @@ by Eric A. Meyer
<http://meyerweb.com/eric/tools/s5
Released under an explicit Public Domain License
----------------------------------------------------------------------
Text/XML/Light/*
(c) 2007 Galois Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the author nor the names of his contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

View file

@ -79,8 +79,7 @@ Library
if flag(highlighting)
Build-depends: highlighting-kate
cpp-options: -DHIGHLIGHTING
Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory, xml,
utf8-string
Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory, utf8-string
Hs-Source-Dirs: .
Exposed-Modules: Text.Pandoc,
Text.Pandoc.Blocks,
@ -108,10 +107,16 @@ Library
Text.Pandoc.Writers.MediaWiki,
Text.Pandoc.Writers.RTF,
Text.Pandoc.Writers.S5
Other-Modules: Text.Pandoc.XML
Other-Modules: Text.Pandoc.XML,
Text.XML.Light,
Text.XML.Light.Types,
Text.XML.Light.Output,
Text.XML.Light.Input,
Text.XML.Light.Cursor
Ghc-Options: -O2 -Wall -threaded
Ghc-Prof-Options: -auto-all
Extensions: CPP
Extensions: CPP, FlexibleInstances
-- FlexibleInstances needed for Text.XML.Light
if flag(library)
Buildable: True