2012-04-14 16:44:21 -07:00
|
|
|
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
|
2012-04-14 17:41:04 -07:00
|
|
|
import Text.Pandoc.Parsing (ParserState(..))
|
2012-04-14 16:44:21 -07:00
|
|
|
import Text.Pandoc.Definition
|
|
|
|
import Text.Pandoc.Builder
|
|
|
|
import Text.XML.Light
|
2012-04-14 21:43:07 -07:00
|
|
|
import Data.Maybe (fromMaybe)
|
2012-04-14 16:44:21 -07:00
|
|
|
import Data.Monoid
|
|
|
|
import Data.Char (isSpace)
|
2012-04-14 17:33:56 -07:00
|
|
|
import Control.Monad.State
|
|
|
|
import Control.Applicative ((<$>))
|
|
|
|
|
|
|
|
type DB = State DBState
|
|
|
|
|
2012-04-14 17:41:04 -07:00
|
|
|
data DBState = DBState{ dbSectionLevel :: Int
|
2012-04-14 18:27:46 -07:00
|
|
|
, dbDocTitle :: Inlines
|
|
|
|
, dbDocAuthors :: [Inlines]
|
|
|
|
, dbDocDate :: Inlines
|
2012-04-14 17:41:04 -07:00
|
|
|
} deriving Show
|
2012-04-14 16:44:21 -07:00
|
|
|
|
|
|
|
readDocBook :: ParserState -> String -> Pandoc
|
2012-04-14 18:27:46 -07:00
|
|
|
readDocBook st inp = setTitle (dbDocTitle st')
|
|
|
|
$ setAuthors (dbDocAuthors st')
|
|
|
|
$ setDate (dbDocDate st')
|
|
|
|
$ doc $ mconcat bs
|
|
|
|
where (bs, st') = runState (mapM parseBlock $ parseXML inp)
|
|
|
|
DBState{ dbSectionLevel = 0
|
|
|
|
, dbDocTitle = mempty
|
|
|
|
, dbDocAuthors = []
|
|
|
|
, dbDocDate = mempty
|
|
|
|
}
|
2012-04-14 16:44:21 -07:00
|
|
|
|
2012-04-14 17:33:56 -07:00
|
|
|
parseBlock :: Content -> DB Blocks
|
2012-04-14 18:27:46 -07:00
|
|
|
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
|
2012-04-14 17:33:56 -07:00
|
|
|
parseBlock (Text (CData _ s _)) = if all isSpace s
|
2012-04-14 18:27:46 -07:00
|
|
|
then return mempty
|
|
|
|
else return $ plain $ text s
|
2012-04-14 17:33:56 -07:00
|
|
|
parseBlock (Elem e) =
|
2012-04-14 16:44:21 -07:00
|
|
|
case qName (elName e) of
|
2012-04-14 17:33:56 -07:00
|
|
|
"para" -> para <$> getInlines e
|
2012-04-14 18:31:28 -07:00
|
|
|
"blockquote" -> blockQuote <$> getBlocks e
|
2012-04-14 17:33:56 -07:00
|
|
|
"sect1" -> sect 1
|
|
|
|
"sect2" -> sect 2
|
|
|
|
"sect3" -> sect 3
|
|
|
|
"sect4" -> sect 4
|
|
|
|
"sect5" -> sect 5
|
|
|
|
"sect6" -> sect 6
|
2012-04-14 17:41:04 -07:00
|
|
|
"section" -> gets dbSectionLevel >>= sect . (+1)
|
2012-04-14 18:31:28 -07:00
|
|
|
"itemizedlist" -> bulletList <$> listitems
|
2012-04-14 21:43:07 -07:00
|
|
|
"orderedlist" -> orderedList <$> listitems -- TODO list attributes
|
2012-04-14 18:27:46 -07:00
|
|
|
"articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty
|
2012-04-14 21:43:07 -07:00
|
|
|
"programlisting" -> return $ codeBlock $ strContent e -- TODO attrs
|
2012-04-14 18:27:46 -07:00
|
|
|
"?xml" -> return mempty
|
2012-04-14 18:31:28 -07:00
|
|
|
_ -> getBlocks e
|
|
|
|
where getBlocks e' = mconcat <$> (mapM parseBlock $ elContent e')
|
2012-04-14 17:33:56 -07:00
|
|
|
getInlines e' = (trimInlines . mconcat) <$>
|
|
|
|
(mapM parseInline $ elContent e')
|
|
|
|
isTitle e' = qName (elName e') == "title"
|
|
|
|
skipWhite (Text (CData _ s _):xs) | all isSpace s = skipWhite xs
|
|
|
|
| otherwise = xs
|
|
|
|
skipWhite xs = xs
|
2012-04-14 18:31:28 -07:00
|
|
|
listitems = mapM getBlocks $ findChildren (unqual "listitem") e
|
2012-04-14 18:27:46 -07:00
|
|
|
getTitle = case findChild (unqual "title") e of
|
|
|
|
Just t -> do
|
|
|
|
tit <- getInlines t
|
|
|
|
modify $ \st -> st{dbDocTitle = tit}
|
|
|
|
Nothing -> return ()
|
|
|
|
getAuthors = do
|
|
|
|
auths <- mapM getInlines
|
|
|
|
$ findChildren (unqual "author") e
|
|
|
|
modify $ \st -> st{dbDocAuthors = auths}
|
|
|
|
getDate = case findChild (unqual "date") e of
|
|
|
|
Just t -> do
|
|
|
|
dat <- getInlines t
|
|
|
|
modify $ \st -> st{dbDocDate = dat}
|
|
|
|
Nothing -> return ()
|
2012-04-14 17:33:56 -07:00
|
|
|
sect n = case skipWhite (elContent e) of
|
|
|
|
((Elem t):body)
|
|
|
|
| isTitle t -> do
|
|
|
|
h <- header n <$> (getInlines t)
|
2012-04-14 17:41:04 -07:00
|
|
|
modify $ \st -> st{ dbSectionLevel = n }
|
|
|
|
b <- mconcat <$> (mapM parseBlock body)
|
|
|
|
modify $ \st -> st{ dbSectionLevel = n - 1 }
|
|
|
|
return $ h <> b
|
|
|
|
body -> do
|
|
|
|
let h = header n mempty
|
|
|
|
modify $ \st -> st{ dbSectionLevel = n }
|
2012-04-14 17:33:56 -07:00
|
|
|
b <- mconcat <$> (mapM parseBlock body)
|
2012-04-14 17:41:04 -07:00
|
|
|
modify $ \st -> st{ dbSectionLevel = n - 1 }
|
2012-04-14 17:33:56 -07:00
|
|
|
return $ h <> b
|
|
|
|
parseBlock (CRef _) = return mempty
|
2012-04-14 16:44:21 -07:00
|
|
|
|
2012-04-14 17:33:56 -07:00
|
|
|
parseInline :: Content -> DB Inlines
|
|
|
|
parseInline (Text (CData _ s _)) = return $ text s
|
|
|
|
parseInline (Elem e) =
|
2012-04-14 16:44:21 -07:00
|
|
|
case qName (elName e) of
|
2012-04-14 17:59:40 -07:00
|
|
|
"subscript" -> subscript <$> innerInlines
|
|
|
|
"superscript" -> superscript <$> innerInlines
|
2012-04-14 21:43:07 -07:00
|
|
|
"ulink" -> link
|
|
|
|
(fromMaybe "" (lookupAttrBy (\attr -> qName attr == "url")
|
|
|
|
(elAttribs e))) "" <$> innerInlines
|
2012-04-14 16:44:21 -07:00
|
|
|
"emphasis" -> case lookupAttrBy (\attr -> qName attr == "role")
|
|
|
|
(elAttribs e) of
|
2012-04-14 17:33:56 -07:00
|
|
|
Just "strong" -> strong <$> innerInlines
|
|
|
|
_ -> emph <$> innerInlines
|
2012-04-14 17:46:17 -07:00
|
|
|
"footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
|
2012-04-14 16:44:21 -07:00
|
|
|
_ -> innerInlines
|
2012-04-14 17:33:56 -07:00
|
|
|
where innerInlines = (trimInlines . mconcat) <$>
|
|
|
|
(mapM parseInline $ elContent e)
|
|
|
|
parseInline (CRef _) = return mempty
|
2012-04-14 16:44:21 -07:00
|
|
|
|