pandoc/src/Text/Pandoc/Readers/OPML.hs

104 lines
4.1 KiB
Haskell
Raw Normal View History

2015-02-18 13:05:05 +00:00
{-# LANGUAGE FlexibleContexts #-}
module Text.Pandoc.Readers.OPML ( readOPML ) where
import Data.Char (toUpper)
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import Text.XML.Light
import Text.HTML.TagSoup.Entity (lookupEntity)
import Data.Generics
import Control.Monad.State
2015-02-18 13:05:05 +00:00
import Data.Default
2016-12-01 12:13:51 -05:00
import Text.Pandoc.Class (PandocMonad)
2016-11-28 17:13:46 -05:00
type OPML m = StateT OPMLState m
data OPMLState = OPMLState{
opmlSectionLevel :: Int
, opmlDocTitle :: Inlines
, opmlDocAuthors :: [Inlines]
, opmlDocDate :: Inlines
} deriving Show
2015-02-18 13:05:05 +00:00
instance Default OPMLState where
def = OPMLState{ opmlSectionLevel = 0
, opmlDocTitle = mempty
, opmlDocAuthors = []
, opmlDocDate = mempty
}
2016-11-28 17:13:46 -05:00
readOPML :: PandocMonad m => ReaderOptions -> String -> m Pandoc
readOPML _ inp = do
(bs, st') <- flip runStateT def (mapM parseBlock $ normalizeTree $ parseXML inp)
return $
setTitle (opmlDocTitle st') $
setAuthors (opmlDocAuthors st') $
setDate (opmlDocDate st') $
doc $ mconcat bs
-- normalize input, consolidating adjacent Text and CRef elements
normalizeTree :: [Content] -> [Content]
normalizeTree = everywhere (mkT go)
where go :: [Content] -> [Content]
go (Text (CData CDataRaw _ _):xs) = xs
go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
Text (CData CDataText (s1 ++ s2) z):xs
go (Text (CData CDataText s1 z):CRef r:xs) =
Text (CData CDataText (s1 ++ convertEntity r) z):xs
go (CRef r:Text (CData CDataText s1 z):xs) =
Text (CData CDataText (convertEntity r ++ s1) z):xs
go (CRef r1:CRef r2:xs) =
Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
go xs = xs
convertEntity :: String -> String
convertEntity e = maybe (map toUpper e) id (lookupEntity e)
-- convenience function to get an attribute value, defaulting to ""
attrValue :: String -> Element -> String
attrValue attr elt =
case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of
Just z -> z
Nothing -> ""
2016-12-01 12:13:51 -05:00
-- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
2016-11-28 17:13:46 -05:00
-- exceptT = either throwError return
2015-02-18 13:05:05 +00:00
2016-11-28 17:13:46 -05:00
asHtml :: PandocMonad m => String -> OPML m Inlines
asHtml s =
(\(Pandoc _ bs) -> case bs of
2015-02-18 13:05:05 +00:00
[Plain ils] -> fromList ils
2016-11-28 17:13:46 -05:00
_ -> mempty) <$> (lift $ readHtml def s)
2016-11-28 17:13:46 -05:00
asMarkdown :: PandocMonad m => String -> OPML m Blocks
asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def s)
2016-11-28 17:13:46 -05:00
getBlocks :: PandocMonad m => Element -> OPML m Blocks
getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
2016-11-28 17:13:46 -05:00
parseBlock :: PandocMonad m => Content -> OPML m Blocks
parseBlock (Elem e) =
case qName (elName e) of
"ownerName" -> mempty <$ modify (\st ->
st{opmlDocAuthors = [text $ strContent e]})
"dateModified" -> mempty <$ modify (\st ->
st{opmlDocDate = text $ strContent e})
"title" -> mempty <$ modify (\st ->
st{opmlDocTitle = text $ strContent e})
"outline" -> gets opmlSectionLevel >>= sect . (+1)
"?xml" -> return mempty
_ -> getBlocks e
2015-02-18 13:05:05 +00:00
where sect n = do headerText <- asHtml $ attrValue "text" e
noteBlocks <- asMarkdown $ attrValue "_note" e
modify $ \st -> st{ opmlSectionLevel = n }
bs <- getBlocks e
modify $ \st -> st{ opmlSectionLevel = n - 1 }
let headerText' = case map toUpper (attrValue "type" e) of
"LINK" -> link
(attrValue "url" e) "" headerText
_ -> headerText
return $ header n headerText' <> noteBlocks <> bs
parseBlock _ = return mempty