diff --git a/README b/README index 39c69d08f..a6602718a 100644 --- a/README +++ b/README @@ -13,8 +13,8 @@ Description Pandoc is a [Haskell] library for converting from one markup format to another, and a command-line tool that uses this library. It can read [markdown] and (subsets of) [Textile], [reStructuredText], [HTML], -[LaTeX], [MediaWiki markup], and [DocBook XML]; and it can write plain -text, [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX] +[LaTeX], [MediaWiki markup], [OPML], and [DocBook XML]; and it can write +plain text, [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX] (including [beamer] slide shows), [ConTeXt], [RTF], [DocBook XML], [OpenDocument XML], [ODT], [Word docx], [GNU Texinfo], [MediaWiki markup], [EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, [Emacs @@ -143,7 +143,7 @@ General options `markdown_phpextra` (PHP Markdown Extra extended markdown), `markdown_github` (github extended markdown), `textile` (Textile), `rst` (reStructuredText), `html` (HTML), - `docbook` (DocBook XML), `mediawiki` (MediaWiki markup), + `docbook` (DocBook XML), `opml` (OPML), `mediawiki` (MediaWiki markup), or `latex` (LaTeX). If `+lhs` is appended to `markdown`, `rst`, `latex`, the input will be treated as literate Haskell source: see [Literate Haskell support](#literate-haskell-support), below. @@ -2624,6 +2624,7 @@ Sergey Astanin, Arlo O'Keeffe, Denis Laxalde, Brent Yorgey. 
[ConTeXt]: http://www.pragma-ade.nl/ [RTF]: http://en.wikipedia.org/wiki/Rich_Text_Format [DocBook XML]: http://www.docbook.org/ +[OPML]: http://dev.opml.org/spec2.html [OpenDocument XML]: http://opendocument.xml.org/ [ODT]: http://en.wikipedia.org/wiki/OpenDocument [Textile]: http://redcloth.org/textile diff --git a/pandoc.cabal b/pandoc.cabal index 7f82d11a1..c53e29838 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -16,7 +16,7 @@ Synopsis: Conversion between markup formats Description: Pandoc is a Haskell library for converting from one markup format to another, and a command-line tool that uses this library. It can read markdown and (subsets of) HTML, - reStructuredText, LaTeX, DocBook, MediaWiki markup, + reStructuredText, LaTeX, DocBook, MediaWiki markup, OPML, and Textile, and it can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook, OpenDocument, ODT, Word docx, RTF, MediaWiki, Textile, groff man pages, @@ -105,6 +105,8 @@ Extra-Source-Files: tests/docbook-reader.native tests/html-reader.html, tests/html-reader.native, + tests/opml-reader.opml, + tests/opml-reader.native, tests/insert, tests/lalune.jpg, tests/movie.jpg, @@ -278,6 +280,7 @@ Library Text.Pandoc.Readers.MediaWiki, Text.Pandoc.Readers.RST, Text.Pandoc.Readers.DocBook, + Text.Pandoc.Readers.OPML, Text.Pandoc.Readers.TeXMath, Text.Pandoc.Readers.Textile, Text.Pandoc.Readers.Native, diff --git a/pandoc.hs b/pandoc.hs index de132b0c8..8433460b3 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -758,6 +758,7 @@ defaultReaderName fallback (x:xs) = ".rst" -> "rst" ".lhs" -> "markdown+lhs" ".db" -> "docbook" + ".opml" -> "opml" ".wiki" -> "mediawiki" ".textile" -> "textile" ".native" -> "native" diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 8201bc881..80ddb72d7 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -72,6 +72,7 @@ module Text.Pandoc , readHtml , readTextile , readDocBook + , readOPML , readNative -- * Writers: converting /from/ Pandoc format , Writer (..) 
@@ -113,6 +114,7 @@ import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.MediaWiki
 import Text.Pandoc.Readers.RST
 import Text.Pandoc.Readers.DocBook
+import Text.Pandoc.Readers.OPML
 import Text.Pandoc.Readers.LaTeX
 import Text.Pandoc.Readers.HTML
 import Text.Pandoc.Readers.Textile
@@ -192,6 +194,7 @@ readers = [("native" , \_ s -> return $ readNative s)
           ,("rst"          , \o s -> return $ readRST o s)
           ,("mediawiki"    , \o s -> return $ readMediaWiki o s)
           ,("docbook"      , \o s -> return $ readDocBook o s)
+          ,("opml"         , \o s -> return $ readOPML o s)
           ,("textile"      , \o s -> return $ readTextile o s) -- TODO : textile+lhs
           ,("html"         , \o s -> return $ readHtml o s)
           ,("latex"        , \o s -> return $ readLaTeX o s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
new file mode 100644
index 000000000..53b599349
--- /dev/null
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -0,0 +1,95 @@
+module Text.Pandoc.Readers.OPML ( readOPML ) where
+import Data.Char (toUpper)
+import Text.Pandoc.Options
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder
+import Text.Pandoc.Readers.HTML (readHtml)
+import Text.Pandoc.Readers.Markdown (readMarkdown)
+import Text.XML.Light
+import Text.HTML.TagSoup.Entity (lookupEntity)
+import Data.Generics
+import Data.Monoid
+import Control.Monad.State
+import Control.Applicative ((<$>), (<$))
+
+type OPML = State OPMLState  -- parsing monad: State carrying an OPMLState
+
+data OPMLState = OPMLState{
+                        opmlSectionLevel :: Int        -- ^ depth of the outline being parsed (0 outside any outline)
+                      , opmlDocTitle     :: Inlines    -- ^ text of the last @title@ element seen
+                      , opmlDocAuthors   :: [Inlines]  -- ^ one-element list from the last @ownerName@ seen
+                      , opmlDocDate      :: Inlines    -- ^ text of the last @dateModified@ element seen
+                      } deriving Show
+
+readOPML :: ReaderOptions -> String -> Pandoc  -- the ReaderOptions argument is ignored
+readOPML _ inp = setTitle (opmlDocTitle st')  -- metadata is read from the final parser state
+                   $ setAuthors (opmlDocAuthors st')
+                   $ setDate (opmlDocDate st')
+                   $ doc $ mconcat bs
+  where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp)
+                             OPMLState{ opmlSectionLevel = 0  -- initial state: depth 0, empty metadata
+                                      , opmlDocTitle = mempty
+                                      , opmlDocAuthors = []
+                                      , opmlDocDate = mempty
+                                      }
+
+-- | Normalize the XML tree: drop raw CDATA, merge adjacent text nodes and entity references (CRef).
+normalizeTree :: [Content] -> [Content]
+normalizeTree = everywhere (mkT go)  -- 'go' is applied to every [Content] value inside the tree
+  where go :: [Content] -> [Content]
+        go (Text (CData CDataRaw _ _):xs) = xs  -- a leading raw CDATA node is discarded
+        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
+           Text (CData CDataText (s1 ++ s2) z):xs  -- two text nodes become one
+        go (Text (CData CDataText s1 z):CRef r:xs) =
+           Text (CData CDataText (s1 ++ convertEntity r) z):xs  -- entity appended to preceding text
+        go (CRef r:Text (CData CDataText s1 z):xs) =
+           Text (CData CDataText (convertEntity r ++ s1) z):xs  -- entity prepended to following text
+        go (CRef r1:CRef r2:xs) =
+           Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+        go xs = xs  -- anything else is left unchanged
+
+convertEntity :: String -> String  -- known entity name -> its character; otherwise the name upper-cased
+convertEntity e = maybe (map toUpper e) (:[]) (lookupEntity e)
+
+-- | Convenience function to get an attribute value (matched on the unqualified name), defaulting to "".
+attrValue :: String -> Element -> String
+attrValue attr elt =
+  case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of
+    Just z  -> z
+    Nothing -> ""
+
+asHtml :: String -> Inlines  -- inlines of an HTML fragment, read with default options
+asHtml s = case readHtml def s of
+                Pandoc _ [Plain ils] -> fromList ils
+                _ -> mempty  -- any result other than exactly one Plain block yields no inlines
+
+asMarkdown :: String -> Blocks  -- blocks obtained by reading the string as markdown (default options)
+asMarkdown s = fromList bs
+  where Pandoc _ bs = readMarkdown def s
+
+getBlocks :: Element -> OPML Blocks  -- parse every child of the element and concatenate the results
+getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
+
+parseBlock :: Content -> OPML Blocks  -- convert one XML node; non-element nodes yield nothing
+parseBlock (Elem e) =
+  case qName (elName e) of
+        "ownerName" -> mempty <$ modify (\st ->  -- metadata elements update the state and emit no blocks
+                              st{opmlDocAuthors = [text $ strContent e]})
+        "dateModified" -> mempty <$ modify (\st ->
+                              st{opmlDocDate = text $ strContent e})
+        "title" -> mempty <$ modify (\st ->
+                              st{opmlDocTitle = text $ strContent e})
+        "outline" -> gets opmlSectionLevel >>= sect . (+1)  -- an outline is one level below the current depth
+        "?xml" -> return mempty
+        _ -> getBlocks e  -- any other element: just descend into its children
+   where sect n = do let headerText = asHtml $ attrValue "text" e  -- the text attribute is read as HTML
+                     let noteBlocks = asMarkdown $ attrValue "_note" e  -- the _note attribute is read as markdown
+                     modify $ \st -> st{ opmlSectionLevel = n }  -- children are parsed at depth n
+                     bs <- getBlocks e
+                     modify $ \st -> st{ opmlSectionLevel = n - 1 }  -- restore the depth for following siblings
+                     let headerText' = case attrValue "type" e of
+                                             "link" -> link  -- type="link": header text links to the url attribute
+                                               (attrValue "url" e) "" headerText
+                                             _ -> headerText
+                     return $ header n headerText' <> noteBlocks <> bs  -- header, then note, then nested outlines
+parseBlock _ = return mempty
diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs
index 9e7493504..c22fa9830 100644
--- a/tests/Tests/Old.hs
+++ b/tests/Tests/Old.hs
@@ -124,6 +124,10 @@ tests = [ testGroup "markdown"
           , test "reader" ["-r", "mediawiki", "-w", "native", "-s"]
             "mediawiki-reader.wiki" "mediawiki-reader.native"
           ]
+        , testGroup "opml"
+          [ test "reader" ["-r", "opml", "-w", "native", "-s"]
+            "opml-reader.opml" "opml-reader.native"
+          ]
         , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f)
           [ "opendocument" , "context" , "texinfo"
           , "man" , "plain" , "rtf", "org", "asciidoc"
diff --git a/tests/opml-reader.native b/tests/opml-reader.native
new file mode 100644
index 000000000..8a627c025
--- /dev/null
+++ b/tests/opml-reader.native
@@ -0,0 +1,66 @@
+Pandoc (Meta {docTitle = [Str "states.opml"], docAuthors = [[Str "Dave",Space,Str "Winer"]], docDate = [Str "Thu,",Space,Str "14",Space,Str "Jul",Space,Str "2005",Space,Str "23:41:05",Space,Str "GMT"]})
+[Header 1 ("",[],[]) [Str "United",Space,Str "States"]
+,Header 2 ("",[],[]) [Str "Far",Space,Str "West"]
+,Header 3 ("",[],[]) [Str "Alaska"]
+,Header 3 ("",[],[]) [Str "California"]
+,Header 3 ("",[],[]) [Str "Hawaii"]
+,Header 3 ("",[],[]) [Strong [Str "Nevada"]]
+,Para [Str "I",Space,Str "lived",Space,Str "here",Space,Emph [Str "once"],Str "."]
+,Para [Str "Loved",Space,Str "it."]
+,Header 4 ("",[],[]) [Link [Str "Reno"] ("http://www.reno.gov","")]
+,Header 4 ("",[],[]) [Str "Las",Space,Str "Vegas"]
+,Header 4 ("",[],[]) [Str "Ely"]
+,Header 4 
("",[],[]) [Str "Gerlach"] +,Header 3 ("",[],[]) [Str "Oregon"] +,Header 3 ("",[],[]) [Str "Washington"] +,Header 2 ("",[],[]) [Str "Great",Space,Str "Plains"] +,Header 3 ("",[],[]) [Str "Kansas"] +,Header 3 ("",[],[]) [Str "Nebraska"] +,Header 3 ("",[],[]) [Str "North",Space,Str "Dakota"] +,Header 3 ("",[],[]) [Str "Oklahoma"] +,Header 3 ("",[],[]) [Str "South",Space,Str "Dakota"] +,Header 2 ("",[],[]) [Str "Mid",Str "-",Str "Atlantic"] +,Header 3 ("",[],[]) [Str "Delaware"] +,Header 3 ("",[],[]) [Str "Maryland"] +,Header 3 ("",[],[]) [Str "New",Space,Str "Jersey"] +,Header 3 ("",[],[]) [Str "New",Space,Str "York"] +,Header 3 ("",[],[]) [Str "Pennsylvania"] +,Header 2 ("",[],[]) [Str "Midwest"] +,Header 3 ("",[],[]) [Str "Illinois"] +,Header 3 ("",[],[]) [Str "Indiana"] +,Header 3 ("",[],[]) [Str "Iowa"] +,Header 3 ("",[],[]) [Str "Kentucky"] +,Header 3 ("",[],[]) [Str "Michigan"] +,Header 3 ("",[],[]) [Str "Minnesota"] +,Header 3 ("",[],[]) [Str "Missouri"] +,Header 3 ("",[],[]) [Str "Ohio"] +,Header 3 ("",[],[]) [Str "West",Space,Str "Virginia"] +,Header 3 ("",[],[]) [Str "Wisconsin"] +,Header 2 ("",[],[]) [Str "Mountains"] +,Header 3 ("",[],[]) [Str "Colorado"] +,Header 3 ("",[],[]) [Str "Idaho"] +,Header 3 ("",[],[]) [Str "Montana"] +,Header 3 ("",[],[]) [Str "Utah"] +,Header 3 ("",[],[]) [Str "Wyoming"] +,Header 2 ("",[],[]) [Str "New",Space,Str "England"] +,Header 3 ("",[],[]) [Str "Connecticut"] +,Header 3 ("",[],[]) [Str "Maine"] +,Header 3 ("",[],[]) [Str "Massachusetts"] +,Header 3 ("",[],[]) [Str "New",Space,Str "Hampshire"] +,Header 3 ("",[],[]) [Str "Rhode",Space,Str "Island"] +,Header 3 ("",[],[]) [Str "Vermont"] +,Header 2 ("",[],[]) [Str "South"] +,Header 3 ("",[],[]) [Str "Alabama"] +,Header 3 ("",[],[]) [Str "Arkansas"] +,Header 3 ("",[],[]) [Str "Florida"] +,Header 3 ("",[],[]) [Str "Georgia"] +,Header 3 ("",[],[]) [Str "Louisiana"] +,Header 3 ("",[],[]) [Str "Mississippi"] +,Header 3 ("",[],[]) [Str "North",Space,Str "Carolina"] +,Header 3 
("",[],[]) [Str "South",Space,Str "Carolina"] +,Header 3 ("",[],[]) [Str "Tennessee"] +,Header 3 ("",[],[]) [Str "Virginia"] +,Header 2 ("",[],[]) [Str "Southwest"] +,Header 3 ("",[],[]) [Str "Arizona"] +,Header 3 ("",[],[]) [Str "New",Space,Str "Mexico"] +,Header 3 ("",[],[]) [Str "Texas"]] diff --git a/tests/opml-reader.opml b/tests/opml-reader.opml new file mode 100644 index 000000000..54dd592ea --- /dev/null +++ b/tests/opml-reader.opml @@ -0,0 +1,91 @@ + + + + states.opml + Tue, 15 Mar 2005 16:35:45 GMT + Thu, 14 Jul 2005 23:41:05 GMT + Dave Winer + dave@scripting.com + 1, 6, 13, 16, 18, 20 + 1 + 106 + 106 + 558 + 479 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +