Merge branch 'opml'

This commit is contained in:
John MacFarlane 2013-03-19 20:23:48 -07:00
commit 8aa6172380
8 changed files with 268 additions and 4 deletions

7
README
View file

@ -13,8 +13,8 @@ Description
Pandoc is a [Haskell] library for converting from one markup format to Pandoc is a [Haskell] library for converting from one markup format to
another, and a command-line tool that uses this library. It can read another, and a command-line tool that uses this library. It can read
[markdown] and (subsets of) [Textile], [reStructuredText], [HTML], [markdown] and (subsets of) [Textile], [reStructuredText], [HTML],
[LaTeX], [MediaWiki markup], and [DocBook XML]; and it can write plain [LaTeX], [MediaWiki markup], [OPML], and [DocBook XML]; and it can write
text, [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX] plain text, [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX]
(including [beamer] slide shows), [ConTeXt], [RTF], [DocBook XML], (including [beamer] slide shows), [ConTeXt], [RTF], [DocBook XML],
[OpenDocument XML], [ODT], [Word docx], [GNU Texinfo], [MediaWiki [OpenDocument XML], [ODT], [Word docx], [GNU Texinfo], [MediaWiki
markup], [EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, [Emacs markup], [EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, [Emacs
@ -143,7 +143,7 @@ General options
`markdown_phpextra` (PHP Markdown Extra extended markdown), `markdown_phpextra` (PHP Markdown Extra extended markdown),
`markdown_github` (github extended markdown), `markdown_github` (github extended markdown),
`textile` (Textile), `rst` (reStructuredText), `html` (HTML), `textile` (Textile), `rst` (reStructuredText), `html` (HTML),
`docbook` (DocBook XML), `mediawiki` (MediaWiki markup), `docbook` (DocBook XML), `opml` (OPML), `mediawiki` (MediaWiki markup),
or `latex` (LaTeX). If `+lhs` is appended to `markdown`, `rst`, or `latex` (LaTeX). If `+lhs` is appended to `markdown`, `rst`,
or `latex`, the input will be treated as literate Haskell source: or `latex`, the input will be treated as literate Haskell source:
see [Literate Haskell support](#literate-haskell-support), below. see [Literate Haskell support](#literate-haskell-support), below.
@ -2624,6 +2624,7 @@ Sergey Astanin, Arlo O'Keeffe, Denis Laxalde, Brent Yorgey.
[ConTeXt]: http://www.pragma-ade.nl/ [ConTeXt]: http://www.pragma-ade.nl/
[RTF]: http://en.wikipedia.org/wiki/Rich_Text_Format [RTF]: http://en.wikipedia.org/wiki/Rich_Text_Format
[DocBook XML]: http://www.docbook.org/ [DocBook XML]: http://www.docbook.org/
[OPML]: http://dev.opml.org/spec2.html
[OpenDocument XML]: http://opendocument.xml.org/ [OpenDocument XML]: http://opendocument.xml.org/
[ODT]: http://en.wikipedia.org/wiki/OpenDocument [ODT]: http://en.wikipedia.org/wiki/OpenDocument
[Textile]: http://redcloth.org/textile [Textile]: http://redcloth.org/textile

View file

@ -16,7 +16,7 @@ Synopsis: Conversion between markup formats
Description: Pandoc is a Haskell library for converting from one markup Description: Pandoc is a Haskell library for converting from one markup
format to another, and a command-line tool that uses format to another, and a command-line tool that uses
this library. It can read markdown and (subsets of) HTML, this library. It can read markdown and (subsets of) HTML,
reStructuredText, LaTeX, DocBook, MediaWiki markup, reStructuredText, LaTeX, DocBook, MediaWiki markup, OPML,
and Textile, and it can write markdown, reStructuredText, and Textile, and it can write markdown, reStructuredText,
HTML, LaTeX, ConTeXt, DocBook, OpenDocument, ODT, HTML, LaTeX, ConTeXt, DocBook, OpenDocument, ODT,
Word docx, RTF, MediaWiki, Textile, groff man pages, Word docx, RTF, MediaWiki, Textile, groff man pages,
@ -105,6 +105,8 @@ Extra-Source-Files:
tests/docbook-reader.native tests/docbook-reader.native
tests/html-reader.html, tests/html-reader.html,
tests/html-reader.native, tests/html-reader.native,
tests/opml-reader.opml,
tests/opml-reader.native,
tests/insert, tests/insert,
tests/lalune.jpg, tests/lalune.jpg,
tests/movie.jpg, tests/movie.jpg,
@ -278,6 +280,7 @@ Library
Text.Pandoc.Readers.MediaWiki, Text.Pandoc.Readers.MediaWiki,
Text.Pandoc.Readers.RST, Text.Pandoc.Readers.RST,
Text.Pandoc.Readers.DocBook, Text.Pandoc.Readers.DocBook,
Text.Pandoc.Readers.OPML,
Text.Pandoc.Readers.TeXMath, Text.Pandoc.Readers.TeXMath,
Text.Pandoc.Readers.Textile, Text.Pandoc.Readers.Textile,
Text.Pandoc.Readers.Native, Text.Pandoc.Readers.Native,

View file

@ -758,6 +758,7 @@ defaultReaderName fallback (x:xs) =
".rst" -> "rst" ".rst" -> "rst"
".lhs" -> "markdown+lhs" ".lhs" -> "markdown+lhs"
".db" -> "docbook" ".db" -> "docbook"
".opml" -> "opml"
".wiki" -> "mediawiki" ".wiki" -> "mediawiki"
".textile" -> "textile" ".textile" -> "textile"
".native" -> "native" ".native" -> "native"

View file

@ -72,6 +72,7 @@ module Text.Pandoc
, readHtml , readHtml
, readTextile , readTextile
, readDocBook , readDocBook
, readOPML
, readNative , readNative
-- * Writers: converting /from/ Pandoc format -- * Writers: converting /from/ Pandoc format
, Writer (..) , Writer (..)
@ -113,6 +114,7 @@ import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.MediaWiki
import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.DocBook import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.OPML
import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.HTML
import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Textile
@ -192,6 +194,7 @@ readers = [("native" , \_ s -> return $ readNative s)
,("rst" , \o s -> return $ readRST o s) ,("rst" , \o s -> return $ readRST o s)
,("mediawiki" , \o s -> return $ readMediaWiki o s) ,("mediawiki" , \o s -> return $ readMediaWiki o s)
,("docbook" , \o s -> return $ readDocBook o s) ,("docbook" , \o s -> return $ readDocBook o s)
,("opml" , \o s -> return $ readOPML o s)
,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs ,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs
,("html" , \o s -> return $ readHtml o s) ,("html" , \o s -> return $ readHtml o s)
,("latex" , \o s -> return $ readLaTeX o s) ,("latex" , \o s -> return $ readLaTeX o s)

View file

@ -0,0 +1,95 @@
module Text.Pandoc.Readers.OPML ( readOPML ) where
import Data.Char (toUpper)
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import Text.XML.Light
import Text.HTML.TagSoup.Entity (lookupEntity)
import Data.Generics
import Data.Monoid
import Control.Monad.State
import Control.Applicative ((<$>), (<$))
-- | The reader's monad: a plain 'State' computation over 'OPMLState'.
type OPML = State OPMLState

-- | State threaded through the traversal of the OPML tree.
data OPMLState = OPMLState
  { opmlSectionLevel :: Int
    -- ^ Header level of the @outline@ element currently being processed
    --   (0 before any outline has been entered).
  , opmlDocTitle     :: Inlines
    -- ^ Document title, filled in when a @title@ element is seen.
  , opmlDocAuthors   :: [Inlines]
    -- ^ Document authors, filled in when an @ownerName@ element is seen.
  , opmlDocDate      :: Inlines
    -- ^ Document date, filled in when a @dateModified@ element is seen.
  } deriving Show
-- | Convert an OPML document (given as a 'String') to a 'Pandoc' document.
--
-- The input is parsed as XML, normalized with 'normalizeTree', and every
-- top-level node is run through 'parseBlock'.  Title, authors and date
-- collected in the final 'OPMLState' are attached to the resulting document.
-- The 'ReaderOptions' argument is currently ignored.
readOPML :: ReaderOptions -> String -> Pandoc
readOPML _ inp =
    setTitle (opmlDocTitle finalState)
  . setAuthors (opmlDocAuthors finalState)
  . setDate (opmlDocDate finalState)
  . doc
  $ mconcat blockChunks
  where
    -- State before anything has been read: no metadata, no open outline.
    initialState = OPMLState
      { opmlSectionLevel = 0
      , opmlDocTitle     = mempty
      , opmlDocAuthors   = []
      , opmlDocDate      = mempty
      }
    topLevelNodes = normalizeTree (parseXML inp)
    (blockChunks, finalState) =
      runState (mapM parseBlock topLevelNodes) initialState
-- | Normalize parsed XML so that character data can be read with
-- 'strContent':
--
-- * raw character data nodes ('CDataRaw') are dropped;
-- * adjacent text nodes are merged into one;
-- * entity references ('CRef') are resolved with 'convertEntity' and
--   merged with neighbouring text.
--
-- A 'CRef' with no neighbouring text is also turned into a text node.
-- Without that case a lone entity reference (e.g. an element whose whole
-- content is @&eacute;@) would stay a 'CRef', and since the metadata
-- handlers read content via 'strContent' (which only looks at text nodes)
-- it would be silently lost.
normalizeTree :: [Content] -> [Content]
normalizeTree = everywhere (mkT go)
  where
    go :: [Content] -> [Content]
    go (Text (CData CDataRaw _ _) : xs) = xs
    go (Text (CData CDataText s1 z) : Text (CData CDataText s2 _) : xs) =
      Text (CData CDataText (s1 ++ s2) z) : xs
    go (Text (CData CDataText s1 z) : CRef r : xs) =
      Text (CData CDataText (s1 ++ convertEntity r) z) : xs
    go (CRef r : Text (CData CDataText s1 z) : xs) =
      Text (CData CDataText (convertEntity r ++ s1) z) : xs
    go (CRef r1 : CRef r2 : xs) =
      Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing) : xs
    -- Fallback: an entity reference that could not be merged with a
    -- neighbour.  Must come after the merging cases above.
    go (CRef r : xs) =
      Text (CData CDataText (convertEntity r) Nothing) : xs
    go xs = xs
-- | Resolve a named entity to the character it stands for.  If the name is
-- not known to 'lookupEntity', fall back to the entity name in upper case.
convertEntity :: String -> String
convertEntity name =
  case lookupEntity name of
    Just ch -> [ch]
    Nothing -> map toUpper name
-- | Look up the value of the attribute whose unqualified name is @attr@ on
-- the given element; yields the empty string when the attribute is absent.
attrValue :: String -> Element -> String
attrValue attr elt = maybe "" id (lookupAttrBy hasName (elAttribs elt))
  where
    hasName key = qName key == attr
-- | Interpret a string as HTML and return its inline content.  Only input
-- that parses to exactly one 'Plain' block is accepted; anything else
-- yields 'mempty'.
asHtml :: String -> Inlines
asHtml = inlinesOf . readHtml def
  where
    inlinesOf (Pandoc _ [Plain ils]) = fromList ils
    inlinesOf _                      = mempty
-- | Interpret a string as markdown (default reader options) and return the
-- resulting blocks; document metadata is discarded.
asMarkdown :: String -> Blocks
asMarkdown s = fromList (blocksOf (readMarkdown def s))
  where
    blocksOf (Pandoc _ bs) = bs
-- | Run 'parseBlock' over every child of an element, in order, and
-- concatenate the results.
getBlocks :: Element -> OPML Blocks
getBlocks e = do
  chunks <- mapM parseBlock (elContent e)
  return (mconcat chunks)
-- | Convert one XML node to blocks, updating the reader state as needed.
--
-- * @ownerName@, @dateModified@ and @title@ elements produce no blocks;
--   their text content is stored as author, date and title respectively.
-- * @outline@ elements become a header one level deeper than the current
--   section level, followed by the blocks from the @_note@ attribute and
--   then the blocks of the nested content.
-- * @?xml@ elements produce nothing.
-- * Any other element contributes whatever its children produce.
-- * Non-element nodes produce nothing.
parseBlock :: Content -> OPML Blocks
parseBlock (Elem e) =
  case qName (elName e) of
    "ownerName"    -> record (\st -> st{ opmlDocAuthors = [plainText] })
    "dateModified" -> record (\st -> st{ opmlDocDate = plainText })
    "title"        -> record (\st -> st{ opmlDocTitle = plainText })
    "outline"      -> do
      level <- gets opmlSectionLevel
      section (level + 1)
    "?xml"         -> return mempty
    _              -> getBlocks e
  where
    -- Text content of the element, as plain inlines.
    plainText = text (strContent e)

    -- Update the state and contribute no blocks.
    record f = modify f >> return mempty

    -- Emit this outline as a level-@n@ section.
    section n = do
      -- Children are processed with the level set to n, so nested outlines
      -- come out at n + 1; the previous level is restored afterwards.
      modify $ \st -> st{ opmlSectionLevel = n }
      children <- getBlocks e
      modify $ \st -> st{ opmlSectionLevel = n - 1 }
      let title   = asHtml (attrValue "text" e)
          notes   = asMarkdown (attrValue "_note" e)
          -- Outlines of type "link" get their header text wrapped in a
          -- link to the "url" attribute.
          heading = case attrValue "type" e of
                      "link" -> link (attrValue "url" e) "" title
                      _      -> title
      return $ header n heading <> notes <> children
parseBlock _ = return mempty

View file

@ -124,6 +124,10 @@ tests = [ testGroup "markdown"
, test "reader" ["-r", "mediawiki", "-w", "native", "-s"] , test "reader" ["-r", "mediawiki", "-w", "native", "-s"]
"mediawiki-reader.wiki" "mediawiki-reader.native" "mediawiki-reader.wiki" "mediawiki-reader.native"
] ]
, testGroup "opml"
[ test "reader" ["-r", "opml", "-w", "native", "-s"]
"opml-reader.opml" "opml-reader.native"
]
, testGroup "other writers" $ map (\f -> testGroup f $ writerTests f) , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f)
[ "opendocument" , "context" , "texinfo" [ "opendocument" , "context" , "texinfo"
, "man" , "plain" , "rtf", "org", "asciidoc" , "man" , "plain" , "rtf", "org", "asciidoc"

66
tests/opml-reader.native Normal file
View file

@ -0,0 +1,66 @@
Pandoc (Meta {docTitle = [Str "states.opml"], docAuthors = [[Str "Dave",Space,Str "Winer"]], docDate = [Str "Thu,",Space,Str "14",Space,Str "Jul",Space,Str "2005",Space,Str "23:41:05",Space,Str "GMT"]})
[Header 1 ("",[],[]) [Str "United",Space,Str "States"]
,Header 2 ("",[],[]) [Str "Far",Space,Str "West"]
,Header 3 ("",[],[]) [Str "Alaska"]
,Header 3 ("",[],[]) [Str "California"]
,Header 3 ("",[],[]) [Str "Hawaii"]
,Header 3 ("",[],[]) [Strong [Str "Nevada"]]
,Para [Str "I",Space,Str "lived",Space,Str "here",Space,Emph [Str "once"],Str "."]
,Para [Str "Loved",Space,Str "it."]
,Header 4 ("",[],[]) [Link [Str "Reno"] ("http://www.reno.gov","")]
,Header 4 ("",[],[]) [Str "Las",Space,Str "Vegas"]
,Header 4 ("",[],[]) [Str "Ely"]
,Header 4 ("",[],[]) [Str "Gerlach"]
,Header 3 ("",[],[]) [Str "Oregon"]
,Header 3 ("",[],[]) [Str "Washington"]
,Header 2 ("",[],[]) [Str "Great",Space,Str "Plains"]
,Header 3 ("",[],[]) [Str "Kansas"]
,Header 3 ("",[],[]) [Str "Nebraska"]
,Header 3 ("",[],[]) [Str "North",Space,Str "Dakota"]
,Header 3 ("",[],[]) [Str "Oklahoma"]
,Header 3 ("",[],[]) [Str "South",Space,Str "Dakota"]
,Header 2 ("",[],[]) [Str "Mid",Str "-",Str "Atlantic"]
,Header 3 ("",[],[]) [Str "Delaware"]
,Header 3 ("",[],[]) [Str "Maryland"]
,Header 3 ("",[],[]) [Str "New",Space,Str "Jersey"]
,Header 3 ("",[],[]) [Str "New",Space,Str "York"]
,Header 3 ("",[],[]) [Str "Pennsylvania"]
,Header 2 ("",[],[]) [Str "Midwest"]
,Header 3 ("",[],[]) [Str "Illinois"]
,Header 3 ("",[],[]) [Str "Indiana"]
,Header 3 ("",[],[]) [Str "Iowa"]
,Header 3 ("",[],[]) [Str "Kentucky"]
,Header 3 ("",[],[]) [Str "Michigan"]
,Header 3 ("",[],[]) [Str "Minnesota"]
,Header 3 ("",[],[]) [Str "Missouri"]
,Header 3 ("",[],[]) [Str "Ohio"]
,Header 3 ("",[],[]) [Str "West",Space,Str "Virginia"]
,Header 3 ("",[],[]) [Str "Wisconsin"]
,Header 2 ("",[],[]) [Str "Mountains"]
,Header 3 ("",[],[]) [Str "Colorado"]
,Header 3 ("",[],[]) [Str "Idaho"]
,Header 3 ("",[],[]) [Str "Montana"]
,Header 3 ("",[],[]) [Str "Utah"]
,Header 3 ("",[],[]) [Str "Wyoming"]
,Header 2 ("",[],[]) [Str "New",Space,Str "England"]
,Header 3 ("",[],[]) [Str "Connecticut"]
,Header 3 ("",[],[]) [Str "Maine"]
,Header 3 ("",[],[]) [Str "Massachusetts"]
,Header 3 ("",[],[]) [Str "New",Space,Str "Hampshire"]
,Header 3 ("",[],[]) [Str "Rhode",Space,Str "Island"]
,Header 3 ("",[],[]) [Str "Vermont"]
,Header 2 ("",[],[]) [Str "South"]
,Header 3 ("",[],[]) [Str "Alabama"]
,Header 3 ("",[],[]) [Str "Arkansas"]
,Header 3 ("",[],[]) [Str "Florida"]
,Header 3 ("",[],[]) [Str "Georgia"]
,Header 3 ("",[],[]) [Str "Louisiana"]
,Header 3 ("",[],[]) [Str "Mississippi"]
,Header 3 ("",[],[]) [Str "North",Space,Str "Carolina"]
,Header 3 ("",[],[]) [Str "South",Space,Str "Carolina"]
,Header 3 ("",[],[]) [Str "Tennessee"]
,Header 3 ("",[],[]) [Str "Virginia"]
,Header 2 ("",[],[]) [Str "Southwest"]
,Header 3 ("",[],[]) [Str "Arizona"]
,Header 3 ("",[],[]) [Str "New",Space,Str "Mexico"]
,Header 3 ("",[],[]) [Str "Texas"]]

91
tests/opml-reader.opml Normal file
View file

@ -0,0 +1,91 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<opml version="2.0">
<head>
<title>states.opml</title>
<dateCreated>Tue, 15 Mar 2005 16:35:45 GMT</dateCreated>
<dateModified>Thu, 14 Jul 2005 23:41:05 GMT</dateModified>
<ownerName>Dave Winer</ownerName>
<ownerEmail>dave@scripting.com</ownerEmail>
<expansionState>1, 6, 13, 16, 18, 20</expansionState>
<vertScrollState>1</vertScrollState>
<windowTop>106</windowTop>
<windowLeft>106</windowLeft>
<windowBottom>558</windowBottom>
<windowRight>479</windowRight>
</head>
<body>
<outline text="United States">
<outline text="Far West">
<outline text="Alaska"/>
<outline text="California"/>
<outline text="Hawaii"/>
<outline text="&lt;strong&gt;Nevada&lt;/strong&gt;" _note="I lived here *once*.&#10;&#10;Loved it.">
<outline text="Reno" created="Tue, 12 Jul 2005 23:56:35 GMT" type="link" url="http://www.reno.gov"/>
<outline text="Las Vegas" created="Tue, 12 Jul 2005 23:56:37 GMT"/>
<outline text="Ely" created="Tue, 12 Jul 2005 23:56:39 GMT"/>
<outline text="Gerlach" created="Tue, 12 Jul 2005 23:56:47 GMT"/>
</outline>
<outline text="Oregon"/>
<outline text="Washington"/>
</outline>
<outline text="Great Plains">
<outline text="Kansas"/>
<outline text="Nebraska"/>
<outline text="North Dakota"/>
<outline text="Oklahoma"/>
<outline text="South Dakota"/>
</outline>
<outline text="Mid-Atlantic">
<outline text="Delaware"/>
<outline text="Maryland"/>
<outline text="New Jersey"/>
<outline text="New York"/>
<outline text="Pennsylvania"/>
</outline>
<outline text="Midwest">
<outline text="Illinois"/>
<outline text="Indiana"/>
<outline text="Iowa"/>
<outline text="Kentucky"/>
<outline text="Michigan"/>
<outline text="Minnesota"/>
<outline text="Missouri"/>
<outline text="Ohio"/>
<outline text="West Virginia"/>
<outline text="Wisconsin"/>
</outline>
<outline text="Mountains">
<outline text="Colorado"/>
<outline text="Idaho"/>
<outline text="Montana"/>
<outline text="Utah"/>
<outline text="Wyoming"/>
</outline>
<outline text="New England">
<outline text="Connecticut"/>
<outline text="Maine"/>
<outline text="Massachusetts"/>
<outline text="New Hampshire"/>
<outline text="Rhode Island"/>
<outline text="Vermont"/>
</outline>
<outline text="South">
<outline text="Alabama"/>
<outline text="Arkansas"/>
<outline text="Florida"/>
<outline text="Georgia"/>
<outline text="Louisiana"/>
<outline text="Mississippi"/>
<outline text="North Carolina"/>
<outline text="South Carolina"/>
<outline text="Tennessee"/>
<outline text="Virginia"/>
</outline>
<outline text="Southwest">
<outline text="Arizona"/>
<outline text="New Mexico"/>
<outline text="Texas"/>
</outline>
</outline>
</body>
</opml>