Merge branch 'opml'
This commit is contained in:
commit
8aa6172380
8 changed files with 268 additions and 4 deletions
7
README
7
README
|
@ -13,8 +13,8 @@ Description
|
|||
Pandoc is a [Haskell] library for converting from one markup format to
|
||||
another, and a command-line tool that uses this library. It can read
|
||||
[markdown] and (subsets of) [Textile], [reStructuredText], [HTML],
|
||||
[LaTeX], [MediaWiki markup], and [DocBook XML]; and it can write plain
|
||||
text, [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX]
|
||||
[LaTeX], [MediaWiki markup], [OPML], and [DocBook XML]; and it can write
|
||||
plain text, [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX]
|
||||
(including [beamer] slide shows), [ConTeXt], [RTF], [DocBook XML],
|
||||
[OpenDocument XML], [ODT], [Word docx], [GNU Texinfo], [MediaWiki
|
||||
markup], [EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, [Emacs
|
||||
|
@ -143,7 +143,7 @@ General options
|
|||
`markdown_phpextra` (PHP Markdown Extra extended markdown),
|
||||
`markdown_github` (github extended markdown),
|
||||
`textile` (Textile), `rst` (reStructuredText), `html` (HTML),
|
||||
`docbook` (DocBook XML), `mediawiki` (MediaWiki markup),
|
||||
`docbook` (DocBook XML), `opml` (OPML), `mediawiki` (MediaWiki markup),
|
||||
or `latex` (LaTeX). If `+lhs` is appended to `markdown`, `rst`,
|
||||
or `latex`, the input will be treated as literate Haskell source:
|
||||
see [Literate Haskell support](#literate-haskell-support), below.
|
||||
|
@ -2624,6 +2624,7 @@ Sergey Astanin, Arlo O'Keeffe, Denis Laxalde, Brent Yorgey.
|
|||
[ConTeXt]: http://www.pragma-ade.nl/
|
||||
[RTF]: http://en.wikipedia.org/wiki/Rich_Text_Format
|
||||
[DocBook XML]: http://www.docbook.org/
|
||||
[OPML]: http://dev.opml.org/spec2.html
|
||||
[OpenDocument XML]: http://opendocument.xml.org/
|
||||
[ODT]: http://en.wikipedia.org/wiki/OpenDocument
|
||||
[Textile]: http://redcloth.org/textile
|
||||
|
|
|
@ -16,7 +16,7 @@ Synopsis: Conversion between markup formats
|
|||
Description: Pandoc is a Haskell library for converting from one markup
|
||||
format to another, and a command-line tool that uses
|
||||
this library. It can read markdown and (subsets of) HTML,
|
||||
reStructuredText, LaTeX, DocBook, MediaWiki markup,
|
||||
reStructuredText, LaTeX, DocBook, MediaWiki markup, OPML,
|
||||
and Textile, and it can write markdown, reStructuredText,
|
||||
HTML, LaTeX, ConTeXt, DocBook, OpenDocument, ODT,
|
||||
Word docx, RTF, MediaWiki, Textile, groff man pages,
|
||||
|
@ -105,6 +105,8 @@ Extra-Source-Files:
|
|||
tests/docbook-reader.native
|
||||
tests/html-reader.html,
|
||||
tests/html-reader.native,
|
||||
tests/opml-reader.opml,
|
||||
tests/opml-reader.native,
|
||||
tests/insert,
|
||||
tests/lalune.jpg,
|
||||
tests/movie.jpg,
|
||||
|
@ -278,6 +280,7 @@ Library
|
|||
Text.Pandoc.Readers.MediaWiki,
|
||||
Text.Pandoc.Readers.RST,
|
||||
Text.Pandoc.Readers.DocBook,
|
||||
Text.Pandoc.Readers.OPML,
|
||||
Text.Pandoc.Readers.TeXMath,
|
||||
Text.Pandoc.Readers.Textile,
|
||||
Text.Pandoc.Readers.Native,
|
||||
|
|
|
@ -758,6 +758,7 @@ defaultReaderName fallback (x:xs) =
|
|||
".rst" -> "rst"
|
||||
".lhs" -> "markdown+lhs"
|
||||
".db" -> "docbook"
|
||||
".opml" -> "opml"
|
||||
".wiki" -> "mediawiki"
|
||||
".textile" -> "textile"
|
||||
".native" -> "native"
|
||||
|
|
|
@ -72,6 +72,7 @@ module Text.Pandoc
|
|||
, readHtml
|
||||
, readTextile
|
||||
, readDocBook
|
||||
, readOPML
|
||||
, readNative
|
||||
-- * Writers: converting /from/ Pandoc format
|
||||
, Writer (..)
|
||||
|
@ -113,6 +114,7 @@ import Text.Pandoc.Readers.Markdown
|
|||
import Text.Pandoc.Readers.MediaWiki
|
||||
import Text.Pandoc.Readers.RST
|
||||
import Text.Pandoc.Readers.DocBook
|
||||
import Text.Pandoc.Readers.OPML
|
||||
import Text.Pandoc.Readers.LaTeX
|
||||
import Text.Pandoc.Readers.HTML
|
||||
import Text.Pandoc.Readers.Textile
|
||||
|
@ -192,6 +194,7 @@ readers = [("native" , \_ s -> return $ readNative s)
|
|||
,("rst" , \o s -> return $ readRST o s)
|
||||
,("mediawiki" , \o s -> return $ readMediaWiki o s)
|
||||
,("docbook" , \o s -> return $ readDocBook o s)
|
||||
,("opml" , \o s -> return $ readOPML o s)
|
||||
,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs
|
||||
,("html" , \o s -> return $ readHtml o s)
|
||||
,("latex" , \o s -> return $ readLaTeX o s)
|
||||
|
|
95
src/Text/Pandoc/Readers/OPML.hs
Normal file
95
src/Text/Pandoc/Readers/OPML.hs
Normal file
|
@ -0,0 +1,95 @@
|
|||
module Text.Pandoc.Readers.OPML ( readOPML ) where
|
||||
import Data.Char (toUpper)
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Builder
|
||||
import Text.Pandoc.Readers.HTML (readHtml)
|
||||
import Text.Pandoc.Readers.Markdown (readMarkdown)
|
||||
import Text.XML.Light
|
||||
import Text.HTML.TagSoup.Entity (lookupEntity)
|
||||
import Data.Generics
|
||||
import Data.Monoid
|
||||
import Control.Monad.State
|
||||
import Control.Applicative ((<$>), (<$))
|
||||
|
||||
-- | Monad in which the OPML reader runs: a 'State' computation over
-- 'OPMLState'.
type OPML = State OPMLState

-- | Context threaded through the traversal of the OPML tree.
data OPMLState = OPMLState
  { opmlSectionLevel :: Int
    -- ^ Nesting depth of the @outline@ element currently being processed;
    -- used as the header level.
  , opmlDocTitle     :: Inlines
    -- ^ Document title, set when a @title@ element is encountered.
  , opmlDocAuthors   :: [Inlines]
    -- ^ Document authors, set when an @ownerName@ element is encountered.
  , opmlDocDate      :: Inlines
    -- ^ Document date, set when a @dateModified@ element is encountered.
  } deriving Show
|
||||
|
||||
-- | Parse an OPML document, supplied as a 'String', into a 'Pandoc' value.
--
-- The XML is parsed and normalized, every top-level node is converted to
-- blocks, and the title, authors and date collected along the way are
-- attached as document metadata.  The 'ReaderOptions' argument is ignored.
readOPML :: ReaderOptions -> String -> Pandoc
readOPML _ inp =
  setTitle (opmlDocTitle finalState)
    . setAuthors (opmlDocAuthors finalState)
    . setDate (opmlDocDate finalState)
    . doc
    $ mconcat blockChunks
  where
    -- Parsed and normalized top-level XML nodes.
    contents = normalizeTree (parseXML inp)

    -- State before anything has been read: depth 0, no metadata.
    initialState =
      OPMLState
        { opmlSectionLevel = 0
        , opmlDocTitle     = mempty
        , opmlDocAuthors   = []
        , opmlDocDate      = mempty
        }

    (blockChunks, finalState) =
      runState (mapM parseBlock contents) initialState
|
||||
|
||||
-- | Normalize the parsed XML: raw CDATA nodes at the head of a content list
-- are discarded, and a text node or entity reference followed by another
-- text node or entity reference is fused into a single text node.  The
-- rewrite is applied to every content list in the tree via 'everywhere'.
normalizeTree :: [Content] -> [Content]
normalizeTree = everywhere (mkT fuse)
  where
    -- Build a plain text node from its string and optional line number.
    textNode s pos = Text (CData CDataText s pos)

    -- Rewrite the front of one content list; equations are tried in order.
    fuse :: [Content] -> [Content]
    fuse nodes =
      case nodes of
        Text (CData CDataRaw _ _) : rest ->
          rest
        Text (CData CDataText a pos) : Text (CData CDataText b _) : rest ->
          textNode (a ++ b) pos : rest
        Text (CData CDataText a pos) : CRef ref : rest ->
          textNode (a ++ convertEntity ref) pos : rest
        CRef ref : Text (CData CDataText a pos) : rest ->
          textNode (convertEntity ref ++ a) pos : rest
        CRef ref1 : CRef ref2 : rest ->
          -- Two entity references carry no line information to keep.
          textNode (convertEntity ref1 ++ convertEntity ref2) Nothing : rest
        _ ->
          nodes
|
||||
|
||||
-- | Translate an entity name into the text it stands for.  A name known to
-- 'lookupEntity' becomes the corresponding single character; an unknown
-- name is returned upper-cased.
convertEntity :: String -> String
convertEntity name =
  case lookupEntity name of
    Just c  -> [c]
    Nothing -> map toUpper name
|
||||
|
||||
-- | Look up an attribute of an element by the 'qName' part of its key,
-- returning the empty string when the element has no such attribute.
attrValue :: String -> Element -> String
attrValue attr elt = maybe "" id (lookupAttrBy hasName (elAttribs elt))
  where
    hasName key = qName key == attr
|
||||
|
||||
-- | Interpret a string as HTML and return its inline content.  Content is
-- produced only when the HTML reader yields exactly one 'Plain' block; any
-- other result gives 'mempty'.
asHtml :: String -> Inlines
asHtml s = inlinesOf (readHtml def s)
  where
    inlinesOf (Pandoc _ [Plain ils]) = fromList ils
    inlinesOf _                      = mempty
|
||||
|
||||
-- | Interpret a string as markdown (with default reader options) and return
-- the resulting blocks, discarding any metadata.
asMarkdown :: String -> Blocks
asMarkdown s =
  case readMarkdown def s of
    Pandoc _ bs -> fromList bs
|
||||
|
||||
-- | Convert every child node of an element to blocks, in document order,
-- and concatenate the results.
getBlocks :: Element -> OPML Blocks
getBlocks e = fmap mconcat (mapM parseBlock (elContent e))
|
||||
|
||||
parseBlock :: Content -> OPML Blocks
|
||||
parseBlock (Elem e) =
|
||||
case qName (elName e) of
|
||||
"ownerName" -> mempty <$ modify (\st ->
|
||||
st{opmlDocAuthors = [text $ strContent e]})
|
||||
"dateModified" -> mempty <$ modify (\st ->
|
||||
st{opmlDocDate = text $ strContent e})
|
||||
"title" -> mempty <$ modify (\st ->
|
||||
st{opmlDocTitle = text $ strContent e})
|
||||
"outline" -> gets opmlSectionLevel >>= sect . (+1)
|
||||
"?xml" -> return mempty
|
||||
_ -> getBlocks e
|
||||
where sect n = do let headerText = asHtml $ attrValue "text" e
|
||||
let noteBlocks = asMarkdown $ attrValue "_note" e
|
||||
modify $ \st -> st{ opmlSectionLevel = n }
|
||||
bs <- getBlocks e
|
||||
modify $ \st -> st{ opmlSectionLevel = n - 1 }
|
||||
let headerText' = case attrValue "type" e of
|
||||
"link" -> link
|
||||
(attrValue "url" e) "" headerText
|
||||
_ -> headerText
|
||||
return $ header n headerText' <> noteBlocks <> bs
|
||||
parseBlock _ = return mempty
|
|
@ -124,6 +124,10 @@ tests = [ testGroup "markdown"
|
|||
, test "reader" ["-r", "mediawiki", "-w", "native", "-s"]
|
||||
"mediawiki-reader.wiki" "mediawiki-reader.native"
|
||||
]
|
||||
, testGroup "opml"
|
||||
[ test "reader" ["-r", "opml", "-w", "native", "-s"]
|
||||
"opml-reader.opml" "opml-reader.native"
|
||||
]
|
||||
, testGroup "other writers" $ map (\f -> testGroup f $ writerTests f)
|
||||
[ "opendocument" , "context" , "texinfo"
|
||||
, "man" , "plain" , "rtf", "org", "asciidoc"
|
||||
|
|
66
tests/opml-reader.native
Normal file
66
tests/opml-reader.native
Normal file
|
@ -0,0 +1,66 @@
|
|||
Pandoc (Meta {docTitle = [Str "states.opml"], docAuthors = [[Str "Dave",Space,Str "Winer"]], docDate = [Str "Thu,",Space,Str "14",Space,Str "Jul",Space,Str "2005",Space,Str "23:41:05",Space,Str "GMT"]})
|
||||
[Header 1 ("",[],[]) [Str "United",Space,Str "States"]
|
||||
,Header 2 ("",[],[]) [Str "Far",Space,Str "West"]
|
||||
,Header 3 ("",[],[]) [Str "Alaska"]
|
||||
,Header 3 ("",[],[]) [Str "California"]
|
||||
,Header 3 ("",[],[]) [Str "Hawaii"]
|
||||
,Header 3 ("",[],[]) [Strong [Str "Nevada"]]
|
||||
,Para [Str "I",Space,Str "lived",Space,Str "here",Space,Emph [Str "once"],Str "."]
|
||||
,Para [Str "Loved",Space,Str "it."]
|
||||
,Header 4 ("",[],[]) [Link [Str "Reno"] ("http://www.reno.gov","")]
|
||||
,Header 4 ("",[],[]) [Str "Las",Space,Str "Vegas"]
|
||||
,Header 4 ("",[],[]) [Str "Ely"]
|
||||
,Header 4 ("",[],[]) [Str "Gerlach"]
|
||||
,Header 3 ("",[],[]) [Str "Oregon"]
|
||||
,Header 3 ("",[],[]) [Str "Washington"]
|
||||
,Header 2 ("",[],[]) [Str "Great",Space,Str "Plains"]
|
||||
,Header 3 ("",[],[]) [Str "Kansas"]
|
||||
,Header 3 ("",[],[]) [Str "Nebraska"]
|
||||
,Header 3 ("",[],[]) [Str "North",Space,Str "Dakota"]
|
||||
,Header 3 ("",[],[]) [Str "Oklahoma"]
|
||||
,Header 3 ("",[],[]) [Str "South",Space,Str "Dakota"]
|
||||
,Header 2 ("",[],[]) [Str "Mid",Str "-",Str "Atlantic"]
|
||||
,Header 3 ("",[],[]) [Str "Delaware"]
|
||||
,Header 3 ("",[],[]) [Str "Maryland"]
|
||||
,Header 3 ("",[],[]) [Str "New",Space,Str "Jersey"]
|
||||
,Header 3 ("",[],[]) [Str "New",Space,Str "York"]
|
||||
,Header 3 ("",[],[]) [Str "Pennsylvania"]
|
||||
,Header 2 ("",[],[]) [Str "Midwest"]
|
||||
,Header 3 ("",[],[]) [Str "Illinois"]
|
||||
,Header 3 ("",[],[]) [Str "Indiana"]
|
||||
,Header 3 ("",[],[]) [Str "Iowa"]
|
||||
,Header 3 ("",[],[]) [Str "Kentucky"]
|
||||
,Header 3 ("",[],[]) [Str "Michigan"]
|
||||
,Header 3 ("",[],[]) [Str "Minnesota"]
|
||||
,Header 3 ("",[],[]) [Str "Missouri"]
|
||||
,Header 3 ("",[],[]) [Str "Ohio"]
|
||||
,Header 3 ("",[],[]) [Str "West",Space,Str "Virginia"]
|
||||
,Header 3 ("",[],[]) [Str "Wisconsin"]
|
||||
,Header 2 ("",[],[]) [Str "Mountains"]
|
||||
,Header 3 ("",[],[]) [Str "Colorado"]
|
||||
,Header 3 ("",[],[]) [Str "Idaho"]
|
||||
,Header 3 ("",[],[]) [Str "Montana"]
|
||||
,Header 3 ("",[],[]) [Str "Utah"]
|
||||
,Header 3 ("",[],[]) [Str "Wyoming"]
|
||||
,Header 2 ("",[],[]) [Str "New",Space,Str "England"]
|
||||
,Header 3 ("",[],[]) [Str "Connecticut"]
|
||||
,Header 3 ("",[],[]) [Str "Maine"]
|
||||
,Header 3 ("",[],[]) [Str "Massachusetts"]
|
||||
,Header 3 ("",[],[]) [Str "New",Space,Str "Hampshire"]
|
||||
,Header 3 ("",[],[]) [Str "Rhode",Space,Str "Island"]
|
||||
,Header 3 ("",[],[]) [Str "Vermont"]
|
||||
,Header 2 ("",[],[]) [Str "South"]
|
||||
,Header 3 ("",[],[]) [Str "Alabama"]
|
||||
,Header 3 ("",[],[]) [Str "Arkansas"]
|
||||
,Header 3 ("",[],[]) [Str "Florida"]
|
||||
,Header 3 ("",[],[]) [Str "Georgia"]
|
||||
,Header 3 ("",[],[]) [Str "Louisiana"]
|
||||
,Header 3 ("",[],[]) [Str "Mississippi"]
|
||||
,Header 3 ("",[],[]) [Str "North",Space,Str "Carolina"]
|
||||
,Header 3 ("",[],[]) [Str "South",Space,Str "Carolina"]
|
||||
,Header 3 ("",[],[]) [Str "Tennessee"]
|
||||
,Header 3 ("",[],[]) [Str "Virginia"]
|
||||
,Header 2 ("",[],[]) [Str "Southwest"]
|
||||
,Header 3 ("",[],[]) [Str "Arizona"]
|
||||
,Header 3 ("",[],[]) [Str "New",Space,Str "Mexico"]
|
||||
,Header 3 ("",[],[]) [Str "Texas"]]
|
91
tests/opml-reader.opml
Normal file
91
tests/opml-reader.opml
Normal file
|
@ -0,0 +1,91 @@
|
|||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<opml version="2.0">
|
||||
<head>
|
||||
<title>states.opml</title>
|
||||
<dateCreated>Tue, 15 Mar 2005 16:35:45 GMT</dateCreated>
|
||||
<dateModified>Thu, 14 Jul 2005 23:41:05 GMT</dateModified>
|
||||
<ownerName>Dave Winer</ownerName>
|
||||
<ownerEmail>dave@scripting.com</ownerEmail>
|
||||
<expansionState>1, 6, 13, 16, 18, 20</expansionState>
|
||||
<vertScrollState>1</vertScrollState>
|
||||
<windowTop>106</windowTop>
|
||||
<windowLeft>106</windowLeft>
|
||||
<windowBottom>558</windowBottom>
|
||||
<windowRight>479</windowRight>
|
||||
</head>
|
||||
<body>
|
||||
<outline text="United States">
|
||||
<outline text="Far West">
|
||||
<outline text="Alaska"/>
|
||||
<outline text="California"/>
|
||||
<outline text="Hawaii"/>
|
||||
<outline text="&lt;strong&gt;Nevada&lt;/strong&gt;" _note="I lived here *once*.&#10;&#10;Loved it.">
|
||||
<outline text="Reno" created="Tue, 12 Jul 2005 23:56:35 GMT" type="link" url="http://www.reno.gov"/>
|
||||
<outline text="Las Vegas" created="Tue, 12 Jul 2005 23:56:37 GMT"/>
|
||||
<outline text="Ely" created="Tue, 12 Jul 2005 23:56:39 GMT"/>
|
||||
<outline text="Gerlach" created="Tue, 12 Jul 2005 23:56:47 GMT"/>
|
||||
</outline>
|
||||
<outline text="Oregon"/>
|
||||
<outline text="Washington"/>
|
||||
</outline>
|
||||
<outline text="Great Plains">
|
||||
<outline text="Kansas"/>
|
||||
<outline text="Nebraska"/>
|
||||
<outline text="North Dakota"/>
|
||||
<outline text="Oklahoma"/>
|
||||
<outline text="South Dakota"/>
|
||||
</outline>
|
||||
<outline text="Mid-Atlantic">
|
||||
<outline text="Delaware"/>
|
||||
<outline text="Maryland"/>
|
||||
<outline text="New Jersey"/>
|
||||
<outline text="New York"/>
|
||||
<outline text="Pennsylvania"/>
|
||||
</outline>
|
||||
<outline text="Midwest">
|
||||
<outline text="Illinois"/>
|
||||
<outline text="Indiana"/>
|
||||
<outline text="Iowa"/>
|
||||
<outline text="Kentucky"/>
|
||||
<outline text="Michigan"/>
|
||||
<outline text="Minnesota"/>
|
||||
<outline text="Missouri"/>
|
||||
<outline text="Ohio"/>
|
||||
<outline text="West Virginia"/>
|
||||
<outline text="Wisconsin"/>
|
||||
</outline>
|
||||
<outline text="Mountains">
|
||||
<outline text="Colorado"/>
|
||||
<outline text="Idaho"/>
|
||||
<outline text="Montana"/>
|
||||
<outline text="Utah"/>
|
||||
<outline text="Wyoming"/>
|
||||
</outline>
|
||||
<outline text="New England">
|
||||
<outline text="Connecticut"/>
|
||||
<outline text="Maine"/>
|
||||
<outline text="Massachusetts"/>
|
||||
<outline text="New Hampshire"/>
|
||||
<outline text="Rhode Island"/>
|
||||
<outline text="Vermont"/>
|
||||
</outline>
|
||||
<outline text="South">
|
||||
<outline text="Alabama"/>
|
||||
<outline text="Arkansas"/>
|
||||
<outline text="Florida"/>
|
||||
<outline text="Georgia"/>
|
||||
<outline text="Louisiana"/>
|
||||
<outline text="Mississippi"/>
|
||||
<outline text="North Carolina"/>
|
||||
<outline text="South Carolina"/>
|
||||
<outline text="Tennessee"/>
|
||||
<outline text="Virginia"/>
|
||||
</outline>
|
||||
<outline text="Southwest">
|
||||
<outline text="Arizona"/>
|
||||
<outline text="New Mexico"/>
|
||||
<outline text="Texas"/>
|
||||
</outline>
|
||||
</outline>
|
||||
</body>
|
||||
</opml>
|
Loading…
Reference in a new issue