From 1927bc9aac0e822bd6179323e00fe38bee5a2cf3 Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 26 Apr 2018 22:33:18 +0300 Subject: [PATCH] Add FB2 reader (#4539) --- pandoc.cabal | 4 + src/Text/Pandoc/App.hs | 1 + src/Text/Pandoc/Readers.hs | 3 + src/Text/Pandoc/Readers/FB2.hs | 402 ++++++++++++++++++++++++++++++++ test/Tests/Readers/FB2.hs | 29 +++ test/fb2/reader/emphasis.fb2 | 11 + test/fb2/reader/emphasis.native | 6 + test/fb2/reader/epigraph.fb2 | 18 ++ test/fb2/reader/epigraph.native | 9 + test/fb2/reader/meta.fb2 | 26 +++ test/fb2/reader/meta.native | 2 + test/fb2/reader/poem.fb2 | 28 +++ test/fb2/reader/poem.native | 14 ++ test/fb2/reader/titles.fb2 | 18 ++ test/fb2/reader/titles.native | 8 + test/test-pandoc.hs | 2 + 16 files changed, 581 insertions(+) create mode 100644 src/Text/Pandoc/Readers/FB2.hs create mode 100644 test/Tests/Readers/FB2.hs create mode 100644 test/fb2/reader/emphasis.fb2 create mode 100644 test/fb2/reader/emphasis.native create mode 100644 test/fb2/reader/epigraph.fb2 create mode 100644 test/fb2/reader/epigraph.native create mode 100644 test/fb2/reader/meta.fb2 create mode 100644 test/fb2/reader/meta.native create mode 100644 test/fb2/reader/poem.fb2 create mode 100644 test/fb2/reader/poem.native create mode 100644 test/fb2/reader/titles.fb2 create mode 100644 test/fb2/reader/titles.native diff --git a/pandoc.cabal b/pandoc.cabal index 24fba87f7..383a35931 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -301,6 +301,8 @@ extra-source-files: test/fb2/images-embedded.html test/fb2/images-embedded.fb2 test/fb2/test-small.png + test/fb2/reader/*.fb2 + test/fb2/reader/*.native test/fb2/test.jpg test/docx/*.docx test/docx/golden/*.docx @@ -445,6 +447,7 @@ library Text.Pandoc.Readers.Odt, Text.Pandoc.Readers.EPUB, Text.Pandoc.Readers.Muse, + Text.Pandoc.Readers.FB2, Text.Pandoc.Writers, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, @@ -663,6 +666,7 @@ test-suite test-pandoc Tests.Readers.EPUB Tests.Readers.Muse Tests.Readers.Creole + Tests.Readers.FB2 Tests.Writers.Native Tests.Writers.ConTeXt Tests.Writers.Docbook diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 99277d434..9a3e00c9f 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -738,6 +738,7 @@ defaultReaderName fallback (x:xs) = ".odt" -> "odt" ".pdf" -> "pdf" -- so we get an "unknown reader" error ".doc" -> "doc" -- so we get an "unknown reader" error + ".fb2" -> "fb2" _ -> defaultReaderName fallback xs -- Determine default writer based on output file extension diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 680e7e0b0..7b7f92b35 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -65,6 +65,7 @@ module Text.Pandoc.Readers , readTxt2Tags , readEPUB , readMuse + , readFB2 -- * Miscellaneous , getReader , getDefaultExtensions @@ -86,6 +87,7 @@ import Text.Pandoc.Readers.Creole import Text.Pandoc.Readers.DocBook import Text.Pandoc.Readers.Docx import Text.Pandoc.Readers.EPUB +import Text.Pandoc.Readers.FB2 import Text.Pandoc.Readers.Haddock import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.JATS (readJATS) @@ -143,6 +145,7 @@ readers = [ ("native" , TextReader readNative) ,("t2t" , TextReader readTxt2Tags) ,("epub" , ByteStringReader readEPUB) ,("muse" , TextReader readMuse) + ,("fb2" , TextReader readFB2) ] -- | Retrieve reader, extensions based on formatSpec (format+extensions). diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs new file mode 100644 index 000000000..99b71922f --- /dev/null +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -0,0 +1,402 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} +{- +Copyright (C) 2018 Alexander Krotov + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.FB2 + Copyright : Copyright (C) 2018 Alexander Krotov + License : GNU GPL, version 2 or above + + Maintainer : Alexander Krotov + Stability : alpha + Portability : portable + +Conversion of FB2 to 'Pandoc' document. +-} + +{- + +TODO: + - Tables + - Named styles + - Parse ID attribute for all elements that have it + +-} + +module Text.Pandoc.Readers.FB2 ( readFB2 ) where +import Prelude +import Control.Monad.Except (throwError) +import Control.Monad.State.Strict +import Data.ByteString.Lazy.Char8 ( pack ) +import Data.ByteString.Base64.Lazy +import Data.Char (isSpace, toUpper) +import Data.List (dropWhileEnd, intersperse) +import Data.List.Split (splitOn) +import Data.Text (Text) +import Data.Default +import Data.Maybe +import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Builder +import Text.Pandoc.Class (PandocMonad, insertMedia) +import Text.Pandoc.Error +import Text.Pandoc.Options +import Text.Pandoc.Shared (crFilter) +import Text.XML.Light + +type FB2 m = StateT FB2State m + +data FB2State = FB2State{ fb2SectionLevel :: Int + , fb2Meta :: Meta + , fb2Authors :: [String] + } deriving Show + +instance Default FB2State where + def = FB2State{ fb2SectionLevel = 1 + , fb2Meta = mempty + , fb2Authors = [] + } + +instance HasMeta FB2State where + setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)} + deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)} + +readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readFB2 _ inp = do + (bs, st) <- runStateT (mapM parseBlock $ parseXML (crFilter inp)) def + let authors = if null $ fb2Authors st + then id + else setMeta "author" (map text $ reverse $ fb2Authors st) + pure $ Pandoc (authors $ fb2Meta st) (toList . mconcat $ bs) + +-- * Utility functions + +trim :: String -> String +trim = dropWhileEnd isSpace . dropWhile isSpace + +removeHash :: String -> String +removeHash ('#':xs) = xs +removeHash xs = xs + +convertEntity :: String -> String +convertEntity e = fromMaybe (map toUpper e) (lookupEntity e) + +parseInline :: PandocMonad m => Content -> FB2 m Inlines +parseInline (Elem e) = + case qName $ elName e of + "strong" -> strong <$> parseStyleType e + "emphasis" -> emph <$> parseStyleType e + "style" -> parseNamedStyle e + "a" -> parseLinkType e + "strikethrough" -> strikeout <$> parseStyleType e + "sub" -> subscript <$> parseStyleType e + "sup" -> superscript <$> parseStyleType e + "code" -> pure $ code $ strContent e + "image" -> parseInlineImageElement e + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ ".") +parseInline (Text x) = pure $ text $ cdData x +parseInline (CRef r) = pure $ str $ convertEntity r + +parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks +parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <*> parsePType e + +-- * Root element parser + +parseBlock :: PandocMonad m => Content -> FB2 m Blocks +parseBlock (Elem e) = + case qName $ elName e of + "?xml" -> pure mempty + "FictionBook" -> mconcat <$> mapM parseFictionBookChild (elChildren e) + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ ".") +parseBlock _ = pure mempty + +-- | Parse a child of @\@ element. +parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks +parseFictionBookChild e = + case qName $ elName e of + "stylesheet" -> pure mempty -- stylesheet is ignored + "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e) + "body" -> mconcat <$> mapM parseBodyChild (elChildren e) + "binary" -> mempty <$ parseBinaryElement e + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ "in FictionBook.") + +-- | Parse a child of @\@ element. +parseDescriptionChild :: PandocMonad m => Element -> FB2 m () +parseDescriptionChild e = + case qName $ elName e of + "title-info" -> mapM_ parseTitleInfoChild (elChildren e) + "src-title-info" -> pure () -- ignore + "document-info" -> pure () + "publish-info" -> pure () + "custom-info" -> pure () + "output" -> pure () + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ "in description.") + +-- | Parse a child of @\@ element. +parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks +parseBodyChild e = + case qName $ elName e of + "image" -> parseImageElement e + "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e) + "epigraph" -> parseEpigraph e + "section" -> parseSection e + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in body.") + +-- | Parse a @\@ element. +parseBinaryElement :: PandocMonad m => Element -> FB2 m () +parseBinaryElement e = + case (findAttr (QName "id" Nothing Nothing) e, findAttr (QName "content-type" Nothing Nothing) e) of + (Nothing, _) -> throwError $ PandocParseError " element must have an \"id\" attribute" + (Just _, Nothing) -> throwError $ PandocParseError " element must have a \"content-type\" attribute" + (Just filename, contentType) -> insertMedia filename contentType (decodeLenient (pack (strContent e))) + +-- * Type parsers + +-- | Parse @authorType@ +parseAuthor :: PandocMonad m => Element -> FB2 m String +parseAuthor e = unwords <$> mapM parseAuthorChild (elChildren e) + +parseAuthorChild :: PandocMonad m => Element -> FB2 m String +parseAuthorChild e = + case qName $ elName e of + "first-name" -> pure $ strContent e + "middle-name" -> pure $ strContent e + "last-name" -> pure $ strContent e + "nickname" -> pure $ strContent e + "home-page" -> pure $ strContent e + "email" -> pure $ strContent e + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in author.") + +-- | Parse @titleType@ +parseTitle :: PandocMonad m => Element -> FB2 m Blocks +parseTitle e = header <$> gets fb2SectionLevel <*> parseTitleType (elContent e) + +parseTitleType :: PandocMonad m => [Content] -> FB2 m Inlines +parseTitleType c = mconcat . intersperse linebreak . catMaybes <$> mapM parseTitleContent c + +parseTitleContent :: PandocMonad m => Content -> FB2 m (Maybe Inlines) +parseTitleContent (Elem e) = + case qName $ elName e of + "p" -> Just <$> parsePType e + "empty-line" -> pure $ Just mempty + _ -> pure mempty +parseTitleContent _ = pure Nothing + +-- | Parse @imageType@ +parseImageElement :: PandocMonad m => Element -> FB2 m Blocks +parseImageElement e = + case href of + Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt + Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: image without href." + where alt = maybe mempty str $ findAttr (QName "alt" Nothing Nothing) e + title = fromMaybe "" $ findAttr (QName "title" Nothing Nothing) e + imgId = fromMaybe "" $ findAttr (QName "id" Nothing Nothing) e + href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e + +-- | Parse @pType@ +parsePType :: PandocMonad m => Element -> FB2 m Inlines +parsePType = parseStyleType -- TODO add support for optional "id" and "style" attributes + +-- | Parse @citeType@ +parseCite :: PandocMonad m => Element -> FB2 m Blocks +parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e) + +-- | Parse @citeType@ child +parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks +parseCiteChild e = + case qName $ elName e of + "p" -> para <$> parsePType e + "poem" -> parsePoem e + "empty-line" -> pure horizontalRule + "subtitle" -> parseSubtitle e + "table" -> parseTable e + "text-author" -> para <$> parsePType e + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in cite.") + +-- | Parse @poemType@ +parsePoem :: PandocMonad m => Element -> FB2 m Blocks +parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e) + +parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks +parsePoemChild e = + case qName $ elName e of + "title" -> parseTitle e + "subtitle" -> parseSubtitle e + "epigraph" -> parseEpigraph e + "stanza" -> parseStanza e + "text-author" -> para <$> parsePType e + "date" -> pure $ para $ text $ strContent e + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in poem.") + +parseStanza :: PandocMonad m => Element -> FB2 m Blocks +parseStanza e = fromList . joinLineBlocks . toList . mconcat <$> mapM parseStanzaChild (elChildren e) + +joinLineBlocks :: [Block] -> [Block] +joinLineBlocks (LineBlock xs:LineBlock ys:zs) = joinLineBlocks (LineBlock (xs ++ ys) : zs) +joinLineBlocks (x:xs) = x:joinLineBlocks xs +joinLineBlocks [] = [] + +parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks +parseStanzaChild e = + case qName $ elName e of + "title" -> parseTitle e + "subtitle" -> parseSubtitle e + "v" -> lineBlock . (:[]) <$> parsePType e + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in stanza.") + +-- | Parse @epigraphType@ +parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks +parseEpigraph e = + divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e) + where divId = fromMaybe "" $ findAttr (QName "id" Nothing Nothing) e + +parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks +parseEpigraphChild e = + case qName $ elName e of + "p" -> para <$> parsePType e + "poem" -> parsePoem e + "cite" -> parseCite e + "empty-line" -> pure horizontalRule + "text-author" -> para <$> parsePType e + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in epigraph.") + +-- | Parse @annotationType@ +parseAnnotation :: PandocMonad m => Element -> FB2 m Blocks +parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e) + +parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks +parseAnnotationChild e = + case qName $ elName e of + "p" -> para <$> parsePType e + "poem" -> parsePoem e + "cite" -> parseCite e + "subtitle" -> parseSubtitle e + "table" -> parseTable e + "empty-line" -> pure horizontalRule + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in annotation.") + +-- | Parse @sectionType@ +parseSection :: PandocMonad m => Element -> FB2 m Blocks +parseSection e = do + n <- gets fb2SectionLevel + modify $ \st -> st{ fb2SectionLevel = n + 1 } + let sectionId = fromMaybe "" $ findAttr (QName "id" Nothing Nothing) e + bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e) + modify $ \st -> st{ fb2SectionLevel = n } + pure bs + +parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks +parseSectionChild e = + case qName $ elName e of + "title" -> parseBodyChild e + "epigraph" -> parseEpigraph e + "image" -> parseImageElement e + "annotation" -> parseAnnotation e + "poem" -> parsePoem e + "cite" -> parseCite e + "empty-line" -> pure horizontalRule + "table" -> parseTable e + "subtitle" -> parseSubtitle e + "p" -> para <$> parsePType e + "section" -> parseSection e + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in section.") + +-- | parse @styleType@ +parseStyleType :: PandocMonad m => Element -> FB2 m Inlines +parseStyleType e = mconcat <$> mapM parseInline (elContent e) + +-- | Parse @namedStyleType@ +parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines +parseNamedStyle e = do + content <- mconcat <$> mapM parseNamedStyleChild (elContent e) + let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e + case findAttr (QName "name" Nothing Nothing) e of + Just name -> pure $ spanWith ("", [name], lang) content + Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: link without required name." + +parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines +parseNamedStyleChild (Elem e) = + case qName (elName e) of + "strong" -> strong <$> parseStyleType e + "emphasis" -> emph <$> parseStyleType e + "style" -> parseNamedStyle e + "a" -> parseLinkType e + "strikethrough" -> strikeout <$> parseStyleType e + "sub" -> subscript <$> parseStyleType e + "sup" -> superscript <$> parseStyleType e + "code" -> pure $ code $ strContent e + "image" -> parseInlineImageElement e + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ ".") +parseNamedStyleChild x = parseInline x + +-- | Parse @linkType@ +parseLinkType :: PandocMonad m => Element -> FB2 m Inlines +parseLinkType e = do + content <- mconcat <$> mapM parseStyleLinkType (elContent e) + case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of + Just href -> pure $ link href "" content + Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: link without required href." + +-- | Parse @styleLinkType@ +parseStyleLinkType :: PandocMonad m => Content -> FB2 m Inlines +parseStyleLinkType x@(Elem e) = + case qName (elName e) of + "a" -> throwError $ PandocParseError "Couldn't parse FB2 file: links cannot be nested." + _ -> parseInline x +parseStyleLinkType x = parseInline x + +-- | Parse @tableType@ +parseTable :: PandocMonad m => Element -> FB2 m Blocks +parseTable _ = pure mempty -- TODO: tables are not supported yet + +-- | Parse @title-infoType@ +parseTitleInfoChild :: PandocMonad m => Element -> FB2 m () +parseTitleInfoChild e = + case qName (elName e) of + "genre" -> pure () + "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st}) + "book-title" -> modify (setMeta "title" (text $ strContent e)) + "annotation" -> parseAnnotation e >>= modify . setMeta "abstract" + "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ splitOn "," $ strContent e)) + "date" -> modify (setMeta "date" (text $ strContent e)) + "coverpage" -> parseCoverPage e + "lang" -> pure () + "src-lang" -> pure () + "translator" -> pure () + "sequence" -> pure () + name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in title-info.") + +parseCoverPage :: PandocMonad m => Element -> FB2 m () +parseCoverPage e = + case findChild (QName "image" (Just "http://www.gribuser.ru/xml/fictionbook/2.0") Nothing) e of + Just img -> case href of + Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src)) + Nothing -> pure () + where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img + Nothing -> pure () + +-- | Parse @inlineImageType@ element +parseInlineImageElement :: PandocMonad m + => Element + -> FB2 m Inlines +parseInlineImageElement e = + case href of + Just src -> pure $ imageWith ("", [], []) (removeHash src) "" alt + Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: inline image without href." + where alt = maybe mempty str $ findAttr (QName "alt" Nothing Nothing) e + href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e diff --git a/test/Tests/Readers/FB2.hs b/test/Tests/Readers/FB2.hs new file mode 100644 index 000000000..9b2983d57 --- /dev/null +++ b/test/Tests/Readers/FB2.hs @@ -0,0 +1,29 @@ +{-# LANGUAGE NoImplicitPrelude #-} + +module Tests.Readers.FB2 (tests) where + +import Prelude +import Test.Tasty +import Tests.Helpers +import Test.Tasty.Golden (goldenVsString) +import qualified Data.ByteString as BS +import Text.Pandoc +import Text.Pandoc.UTF8 (toText, fromTextLazy) +import Data.Text (Text) +import Data.Text.Lazy (fromStrict) +import System.FilePath (replaceExtension) + +fb2ToNative :: Text -> Text +fb2ToNative = purely (writeNative def{ writerTemplate = Just "" }) . purely (readFB2 def) + +fb2Test :: TestName -> FilePath -> TestTree +fb2Test name path = goldenVsString name native (fromTextLazy . fromStrict . fb2ToNative . toText <$> BS.readFile path) + where native = replaceExtension path ".native" + +tests :: [TestTree] +tests = [ fb2Test "Emphasis" "fb2/reader/emphasis.fb2" + , fb2Test "Titles" "fb2/reader/titles.fb2" + , fb2Test "Epigraph" "fb2/reader/epigraph.fb2" + , fb2Test "Poem" "fb2/reader/poem.fb2" + , fb2Test "Meta" "fb2/reader/meta.fb2" + ] diff --git a/test/fb2/reader/emphasis.fb2 b/test/fb2/reader/emphasis.fb2 new file mode 100644 index 000000000..1a936a9d0 --- /dev/null +++ b/test/fb2/reader/emphasis.fb2 @@ -0,0 +1,11 @@ + + + +
+

Plain, strong, emphasis, strong emphasis, emphasized strong.

+

Strikethrough: deleted

+

Subscript and superscript

+

Some code

+
+ +
diff --git a/test/fb2/reader/emphasis.native b/test/fb2/reader/emphasis.native new file mode 100644 index 000000000..422e7bb15 --- /dev/null +++ b/test/fb2/reader/emphasis.native @@ -0,0 +1,6 @@ +Pandoc (Meta {unMeta = fromList []}) +[Div ("",["section"],[]) + [Para [Str "Plain,",Space,Strong [Str "strong"],Str ",",Space,Emph [Str "emphasis"],Str ",",Space,Strong [Emph [Str "strong",Space,Str "emphasis"]],Str ",",Space,Emph [Strong [Str "emphasized",Space,Str "strong"]],Str "."] + ,Para [Str "Strikethrough:",Space,Strikeout [Str "deleted"]] + ,Para [Subscript [Str "Subscript"],Space,Str "and",Space,Superscript [Str "superscript"]] + ,Para [Str "Some",Space,Code ("",[],[]) "code"]]] diff --git a/test/fb2/reader/epigraph.fb2 b/test/fb2/reader/epigraph.fb2 new file mode 100644 index 000000000..5bb5cd2ef --- /dev/null +++ b/test/fb2/reader/epigraph.fb2 @@ -0,0 +1,18 @@ + + + + +

Body epigraph

+
+
+ +

Section epigraph

+
+
+ +

Subsection epigraph

+
+
+
+ +
diff --git a/test/fb2/reader/epigraph.native b/test/fb2/reader/epigraph.native new file mode 100644 index 000000000..a58a3e05b --- /dev/null +++ b/test/fb2/reader/epigraph.native @@ -0,0 +1,9 @@ +Pandoc (Meta {unMeta = fromList []}) +[Div ("",["epigraph"],[]) + [Para [Str "Body",Space,Str "epigraph"]] +,Div ("",["section"],[]) + [Div ("",["epigraph"],[]) + [Para [Str "Section",Space,Str "epigraph"]] + ,Div ("",["section"],[]) + [Div ("",["epigraph"],[]) + [Para [Str "Subsection",Space,Str "epigraph"]]]]] diff --git a/test/fb2/reader/meta.fb2 b/test/fb2/reader/meta.fb2 new file mode 100644 index 000000000..7e1736d64 --- /dev/null +++ b/test/fb2/reader/meta.fb2 @@ -0,0 +1,26 @@ + + + + + + First + Middle + Last + + + Another + Author + + Book title + +

Book annotation

+

Second paragraph of book annotation

+
+ foo, bar, baz + 2018 +
+
+ + <p>Body title</p> + +
diff --git a/test/fb2/reader/meta.native b/test/fb2/reader/meta.native new file mode 100644 index 000000000..71a8795b6 --- /dev/null +++ b/test/fb2/reader/meta.native @@ -0,0 +1,2 @@ +Pandoc (Meta {unMeta = fromList [("abstract",MetaBlocks [Para [Str "Book",Space,Str "annotation"],Para [Str "Second",Space,Str "paragraph",Space,Str "of",Space,Str "book",Space,Str "annotation"]]),("author",MetaList [MetaInlines [Str "First",Space,Str "Middle",Space,Str "Last"],MetaInlines [Str "Another",Space,Str "Author"]]),("date",MetaInlines [Str "2018"]),("keywords",MetaList [MetaString "foo",MetaString "bar",MetaString "baz"]),("title",MetaInlines [Str "Book",Space,Str "title"])]}) +[Header 1 ("",[],[]) [Str "Body",Space,Str "title"]] diff --git a/test/fb2/reader/poem.fb2 b/test/fb2/reader/poem.fb2 new file mode 100644 index 000000000..fcf4a0c02 --- /dev/null +++ b/test/fb2/reader/poem.fb2 @@ -0,0 +1,28 @@ + + + +
+ + + <p>Poem title</p> + + +

Poem epigraph

+
+ + Subtitle + + <p>First stanza title</p> + + Verse + More verse + + + One more stanza + + Author + April 2018 +
+
+ +
diff --git a/test/fb2/reader/poem.native b/test/fb2/reader/poem.native new file mode 100644 index 000000000..67be6a672 --- /dev/null +++ b/test/fb2/reader/poem.native @@ -0,0 +1,14 @@ +Pandoc (Meta {unMeta = fromList []}) +[Div ("",["section"],[]) + [Header 2 ("",[],[]) [Str "Poem",Space,Str "title"] + ,Div ("",["epigraph"],[]) + [Para [Str "Poem",Space,Str "epigraph"]] + ,Header 2 ("",["unnumbered"],[]) [Str "Subtitle"] + ,Header 2 ("",[],[]) [Str "First",Space,Str "stanza",Space,Str "title"] + ,LineBlock + [[Str "Verse"] + ,[Emph [Str "More"],Space,Str "verse"]] + ,LineBlock + [[Str "One",Space,Str "more",Space,Str "stanza"]] + ,Para [Str "Author"] + ,Para [Str "April",Space,Str "2018"]]] diff --git a/test/fb2/reader/titles.fb2 b/test/fb2/reader/titles.fb2 new file mode 100644 index 000000000..cfe9588d8 --- /dev/null +++ b/test/fb2/reader/titles.fb2 @@ -0,0 +1,18 @@ + + + + <p>Body title</p> +
+ <p>Section title</p> +
+ + <p>Subsection title</p> + <p>with multiple paragraphs</p> + +
+
+ <p>Another subsection title</p> +
+
+ +
diff --git a/test/fb2/reader/titles.native b/test/fb2/reader/titles.native new file mode 100644 index 000000000..a6c34f5ea --- /dev/null +++ b/test/fb2/reader/titles.native @@ -0,0 +1,8 @@ +Pandoc (Meta {unMeta = fromList []}) +[Header 1 ("",[],[]) [Str "Body",Space,Str "title"] +,Div ("",["section"],[]) + [Header 2 ("",[],[]) [Str "Section",Space,Str "title"] + ,Div ("",["section"],[]) + [Header 3 ("",[],[]) [Str "Subsection",Space,Str "title",LineBreak,Str "with",Space,Str "multiple",Space,Str "paragraphs"]] + ,Div ("",["section"],[]) + [Header 3 ("",[],[]) [Str "Another",Space,Str "subsection",Space,Str "title"]]]] diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index 8613d5dda..b70d2286c 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -12,6 +12,7 @@ import qualified Tests.Old import qualified Tests.Readers.Creole import qualified Tests.Readers.Docx import qualified Tests.Readers.EPUB +import qualified Tests.Readers.FB2 import qualified Tests.Readers.HTML import qualified Tests.Readers.JATS import qualified Tests.Readers.LaTeX @@ -75,6 +76,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests , testGroup "EPUB" Tests.Readers.EPUB.tests , testGroup "Muse" Tests.Readers.Muse.tests , testGroup "Creole" Tests.Readers.Creole.tests + , testGroup "FB2" Tests.Readers.FB2.tests ] , testGroup "Lua filters" Tests.Lua.tests ]