Add FB2 reader (#4539)
This commit is contained in:
parent
5f0d407279
commit
1927bc9aac
16 changed files with 581 additions and 0 deletions
|
@ -301,6 +301,8 @@ extra-source-files:
|
|||
test/fb2/images-embedded.html
|
||||
test/fb2/images-embedded.fb2
|
||||
test/fb2/test-small.png
|
||||
test/fb2/reader/*.fb2
|
||||
test/fb2/reader/*.native
|
||||
test/fb2/test.jpg
|
||||
test/docx/*.docx
|
||||
test/docx/golden/*.docx
|
||||
|
@ -445,6 +447,7 @@ library
|
|||
Text.Pandoc.Readers.Odt,
|
||||
Text.Pandoc.Readers.EPUB,
|
||||
Text.Pandoc.Readers.Muse,
|
||||
Text.Pandoc.Readers.FB2,
|
||||
Text.Pandoc.Writers,
|
||||
Text.Pandoc.Writers.Native,
|
||||
Text.Pandoc.Writers.Docbook,
|
||||
|
@ -663,6 +666,7 @@ test-suite test-pandoc
|
|||
Tests.Readers.EPUB
|
||||
Tests.Readers.Muse
|
||||
Tests.Readers.Creole
|
||||
Tests.Readers.FB2
|
||||
Tests.Writers.Native
|
||||
Tests.Writers.ConTeXt
|
||||
Tests.Writers.Docbook
|
||||
|
|
|
@ -738,6 +738,7 @@ defaultReaderName fallback (x:xs) =
|
|||
".odt" -> "odt"
|
||||
".pdf" -> "pdf" -- so we get an "unknown reader" error
|
||||
".doc" -> "doc" -- so we get an "unknown reader" error
|
||||
".fb2" -> "fb2"
|
||||
_ -> defaultReaderName fallback xs
|
||||
|
||||
-- Determine default writer based on output file extension
|
||||
|
|
|
@ -65,6 +65,7 @@ module Text.Pandoc.Readers
|
|||
, readTxt2Tags
|
||||
, readEPUB
|
||||
, readMuse
|
||||
, readFB2
|
||||
-- * Miscellaneous
|
||||
, getReader
|
||||
, getDefaultExtensions
|
||||
|
@ -86,6 +87,7 @@ import Text.Pandoc.Readers.Creole
|
|||
import Text.Pandoc.Readers.DocBook
|
||||
import Text.Pandoc.Readers.Docx
|
||||
import Text.Pandoc.Readers.EPUB
|
||||
import Text.Pandoc.Readers.FB2
|
||||
import Text.Pandoc.Readers.Haddock
|
||||
import Text.Pandoc.Readers.HTML (readHtml)
|
||||
import Text.Pandoc.Readers.JATS (readJATS)
|
||||
|
@ -143,6 +145,7 @@ readers = [ ("native" , TextReader readNative)
|
|||
,("t2t" , TextReader readTxt2Tags)
|
||||
,("epub" , ByteStringReader readEPUB)
|
||||
,("muse" , TextReader readMuse)
|
||||
,("fb2" , TextReader readFB2)
|
||||
]
|
||||
|
||||
-- | Retrieve reader, extensions based on formatSpec (format+extensions).
|
||||
|
|
402
src/Text/Pandoc/Readers/FB2.hs
Normal file
402
src/Text/Pandoc/Readers/FB2.hs
Normal file
|
@ -0,0 +1,402 @@
|
|||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE TupleSections #-}
|
||||
{-
|
||||
Copyright (C) 2018 Alexander Krotov <ilabdsf@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.Readers.FB2
|
||||
Copyright : Copyright (C) 2018 Alexander Krotov
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : Alexander Krotov <ilabdsf@gmail.com>
|
||||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Conversion of FB2 to 'Pandoc' document.
|
||||
-}
|
||||
|
||||
{-
|
||||
|
||||
TODO:
|
||||
- Tables
|
||||
- Named styles
|
||||
- Parse ID attribute for all elements that have it
|
||||
|
||||
-}
|
||||
|
||||
module Text.Pandoc.Readers.FB2 ( readFB2 ) where
|
||||
import Prelude
|
||||
import Control.Monad.Except (throwError)
|
||||
import Control.Monad.State.Strict
|
||||
import Data.ByteString.Lazy.Char8 ( pack )
|
||||
import Data.ByteString.Base64.Lazy
|
||||
import Data.Char (isSpace, toUpper)
|
||||
import Data.List (dropWhileEnd, intersperse)
|
||||
import Data.List.Split (splitOn)
|
||||
import Data.Text (Text)
|
||||
import Data.Default
|
||||
import Data.Maybe
|
||||
import Text.HTML.TagSoup.Entity (lookupEntity)
|
||||
import Text.Pandoc.Builder
|
||||
import Text.Pandoc.Class (PandocMonad, insertMedia)
|
||||
import Text.Pandoc.Error
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Shared (crFilter)
|
||||
import Text.XML.Light
|
||||
|
||||
type FB2 m = StateT FB2State m
|
||||
|
||||
data FB2State = FB2State{ fb2SectionLevel :: Int
|
||||
, fb2Meta :: Meta
|
||||
, fb2Authors :: [String]
|
||||
} deriving Show
|
||||
|
||||
instance Default FB2State where
|
||||
def = FB2State{ fb2SectionLevel = 1
|
||||
, fb2Meta = mempty
|
||||
, fb2Authors = []
|
||||
}
|
||||
|
||||
instance HasMeta FB2State where
|
||||
setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
|
||||
deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
|
||||
|
||||
readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
|
||||
readFB2 _ inp = do
|
||||
(bs, st) <- runStateT (mapM parseBlock $ parseXML (crFilter inp)) def
|
||||
let authors = if null $ fb2Authors st
|
||||
then id
|
||||
else setMeta "author" (map text $ reverse $ fb2Authors st)
|
||||
pure $ Pandoc (authors $ fb2Meta st) (toList . mconcat $ bs)
|
||||
|
||||
-- * Utility functions
|
||||
|
||||
trim :: String -> String
|
||||
trim = dropWhileEnd isSpace . dropWhile isSpace
|
||||
|
||||
removeHash :: String -> String
|
||||
removeHash ('#':xs) = xs
|
||||
removeHash xs = xs
|
||||
|
||||
convertEntity :: String -> String
|
||||
convertEntity e = fromMaybe (map toUpper e) (lookupEntity e)
|
||||
|
||||
parseInline :: PandocMonad m => Content -> FB2 m Inlines
|
||||
parseInline (Elem e) =
|
||||
case qName $ elName e of
|
||||
"strong" -> strong <$> parseStyleType e
|
||||
"emphasis" -> emph <$> parseStyleType e
|
||||
"style" -> parseNamedStyle e
|
||||
"a" -> parseLinkType e
|
||||
"strikethrough" -> strikeout <$> parseStyleType e
|
||||
"sub" -> subscript <$> parseStyleType e
|
||||
"sup" -> superscript <$> parseStyleType e
|
||||
"code" -> pure $ code $ strContent e
|
||||
"image" -> parseInlineImageElement e
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ ".")
|
||||
parseInline (Text x) = pure $ text $ cdData x
|
||||
parseInline (CRef r) = pure $ str $ convertEntity r
|
||||
|
||||
parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <*> parsePType e
|
||||
|
||||
-- * Root element parser
|
||||
|
||||
parseBlock :: PandocMonad m => Content -> FB2 m Blocks
|
||||
parseBlock (Elem e) =
|
||||
case qName $ elName e of
|
||||
"?xml" -> pure mempty
|
||||
"FictionBook" -> mconcat <$> mapM parseFictionBookChild (elChildren e)
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ ".")
|
||||
parseBlock _ = pure mempty
|
||||
|
||||
-- | Parse a child of @\<FictionBook>@ element.
|
||||
parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseFictionBookChild e =
|
||||
case qName $ elName e of
|
||||
"stylesheet" -> pure mempty -- stylesheet is ignored
|
||||
"description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
|
||||
"body" -> mconcat <$> mapM parseBodyChild (elChildren e)
|
||||
"binary" -> mempty <$ parseBinaryElement e
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ "in FictionBook.")
|
||||
|
||||
-- | Parse a child of @\<description>@ element.
|
||||
parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
|
||||
parseDescriptionChild e =
|
||||
case qName $ elName e of
|
||||
"title-info" -> mapM_ parseTitleInfoChild (elChildren e)
|
||||
"src-title-info" -> pure () -- ignore
|
||||
"document-info" -> pure ()
|
||||
"publish-info" -> pure ()
|
||||
"custom-info" -> pure ()
|
||||
"output" -> pure ()
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ "in description.")
|
||||
|
||||
-- | Parse a child of @\<body>@ element.
|
||||
parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseBodyChild e =
|
||||
case qName $ elName e of
|
||||
"image" -> parseImageElement e
|
||||
"title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
|
||||
"epigraph" -> parseEpigraph e
|
||||
"section" -> parseSection e
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in body.")
|
||||
|
||||
-- | Parse a @\<binary>@ element.
|
||||
parseBinaryElement :: PandocMonad m => Element -> FB2 m ()
|
||||
parseBinaryElement e =
|
||||
case (findAttr (QName "id" Nothing Nothing) e, findAttr (QName "content-type" Nothing Nothing) e) of
|
||||
(Nothing, _) -> throwError $ PandocParseError "<binary> element must have an \"id\" attribute"
|
||||
(Just _, Nothing) -> throwError $ PandocParseError "<binary> element must have a \"content-type\" attribute"
|
||||
(Just filename, contentType) -> insertMedia filename contentType (decodeLenient (pack (strContent e)))
|
||||
|
||||
-- * Type parsers
|
||||
|
||||
-- | Parse @authorType@
|
||||
parseAuthor :: PandocMonad m => Element -> FB2 m String
|
||||
parseAuthor e = unwords <$> mapM parseAuthorChild (elChildren e)
|
||||
|
||||
parseAuthorChild :: PandocMonad m => Element -> FB2 m String
|
||||
parseAuthorChild e =
|
||||
case qName $ elName e of
|
||||
"first-name" -> pure $ strContent e
|
||||
"middle-name" -> pure $ strContent e
|
||||
"last-name" -> pure $ strContent e
|
||||
"nickname" -> pure $ strContent e
|
||||
"home-page" -> pure $ strContent e
|
||||
"email" -> pure $ strContent e
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in author.")
|
||||
|
||||
-- | Parse @titleType@
|
||||
parseTitle :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseTitle e = header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
|
||||
|
||||
parseTitleType :: PandocMonad m => [Content] -> FB2 m Inlines
|
||||
parseTitleType c = mconcat . intersperse linebreak . catMaybes <$> mapM parseTitleContent c
|
||||
|
||||
parseTitleContent :: PandocMonad m => Content -> FB2 m (Maybe Inlines)
|
||||
parseTitleContent (Elem e) =
|
||||
case qName $ elName e of
|
||||
"p" -> Just <$> parsePType e
|
||||
"empty-line" -> pure $ Just mempty
|
||||
_ -> pure mempty
|
||||
parseTitleContent _ = pure Nothing
|
||||
|
||||
-- | Parse @imageType@
|
||||
parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseImageElement e =
|
||||
case href of
|
||||
Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
|
||||
Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: image without href."
|
||||
where alt = maybe mempty str $ findAttr (QName "alt" Nothing Nothing) e
|
||||
title = fromMaybe "" $ findAttr (QName "title" Nothing Nothing) e
|
||||
imgId = fromMaybe "" $ findAttr (QName "id" Nothing Nothing) e
|
||||
href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
|
||||
|
||||
-- | Parse @pType@
|
||||
parsePType :: PandocMonad m => Element -> FB2 m Inlines
|
||||
parsePType = parseStyleType -- TODO add support for optional "id" and "style" attributes
|
||||
|
||||
-- | Parse @citeType@
|
||||
parseCite :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
|
||||
|
||||
-- | Parse @citeType@ child
|
||||
parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseCiteChild e =
|
||||
case qName $ elName e of
|
||||
"p" -> para <$> parsePType e
|
||||
"poem" -> parsePoem e
|
||||
"empty-line" -> pure horizontalRule
|
||||
"subtitle" -> parseSubtitle e
|
||||
"table" -> parseTable e
|
||||
"text-author" -> para <$> parsePType e
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in cite.")
|
||||
|
||||
-- | Parse @poemType@
|
||||
parsePoem :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
|
||||
|
||||
parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parsePoemChild e =
|
||||
case qName $ elName e of
|
||||
"title" -> parseTitle e
|
||||
"subtitle" -> parseSubtitle e
|
||||
"epigraph" -> parseEpigraph e
|
||||
"stanza" -> parseStanza e
|
||||
"text-author" -> para <$> parsePType e
|
||||
"date" -> pure $ para $ text $ strContent e
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in poem.")
|
||||
|
||||
parseStanza :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseStanza e = fromList . joinLineBlocks . toList . mconcat <$> mapM parseStanzaChild (elChildren e)
|
||||
|
||||
joinLineBlocks :: [Block] -> [Block]
|
||||
joinLineBlocks (LineBlock xs:LineBlock ys:zs) = joinLineBlocks (LineBlock (xs ++ ys) : zs)
|
||||
joinLineBlocks (x:xs) = x:joinLineBlocks xs
|
||||
joinLineBlocks [] = []
|
||||
|
||||
parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseStanzaChild e =
|
||||
case qName $ elName e of
|
||||
"title" -> parseTitle e
|
||||
"subtitle" -> parseSubtitle e
|
||||
"v" -> lineBlock . (:[]) <$> parsePType e
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in stanza.")
|
||||
|
||||
-- | Parse @epigraphType@
|
||||
parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseEpigraph e =
|
||||
divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
|
||||
where divId = fromMaybe "" $ findAttr (QName "id" Nothing Nothing) e
|
||||
|
||||
parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseEpigraphChild e =
|
||||
case qName $ elName e of
|
||||
"p" -> para <$> parsePType e
|
||||
"poem" -> parsePoem e
|
||||
"cite" -> parseCite e
|
||||
"empty-line" -> pure horizontalRule
|
||||
"text-author" -> para <$> parsePType e
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in epigraph.")
|
||||
|
||||
-- | Parse @annotationType@
|
||||
parseAnnotation :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
|
||||
|
||||
parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseAnnotationChild e =
|
||||
case qName $ elName e of
|
||||
"p" -> para <$> parsePType e
|
||||
"poem" -> parsePoem e
|
||||
"cite" -> parseCite e
|
||||
"subtitle" -> parseSubtitle e
|
||||
"table" -> parseTable e
|
||||
"empty-line" -> pure horizontalRule
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in annotation.")
|
||||
|
||||
-- | Parse @sectionType@
|
||||
parseSection :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseSection e = do
|
||||
n <- gets fb2SectionLevel
|
||||
modify $ \st -> st{ fb2SectionLevel = n + 1 }
|
||||
let sectionId = fromMaybe "" $ findAttr (QName "id" Nothing Nothing) e
|
||||
bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
|
||||
modify $ \st -> st{ fb2SectionLevel = n }
|
||||
pure bs
|
||||
|
||||
parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseSectionChild e =
|
||||
case qName $ elName e of
|
||||
"title" -> parseBodyChild e
|
||||
"epigraph" -> parseEpigraph e
|
||||
"image" -> parseImageElement e
|
||||
"annotation" -> parseAnnotation e
|
||||
"poem" -> parsePoem e
|
||||
"cite" -> parseCite e
|
||||
"empty-line" -> pure horizontalRule
|
||||
"table" -> parseTable e
|
||||
"subtitle" -> parseSubtitle e
|
||||
"p" -> para <$> parsePType e
|
||||
"section" -> parseSection e
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in section.")
|
||||
|
||||
-- | parse @styleType@
|
||||
parseStyleType :: PandocMonad m => Element -> FB2 m Inlines
|
||||
parseStyleType e = mconcat <$> mapM parseInline (elContent e)
|
||||
|
||||
-- | Parse @namedStyleType@
|
||||
parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
|
||||
parseNamedStyle e = do
|
||||
content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
|
||||
let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
|
||||
case findAttr (QName "name" Nothing Nothing) e of
|
||||
Just name -> pure $ spanWith ("", [name], lang) content
|
||||
Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: link without required name."
|
||||
|
||||
parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
|
||||
parseNamedStyleChild (Elem e) =
|
||||
case qName (elName e) of
|
||||
"strong" -> strong <$> parseStyleType e
|
||||
"emphasis" -> emph <$> parseStyleType e
|
||||
"style" -> parseNamedStyle e
|
||||
"a" -> parseLinkType e
|
||||
"strikethrough" -> strikeout <$> parseStyleType e
|
||||
"sub" -> subscript <$> parseStyleType e
|
||||
"sup" -> superscript <$> parseStyleType e
|
||||
"code" -> pure $ code $ strContent e
|
||||
"image" -> parseInlineImageElement e
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ ".")
|
||||
parseNamedStyleChild x = parseInline x
|
||||
|
||||
-- | Parse @linkType@
|
||||
parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
|
||||
parseLinkType e = do
|
||||
content <- mconcat <$> mapM parseStyleLinkType (elContent e)
|
||||
case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
|
||||
Just href -> pure $ link href "" content
|
||||
Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: link without required href."
|
||||
|
||||
-- | Parse @styleLinkType@
|
||||
parseStyleLinkType :: PandocMonad m => Content -> FB2 m Inlines
|
||||
parseStyleLinkType x@(Elem e) =
|
||||
case qName (elName e) of
|
||||
"a" -> throwError $ PandocParseError "Couldn't parse FB2 file: links cannot be nested."
|
||||
_ -> parseInline x
|
||||
parseStyleLinkType x = parseInline x
|
||||
|
||||
-- | Parse @tableType@
|
||||
parseTable :: PandocMonad m => Element -> FB2 m Blocks
|
||||
parseTable _ = pure mempty -- TODO: tables are not supported yet
|
||||
|
||||
-- | Parse @title-infoType@
|
||||
parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
|
||||
parseTitleInfoChild e =
|
||||
case qName (elName e) of
|
||||
"genre" -> pure ()
|
||||
"author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
|
||||
"book-title" -> modify (setMeta "title" (text $ strContent e))
|
||||
"annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
|
||||
"keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ splitOn "," $ strContent e))
|
||||
"date" -> modify (setMeta "date" (text $ strContent e))
|
||||
"coverpage" -> parseCoverPage e
|
||||
"lang" -> pure ()
|
||||
"src-lang" -> pure ()
|
||||
"translator" -> pure ()
|
||||
"sequence" -> pure ()
|
||||
name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in title-info.")
|
||||
|
||||
parseCoverPage :: PandocMonad m => Element -> FB2 m ()
|
||||
parseCoverPage e =
|
||||
case findChild (QName "image" (Just "http://www.gribuser.ru/xml/fictionbook/2.0") Nothing) e of
|
||||
Just img -> case href of
|
||||
Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
|
||||
Nothing -> pure ()
|
||||
where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
|
||||
Nothing -> pure ()
|
||||
|
||||
-- | Parse @inlineImageType@ element
|
||||
parseInlineImageElement :: PandocMonad m
|
||||
=> Element
|
||||
-> FB2 m Inlines
|
||||
parseInlineImageElement e =
|
||||
case href of
|
||||
Just src -> pure $ imageWith ("", [], []) (removeHash src) "" alt
|
||||
Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: inline image without href."
|
||||
where alt = maybe mempty str $ findAttr (QName "alt" Nothing Nothing) e
|
||||
href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
|
29
test/Tests/Readers/FB2.hs
Normal file
29
test/Tests/Readers/FB2.hs
Normal file
|
@ -0,0 +1,29 @@
|
|||
{-# LANGUAGE NoImplicitPrelude #-}
|
||||
|
||||
module Tests.Readers.FB2 (tests) where
|
||||
|
||||
import Prelude
|
||||
import Test.Tasty
|
||||
import Tests.Helpers
|
||||
import Test.Tasty.Golden (goldenVsString)
|
||||
import qualified Data.ByteString as BS
|
||||
import Text.Pandoc
|
||||
import Text.Pandoc.UTF8 (toText, fromTextLazy)
|
||||
import Data.Text (Text)
|
||||
import Data.Text.Lazy (fromStrict)
|
||||
import System.FilePath (replaceExtension)
|
||||
|
||||
fb2ToNative :: Text -> Text
|
||||
fb2ToNative = purely (writeNative def{ writerTemplate = Just "" }) . purely (readFB2 def)
|
||||
|
||||
fb2Test :: TestName -> FilePath -> TestTree
|
||||
fb2Test name path = goldenVsString name native (fromTextLazy . fromStrict . fb2ToNative . toText <$> BS.readFile path)
|
||||
where native = replaceExtension path ".native"
|
||||
|
||||
tests :: [TestTree]
|
||||
tests = [ fb2Test "Emphasis" "fb2/reader/emphasis.fb2"
|
||||
, fb2Test "Titles" "fb2/reader/titles.fb2"
|
||||
, fb2Test "Epigraph" "fb2/reader/epigraph.fb2"
|
||||
, fb2Test "Poem" "fb2/reader/poem.fb2"
|
||||
, fb2Test "Meta" "fb2/reader/meta.fb2"
|
||||
]
|
11
test/fb2/reader/emphasis.fb2
Normal file
11
test/fb2/reader/emphasis.fb2
Normal file
|
@ -0,0 +1,11 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
|
||||
<body>
|
||||
<section>
|
||||
<p>Plain, <strong>strong</strong>, <emphasis>emphasis</emphasis>, <strong><emphasis>strong emphasis</emphasis></strong>, <emphasis><strong>emphasized strong</strong></emphasis>.</p>
|
||||
<p>Strikethrough: <strikethrough>deleted</strikethrough></p>
|
||||
<p><sub>Subscript</sub> and <sup>superscript</sup></p>
|
||||
<p>Some <code>code</code></p>
|
||||
</section>
|
||||
</body>
|
||||
</FictionBook>
|
6
test/fb2/reader/emphasis.native
Normal file
6
test/fb2/reader/emphasis.native
Normal file
|
@ -0,0 +1,6 @@
|
|||
Pandoc (Meta {unMeta = fromList []})
|
||||
[Div ("",["section"],[])
|
||||
[Para [Str "Plain,",Space,Strong [Str "strong"],Str ",",Space,Emph [Str "emphasis"],Str ",",Space,Strong [Emph [Str "strong",Space,Str "emphasis"]],Str ",",Space,Emph [Strong [Str "emphasized",Space,Str "strong"]],Str "."]
|
||||
,Para [Str "Strikethrough:",Space,Strikeout [Str "deleted"]]
|
||||
,Para [Subscript [Str "Subscript"],Space,Str "and",Space,Superscript [Str "superscript"]]
|
||||
,Para [Str "Some",Space,Code ("",[],[]) "code"]]]
|
18
test/fb2/reader/epigraph.fb2
Normal file
18
test/fb2/reader/epigraph.fb2
Normal file
|
@ -0,0 +1,18 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
|
||||
<body>
|
||||
<epigraph>
|
||||
<p>Body epigraph</p>
|
||||
</epigraph>
|
||||
<section>
|
||||
<epigraph>
|
||||
<p>Section epigraph</p>
|
||||
</epigraph>
|
||||
<section>
|
||||
<epigraph>
|
||||
<p>Subsection epigraph</p>
|
||||
</epigraph>
|
||||
</section>
|
||||
</section>
|
||||
</body>
|
||||
</FictionBook>
|
9
test/fb2/reader/epigraph.native
Normal file
9
test/fb2/reader/epigraph.native
Normal file
|
@ -0,0 +1,9 @@
|
|||
Pandoc (Meta {unMeta = fromList []})
|
||||
[Div ("",["epigraph"],[])
|
||||
[Para [Str "Body",Space,Str "epigraph"]]
|
||||
,Div ("",["section"],[])
|
||||
[Div ("",["epigraph"],[])
|
||||
[Para [Str "Section",Space,Str "epigraph"]]
|
||||
,Div ("",["section"],[])
|
||||
[Div ("",["epigraph"],[])
|
||||
[Para [Str "Subsection",Space,Str "epigraph"]]]]]
|
26
test/fb2/reader/meta.fb2
Normal file
26
test/fb2/reader/meta.fb2
Normal file
|
@ -0,0 +1,26 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
|
||||
<description>
|
||||
<title-info>
|
||||
<author>
|
||||
<first-name>First</first-name>
|
||||
<middle-name>Middle</middle-name>
|
||||
<last-name>Last</last-name>
|
||||
</author>
|
||||
<author>
|
||||
<first-name>Another</first-name>
|
||||
<last-name>Author</last-name>
|
||||
</author>
|
||||
<book-title>Book title</book-title>
|
||||
<annotation>
|
||||
<p>Book annotation</p>
|
||||
<p>Second paragraph of book annotation</p>
|
||||
</annotation>
|
||||
<keywords>foo, bar, baz</keywords>
|
||||
<date>2018</date>
|
||||
</title-info>
|
||||
</description>
|
||||
<body>
|
||||
<title><p>Body title</p></title>
|
||||
</body>
|
||||
</FictionBook>
|
2
test/fb2/reader/meta.native
Normal file
2
test/fb2/reader/meta.native
Normal file
|
@ -0,0 +1,2 @@
|
|||
Pandoc (Meta {unMeta = fromList [("abstract",MetaBlocks [Para [Str "Book",Space,Str "annotation"],Para [Str "Second",Space,Str "paragraph",Space,Str "of",Space,Str "book",Space,Str "annotation"]]),("author",MetaList [MetaInlines [Str "First",Space,Str "Middle",Space,Str "Last"],MetaInlines [Str "Another",Space,Str "Author"]]),("date",MetaInlines [Str "2018"]),("keywords",MetaList [MetaString "foo",MetaString "bar",MetaString "baz"]),("title",MetaInlines [Str "Book",Space,Str "title"])]})
|
||||
[Header 1 ("",[],[]) [Str "Body",Space,Str "title"]]
|
28
test/fb2/reader/poem.fb2
Normal file
28
test/fb2/reader/poem.fb2
Normal file
|
@ -0,0 +1,28 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
|
||||
<body>
|
||||
<section>
|
||||
<poem>
|
||||
<title>
|
||||
<p>Poem title</p>
|
||||
</title>
|
||||
<epigraph>
|
||||
<p>Poem epigraph</p>
|
||||
</epigraph>
|
||||
<stanza>
|
||||
<subtitle>Subtitle</subtitle>
|
||||
<title>
|
||||
<p>First stanza title</p>
|
||||
</title>
|
||||
<v>Verse</v>
|
||||
<v><emphasis>More</emphasis> verse</v>
|
||||
</stanza>
|
||||
<stanza>
|
||||
<v>One more stanza</v>
|
||||
</stanza>
|
||||
<text-author>Author</text-author>
|
||||
<date>April 2018</date>
|
||||
</poem>
|
||||
</section>
|
||||
</body>
|
||||
</FictionBook>
|
14
test/fb2/reader/poem.native
Normal file
14
test/fb2/reader/poem.native
Normal file
|
@ -0,0 +1,14 @@
|
|||
Pandoc (Meta {unMeta = fromList []})
|
||||
[Div ("",["section"],[])
|
||||
[Header 2 ("",[],[]) [Str "Poem",Space,Str "title"]
|
||||
,Div ("",["epigraph"],[])
|
||||
[Para [Str "Poem",Space,Str "epigraph"]]
|
||||
,Header 2 ("",["unnumbered"],[]) [Str "Subtitle"]
|
||||
,Header 2 ("",[],[]) [Str "First",Space,Str "stanza",Space,Str "title"]
|
||||
,LineBlock
|
||||
[[Str "Verse"]
|
||||
,[Emph [Str "More"],Space,Str "verse"]]
|
||||
,LineBlock
|
||||
[[Str "One",Space,Str "more",Space,Str "stanza"]]
|
||||
,Para [Str "Author"]
|
||||
,Para [Str "April",Space,Str "2018"]]]
|
18
test/fb2/reader/titles.fb2
Normal file
18
test/fb2/reader/titles.fb2
Normal file
|
@ -0,0 +1,18 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
|
||||
<body>
|
||||
<title><p>Body title</p></title>
|
||||
<section>
|
||||
<title><p>Section title</p></title>
|
||||
<section>
|
||||
<title>
|
||||
<p>Subsection title</p>
|
||||
<p>with multiple paragraphs</p>
|
||||
</title>
|
||||
</section>
|
||||
<section>
|
||||
<title><p>Another subsection title</p></title>
|
||||
</section>
|
||||
</section>
|
||||
</body>
|
||||
</FictionBook>
|
8
test/fb2/reader/titles.native
Normal file
8
test/fb2/reader/titles.native
Normal file
|
@ -0,0 +1,8 @@
|
|||
Pandoc (Meta {unMeta = fromList []})
|
||||
[Header 1 ("",[],[]) [Str "Body",Space,Str "title"]
|
||||
,Div ("",["section"],[])
|
||||
[Header 2 ("",[],[]) [Str "Section",Space,Str "title"]
|
||||
,Div ("",["section"],[])
|
||||
[Header 3 ("",[],[]) [Str "Subsection",Space,Str "title",LineBreak,Str "with",Space,Str "multiple",Space,Str "paragraphs"]]
|
||||
,Div ("",["section"],[])
|
||||
[Header 3 ("",[],[]) [Str "Another",Space,Str "subsection",Space,Str "title"]]]]
|
|
@ -12,6 +12,7 @@ import qualified Tests.Old
|
|||
import qualified Tests.Readers.Creole
|
||||
import qualified Tests.Readers.Docx
|
||||
import qualified Tests.Readers.EPUB
|
||||
import qualified Tests.Readers.FB2
|
||||
import qualified Tests.Readers.HTML
|
||||
import qualified Tests.Readers.JATS
|
||||
import qualified Tests.Readers.LaTeX
|
||||
|
@ -75,6 +76,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests
|
|||
, testGroup "EPUB" Tests.Readers.EPUB.tests
|
||||
, testGroup "Muse" Tests.Readers.Muse.tests
|
||||
, testGroup "Creole" Tests.Readers.Creole.tests
|
||||
, testGroup "FB2" Tests.Readers.FB2.tests
|
||||
]
|
||||
, testGroup "Lua filters" Tests.Lua.tests
|
||||
]
|
||||
|
|
Loading…
Reference in a new issue