From 021cdb543b00995f86eefd5d84314d9e1f5abf82 Mon Sep 17 00:00:00 2001 From: nuew Date: Fri, 11 Jun 2021 20:09:06 -0400 Subject: [PATCH] epub: Add EPUB3 subject metadata (authority/term) This adds the ability to specify EPUB 3 `authority` and `term` specific refinements to the `subject` tag. Specifying a plain `subject` tag in metadata will function as before. --- MANUAL.txt | 10 ++++++-- src/Text/Pandoc/Writers/EPUB.hs | 41 +++++++++++++++++++++++++-------- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/MANUAL.txt b/MANUAL.txt index 54eb96467..7caedc94a 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -2189,7 +2189,7 @@ Currently the following pipes are predefined: and AsciiDoc metadata; repeat as for `author`, above `subject` -: document subject, included in ODT, PDF, docx and pptx metadata +: document subject, included in ODT, PDF, docx, EPUB, and pptx metadata `description` : document description, included in ODT, docx and pptx metadata. Some @@ -6189,7 +6189,12 @@ The following fields are recognized: language if nothing is specified. `subject` - ~ A string value or a list of such values. + ~ Either a string value, or an object with fields `text`, `authority`, + and `term`, or a list of such values. Valid values for `authority` + are either a [reserved authority value] (currently `AAT`, `BIC`, + `BISAC`, `CLC`, `DDC`, `CLIL`, `EuroVoc`, `MEDTOP`, `LCSH`, `NDC`, + `Thema`, `UDC`, and `WGS`) or an absolute IRI identifying a custom + scheme. Valid values for `term` are defined by the scheme. `description` ~ A string value. 
@@ -6239,6 +6244,7 @@ The following fields are recognized: - `scroll-axis`: `vertical`|`horizontal`|`default` [MARC relators]: https://loc.gov/marc/relators/relaterm.html +[reserved authority value]: https://idpf.github.io/epub-registries/authorities/ [`spine` element]: http://idpf.org/epub/301/spec/epub-publications.html#sec-spine-elem ## The `epub:type` attribute diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 86a7646e6..d1417ff48 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -79,7 +79,7 @@ data EPUBMetadata = EPUBMetadata{ , epubLanguage :: Text , epubCreator :: [Creator] , epubContributor :: [Creator] - , epubSubject :: [Text] + , epubSubject :: [Subject] , epubDescription :: Maybe Text , epubType :: Maybe Text , epubFormat :: Maybe Text @@ -121,6 +121,12 @@ data Title = Title{ data ProgressionDirection = LTR | RTL deriving Show +data Subject = Subject{ + subjectText :: Text + , subjectAuthority :: Maybe Text + , subjectTerm :: Maybe Text + } deriving Show + dcName :: Text -> QName dcName n = QName n Nothing (Just "dc") @@ -232,7 +238,11 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md , creatorRole = getAttr "role" , creatorFileAs = getAttr "file-as" } : epubContributor md } - | name == "subject" = md{ epubSubject = strContent e : epubSubject md } + | name == "subject" = md{ epubSubject = + Subject { subjectText = strContent e + , subjectAuthority = getAttr "authority" + , subjectTerm = getAttr "term" + } : epubSubject md } | name == "description" = md { epubDescription = Just $ strContent e } | name == "type" = md { epubType = Just $ strContent e } | name == "format" = md { epubFormat = Just $ strContent e } @@ -313,12 +323,13 @@ getDate s meta = getList s meta handleMetaValue handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv , dateEvent = Nothing } -simpleList :: T.Text -> Meta -> [Text] -simpleList s meta = - case 
lookupMeta s meta of - Just (MetaList xs) -> map metaValueToString xs - Just x -> [metaValueToString x] - Nothing -> [] +getSubject :: T.Text -> Meta -> [Subject] +getSubject s meta = getList s meta handleMetaValue + where handleMetaValue (MetaMap m) = + Subject{ subjectText = maybe "" metaValueToString $ M.lookup "text" m + , subjectAuthority = metaValueToString <$> M.lookup "authority" m + , subjectTerm = metaValueToString <$> M.lookup "term" m } + handleMetaValue mv = Subject (metaValueToString mv) Nothing Nothing metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata metadataFromMeta opts meta = EPUBMetadata{ @@ -352,7 +363,7 @@ metadataFromMeta opts meta = EPUBMetadata{ lookupMeta "language" meta `mplus` lookupMeta "lang" meta creators = getCreator "creator" meta contributors = getCreator "contributor" meta - subjects = simpleList "subject" meta + subjects = getSubject "subject" meta description = metaValueToString <$> lookupMeta "description" meta epubtype = metaValueToString <$> lookupMeta "type" meta format = metaValueToString <$> lookupMeta "format" meta @@ -974,7 +985,7 @@ metadataElement version md currentTime = epubCreator md contributorNodes = withIds "epub-contributor" (toCreatorNode "contributor") $ epubContributor md - subjectNodes = map (dcTag "subject") $ epubSubject md + subjectNodes = withIds "subject" toSubjectNode $ epubSubject md descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md typeNodes = maybe [] (dcTag' "type") $ epubType md formatNodes = maybe [] (dcTag' "format") $ epubFormat md @@ -1046,6 +1057,16 @@ metadataElement version md currentTime = (("id",id') : maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ dateText date] + toSubjectNode id' subject + | version == EPUB2 = [dcNode "subject" ! + [("id",id')] $ subjectText subject] + | otherwise = (dcNode "subject" ! [("id",id')] $ subjectText subject) + : maybe [] (\x -> (unode "meta" ! 
+ [("refines", "#" <> id'),("property","authority")] $ x) : + maybe [] (\y -> [unode "meta" ! + [("refines", "#" <> id'),("property","term")] $ y]) + (subjectTerm subject)) + (subjectAuthority subject) schemeToOnix :: Text -> Text schemeToOnix "ISBN-10" = "02" schemeToOnix "GTIN-13" = "03"