epub: Add EPUB3 subject metadata (authority/term)

This adds the ability to specify the EPUB 3 `authority` and `term`
refinements of the `subject` tag. Specifying a plain `subject` tag in
metadata will function as before.
This commit is contained in:
nuew 2021-06-11 20:09:06 -04:00 committed by John MacFarlane
parent 75d551f65b
commit 021cdb543b
2 changed files with 39 additions and 12 deletions

View file

@ -2189,7 +2189,7 @@ Currently the following pipes are predefined:
and AsciiDoc metadata; repeat as for `author`, above and AsciiDoc metadata; repeat as for `author`, above
`subject` `subject`
: document subject, included in ODT, PDF, docx and pptx metadata : document subject, included in ODT, PDF, docx, EPUB, and pptx metadata
`description` `description`
: document description, included in ODT, docx and pptx metadata. Some : document description, included in ODT, docx and pptx metadata. Some
@ -6189,7 +6189,12 @@ The following fields are recognized:
language if nothing is specified. language if nothing is specified.
`subject` `subject`
~ A string value or a list of such values. ~ Either a string value, or an object with fields `text`, `authority`,
and `term`, or a list of such objects. Valid values for `authority`
are either a [reserved authority value] (currently `AAT`, `BIC`,
`BISAC`, `CLC`, `DDC`, `CLIL`, `EuroVoc`, `MEDTOP`, `LCSH`, `NDC`,
`Thema`, `UDC`, and `WGS`) or an absolute IRI identifying a custom
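scheme. Valid values for `term` are defined by the scheme. A `term`
is only written when an `authority` is also given, and both
refinements are emitted for EPUB 3 output only.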
`description` `description`
~ A string value. ~ A string value.
@ -6239,6 +6244,7 @@ The following fields are recognized:
- `scroll-axis`: `vertical`|`horizontal`|`default` - `scroll-axis`: `vertical`|`horizontal`|`default`
[MARC relators]: https://loc.gov/marc/relators/relaterm.html [MARC relators]: https://loc.gov/marc/relators/relaterm.html
[reserved authority value]: https://idpf.github.io/epub-registries/authorities/
[`spine` element]: http://idpf.org/epub/301/spec/epub-publications.html#sec-spine-elem [`spine` element]: http://idpf.org/epub/301/spec/epub-publications.html#sec-spine-elem
## The `epub:type` attribute ## The `epub:type` attribute

View file

@ -79,7 +79,7 @@ data EPUBMetadata = EPUBMetadata{
, epubLanguage :: Text , epubLanguage :: Text
, epubCreator :: [Creator] , epubCreator :: [Creator]
, epubContributor :: [Creator] , epubContributor :: [Creator]
, epubSubject :: [Text] , epubSubject :: [Subject]
, epubDescription :: Maybe Text , epubDescription :: Maybe Text
, epubType :: Maybe Text , epubType :: Maybe Text
, epubFormat :: Maybe Text , epubFormat :: Maybe Text
@ -121,6 +121,12 @@ data Title = Title{
data ProgressionDirection = LTR | RTL deriving Show data ProgressionDirection = LTR | RTL deriving Show
-- | A @dc:subject@ metadata entry together with its optional
-- @authority@ and @term@ refinements.
data Subject = Subject
  { subjectText      :: Text
    -- ^ Text content of the subject element.
  , subjectAuthority :: Maybe Text
    -- ^ Value of the @authority@ refinement, when one was supplied.
  , subjectTerm      :: Maybe Text
    -- ^ Value of the @term@ refinement, when one was supplied.
  } deriving Show
dcName :: Text -> QName dcName :: Text -> QName
dcName n = QName n Nothing (Just "dc") dcName n = QName n Nothing (Just "dc")
@ -232,7 +238,11 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
, creatorRole = getAttr "role" , creatorRole = getAttr "role"
, creatorFileAs = getAttr "file-as" , creatorFileAs = getAttr "file-as"
} : epubContributor md } } : epubContributor md }
| name == "subject" = md{ epubSubject = strContent e : epubSubject md } | name == "subject" = md{ epubSubject =
Subject { subjectText = strContent e
, subjectAuthority = getAttr "authority"
, subjectTerm = getAttr "term"
} : epubSubject md }
| name == "description" = md { epubDescription = Just $ strContent e } | name == "description" = md { epubDescription = Just $ strContent e }
| name == "type" = md { epubType = Just $ strContent e } | name == "type" = md { epubType = Just $ strContent e }
| name == "format" = md { epubFormat = Just $ strContent e } | name == "format" = md { epubFormat = Just $ strContent e }
@ -313,12 +323,13 @@ getDate s meta = getList s meta handleMetaValue
handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv
, dateEvent = Nothing } , dateEvent = Nothing }
simpleList :: T.Text -> Meta -> [Text] getSubject :: T.Text -> Meta -> [Subject]
simpleList s meta = getSubject s meta = getList s meta handleMetaValue
case lookupMeta s meta of where handleMetaValue (MetaMap m) =
Just (MetaList xs) -> map metaValueToString xs Subject{ subjectText = maybe "" metaValueToString $ M.lookup "text" m
Just x -> [metaValueToString x] , subjectAuthority = metaValueToString <$> M.lookup "authority" m
Nothing -> [] , subjectTerm = metaValueToString <$> M.lookup "term" m }
handleMetaValue mv = Subject (metaValueToString mv) Nothing Nothing
metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata
metadataFromMeta opts meta = EPUBMetadata{ metadataFromMeta opts meta = EPUBMetadata{
@ -352,7 +363,7 @@ metadataFromMeta opts meta = EPUBMetadata{
lookupMeta "language" meta `mplus` lookupMeta "lang" meta lookupMeta "language" meta `mplus` lookupMeta "lang" meta
creators = getCreator "creator" meta creators = getCreator "creator" meta
contributors = getCreator "contributor" meta contributors = getCreator "contributor" meta
subjects = simpleList "subject" meta subjects = getSubject "subject" meta
description = metaValueToString <$> lookupMeta "description" meta description = metaValueToString <$> lookupMeta "description" meta
epubtype = metaValueToString <$> lookupMeta "type" meta epubtype = metaValueToString <$> lookupMeta "type" meta
format = metaValueToString <$> lookupMeta "format" meta format = metaValueToString <$> lookupMeta "format" meta
@ -974,7 +985,7 @@ metadataElement version md currentTime =
epubCreator md epubCreator md
contributorNodes = withIds "epub-contributor" contributorNodes = withIds "epub-contributor"
(toCreatorNode "contributor") $ epubContributor md (toCreatorNode "contributor") $ epubContributor md
subjectNodes = map (dcTag "subject") $ epubSubject md subjectNodes = withIds "subject" toSubjectNode $ epubSubject md
descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md
typeNodes = maybe [] (dcTag' "type") $ epubType md typeNodes = maybe [] (dcTag' "type") $ epubType md
formatNodes = maybe [] (dcTag' "format") $ epubFormat md formatNodes = maybe [] (dcTag' "format") $ epubFormat md
@ -1046,6 +1057,16 @@ metadataElement version md currentTime =
(("id",id') : (("id",id') :
maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $
dateText date] dateText date]
-- Render one subject as a @dc:subject@ element carrying the given id.
-- For EPUB2 only that element is produced. For other versions the
-- element is followed by @meta@ elements that refine it (via
-- @refines="#id"@): an @authority@ property when the subject has an
-- authority, and additionally a @term@ property when it also has a
-- term. A term without an authority produces no refinement.
toSubjectNode id' subject = subjectNode : refinementNodes
  where
    subjectNode = (dcNode "subject" ! [("id",id')]) (subjectText subject)
    -- Build a @meta@ element that refines the subject element above.
    refine prop val =
      (unode "meta" ! [("refines", "#" <> id'),("property",prop)]) val
    refinementNodes
      | version == EPUB2 = []
      | otherwise =
          case subjectAuthority subject of
            Nothing   -> []
            Just auth ->
              refine "authority" auth :
                case subjectTerm subject of
                  Nothing -> []
                  Just term -> [refine "term" term]
schemeToOnix :: Text -> Text schemeToOnix :: Text -> Text
schemeToOnix "ISBN-10" = "02" schemeToOnix "ISBN-10" = "02"
schemeToOnix "GTIN-13" = "03" schemeToOnix "GTIN-13" = "03"