DocBook reader: metadata handling improvements.
Now we properly parse title and subtitle elements that are direct children of book and article (as well as children of bookinfo, articleinfo, or info). We also now use the "subtitle" metadata field for subtitles, rather than tacking the subtitle on to the title.
This commit is contained in:
parent
fb94c0f6a1
commit
78dca68a0a
2 changed files with 29 additions and 39 deletions
|
@ -537,7 +537,6 @@ type DB m = StateT DBState m
|
||||||
data DBState = DBState{ dbSectionLevel :: Int
|
data DBState = DBState{ dbSectionLevel :: Int
|
||||||
, dbQuoteType :: QuoteType
|
, dbQuoteType :: QuoteType
|
||||||
, dbMeta :: Meta
|
, dbMeta :: Meta
|
||||||
, dbAcceptsMeta :: Bool
|
|
||||||
, dbBook :: Bool
|
, dbBook :: Bool
|
||||||
, dbFigureTitle :: Inlines
|
, dbFigureTitle :: Inlines
|
||||||
, dbContent :: [Content]
|
, dbContent :: [Content]
|
||||||
|
@ -547,7 +546,6 @@ instance Default DBState where
|
||||||
def = DBState{ dbSectionLevel = 0
|
def = DBState{ dbSectionLevel = 0
|
||||||
, dbQuoteType = DoubleQuote
|
, dbQuoteType = DoubleQuote
|
||||||
, dbMeta = mempty
|
, dbMeta = mempty
|
||||||
, dbAcceptsMeta = False
|
|
||||||
, dbBook = False
|
, dbBook = False
|
||||||
, dbFigureTitle = mempty
|
, dbFigureTitle = mempty
|
||||||
, dbContent = [] }
|
, dbContent = [] }
|
||||||
|
@ -609,18 +607,26 @@ named s e = qName (elName e) == s
|
||||||
|
|
||||||
--
|
--
|
||||||
|
|
||||||
acceptingMetadata :: PandocMonad m => DB m a -> DB m a
|
addMetadataFromElement :: PandocMonad m => Element -> DB m Blocks
|
||||||
acceptingMetadata p = do
|
addMetadataFromElement e = do
|
||||||
modify (\s -> s { dbAcceptsMeta = True } )
|
case filterChild (named "title") e of
|
||||||
res <- p
|
Nothing -> return ()
|
||||||
modify (\s -> s { dbAcceptsMeta = False })
|
Just z -> do
|
||||||
return res
|
getInlines z >>= addMeta "title"
|
||||||
|
addMetaField "subtitle" z
|
||||||
checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a
|
case filterChild (named "authorgroup") e of
|
||||||
checkInMeta p = do
|
Nothing -> return ()
|
||||||
accepts <- dbAcceptsMeta <$> get
|
Just z -> addMetaField "author" z
|
||||||
when accepts p
|
addMetaField "subtitle" e
|
||||||
return mempty
|
addMetaField "author" e
|
||||||
|
addMetaField "date" e
|
||||||
|
addMetaField "release" e
|
||||||
|
return mempty
|
||||||
|
where addMetaField fieldname elt =
|
||||||
|
case filterChildren (named fieldname) elt of
|
||||||
|
[] -> return ()
|
||||||
|
[z] -> getInlines z >>= addMeta fieldname
|
||||||
|
zs -> mapM getInlines zs >>= addMeta fieldname
|
||||||
|
|
||||||
addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
|
addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
|
||||||
addMeta field val = modify (setMeta field val)
|
addMeta field val = modify (setMeta field val)
|
||||||
|
@ -718,11 +724,6 @@ parseBlock (Elem e) =
|
||||||
"attribution" -> return mempty
|
"attribution" -> return mempty
|
||||||
"titleabbrev" -> return mempty
|
"titleabbrev" -> return mempty
|
||||||
"authorinitials" -> return mempty
|
"authorinitials" -> return mempty
|
||||||
"title" -> checkInMeta getTitle
|
|
||||||
"author" -> checkInMeta getAuthor
|
|
||||||
"authorgroup" -> checkInMeta getAuthorGroup
|
|
||||||
"releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release")
|
|
||||||
"date" -> checkInMeta getDate
|
|
||||||
"bibliography" -> sect 0
|
"bibliography" -> sect 0
|
||||||
"bibliodiv" -> sect 1
|
"bibliodiv" -> sect 1
|
||||||
"biblioentry" -> parseMixed para (elContent e)
|
"biblioentry" -> parseMixed para (elContent e)
|
||||||
|
@ -788,8 +789,8 @@ parseBlock (Elem e) =
|
||||||
"figure" -> getFigure e
|
"figure" -> getFigure e
|
||||||
"mediaobject" -> para <$> getMediaobject e
|
"mediaobject" -> para <$> getMediaobject e
|
||||||
"caption" -> return mempty
|
"caption" -> return mempty
|
||||||
"info" -> metaBlock
|
"info" -> addMetadataFromElement e
|
||||||
"articleinfo" -> metaBlock
|
"articleinfo" -> addMetadataFromElement e
|
||||||
"sectioninfo" -> return mempty -- keywords & other metadata
|
"sectioninfo" -> return mempty -- keywords & other metadata
|
||||||
"refsectioninfo" -> return mempty -- keywords & other metadata
|
"refsectioninfo" -> return mempty -- keywords & other metadata
|
||||||
"refsect1info" -> return mempty -- keywords & other metadata
|
"refsect1info" -> return mempty -- keywords & other metadata
|
||||||
|
@ -803,10 +804,11 @@ parseBlock (Elem e) =
|
||||||
"chapterinfo" -> return mempty -- keywords & other metadata
|
"chapterinfo" -> return mempty -- keywords & other metadata
|
||||||
"glossaryinfo" -> return mempty -- keywords & other metadata
|
"glossaryinfo" -> return mempty -- keywords & other metadata
|
||||||
"appendixinfo" -> return mempty -- keywords & other metadata
|
"appendixinfo" -> return mempty -- keywords & other metadata
|
||||||
"bookinfo" -> metaBlock
|
"bookinfo" -> addMetadataFromElement e
|
||||||
"article" -> modify (\st -> st{ dbBook = False }) >>
|
"article" -> modify (\st -> st{ dbBook = False }) >>
|
||||||
getBlocks e
|
addMetadataFromElement e >> getBlocks e
|
||||||
"book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e
|
"book" -> modify (\st -> st{ dbBook = True }) >>
|
||||||
|
addMetadataFromElement e >> getBlocks e
|
||||||
"table" -> parseTable
|
"table" -> parseTable
|
||||||
"informaltable" -> parseTable
|
"informaltable" -> parseTable
|
||||||
"informalexample" -> divWith ("", ["informalexample"], []) <$>
|
"informalexample" -> divWith ("", ["informalexample"], []) <$>
|
||||||
|
@ -816,6 +818,8 @@ parseBlock (Elem e) =
|
||||||
"screen" -> codeBlockWithLang
|
"screen" -> codeBlockWithLang
|
||||||
"programlisting" -> codeBlockWithLang
|
"programlisting" -> codeBlockWithLang
|
||||||
"?xml" -> return mempty
|
"?xml" -> return mempty
|
||||||
|
"title" -> return mempty -- handled in parent element
|
||||||
|
"subtitle" -> return mempty -- handled in parent element
|
||||||
_ -> getBlocks e
|
_ -> getBlocks e
|
||||||
where parseMixed container conts = do
|
where parseMixed container conts = do
|
||||||
let (ils,rest) = break isBlockElement conts
|
let (ils,rest) = break isBlockElement conts
|
||||||
|
@ -857,19 +861,6 @@ parseBlock (Elem e) =
|
||||||
terms' <- mapM getInlines terms
|
terms' <- mapM getInlines terms
|
||||||
items' <- mapM getBlocks items
|
items' <- mapM getBlocks items
|
||||||
return (mconcat $ intersperse (str "; ") terms', items')
|
return (mconcat $ intersperse (str "; ") terms', items')
|
||||||
getTitle = do
|
|
||||||
tit <- getInlines e
|
|
||||||
subtit <- case filterChild (named "subtitle") e of
|
|
||||||
Just s -> (text ": " <>) <$>
|
|
||||||
getInlines s
|
|
||||||
Nothing -> return mempty
|
|
||||||
addMeta "title" (tit <> subtit)
|
|
||||||
|
|
||||||
getAuthor = (:[]) <$> getInlines e >>= addMeta "author"
|
|
||||||
getAuthorGroup = do
|
|
||||||
let terms = filterChildren (named "author") e
|
|
||||||
mapM getInlines terms >>= addMeta "author"
|
|
||||||
getDate = getInlines e >>= addMeta "date"
|
|
||||||
parseTable = do
|
parseTable = do
|
||||||
let isCaption x = named "title" x || named "caption" x
|
let isCaption x = named "title" x || named "caption" x
|
||||||
caption <- case filterChild isCaption e of
|
caption <- case filterChild isCaption e of
|
||||||
|
@ -935,7 +926,6 @@ parseBlock (Elem e) =
|
||||||
modify $ \st -> st{ dbSectionLevel = n - 1 }
|
modify $ \st -> st{ dbSectionLevel = n - 1 }
|
||||||
return $ headerWith (ident,[],[]) n' headerText <> b
|
return $ headerWith (ident,[],[]) n' headerText <> b
|
||||||
lineItems = mapM getInlines $ filterChildren (named "line") e
|
lineItems = mapM getInlines $ filterChildren (named "line") e
|
||||||
metaBlock = acceptingMetadata (getBlocks e) >> return mempty
|
|
||||||
|
|
||||||
getInlines :: PandocMonad m => Element -> DB m Inlines
|
getInlines :: PandocMonad m => Element -> DB m Inlines
|
||||||
getInlines e' = (trimInlines . mconcat) <$>
|
getInlines e' = (trimInlines . mconcat) <$>
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
Pandoc (Meta {unMeta = fromList []})
|
Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "An",Space,Str "Example",Space,Str "Book"])]})
|
||||||
[Header 1 ("ch01",[],[]) [Str "XRef",Space,Str "Samples"]
|
[Header 1 ("ch01",[],[]) [Str "XRef",Space,Str "Samples"]
|
||||||
,Para [Str "This",Space,Str "paragraph",Space,Str "demonstrates",Space,Str "several",Space,Str "features",Space,Str "of",SoftBreak,Str "XRef."]
|
,Para [Str "This",Space,Str "paragraph",Space,Str "demonstrates",Space,Str "several",Space,Str "features",Space,Str "of",SoftBreak,Str "XRef."]
|
||||||
,BulletList
|
,BulletList
|
||||||
|
|
Loading…
Reference in a new issue