JATS reader: improve refs parsing.
Handle issn and isbn; use simpler form for issued date.
This commit is contained in:
parent
6217fd0976
commit
51c8b059e1
1 changed files with 62 additions and 70 deletions
|
@ -1,3 +1,4 @@
|
|||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE TupleSections #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{- |
|
||||
|
@ -21,7 +22,7 @@ import Data.Default
|
|||
import Data.Generics
|
||||
import Data.List (foldl', intersperse)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Maybe (maybeToList, fromMaybe, mapMaybe)
|
||||
import Data.Maybe (maybeToList, fromMaybe, catMaybes)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
|
@ -400,75 +401,66 @@ parseRefList e = do
|
|||
parseRef :: PandocMonad m
|
||||
=> Element -> JATS m (Map.Map Text MetaValue)
|
||||
parseRef e = do
|
||||
let refId = text $ attrValue "id" e
|
||||
let getInlineText n = maybe (return mempty) getInlines . filterChild (named n)
|
||||
case filterChild (named "element-citation") e of
|
||||
Just c -> do
|
||||
let refType = text $
|
||||
case attrValue "publication-type" c of
|
||||
"journal" -> "article-journal"
|
||||
x -> x
|
||||
(refTitle, refContainerTitle) <- do
|
||||
t <- getInlineText "article-title" c
|
||||
ct <- getInlineText "source" c
|
||||
if t == mempty
|
||||
then return (ct, mempty)
|
||||
else return (t, ct)
|
||||
refLabel <- getInlineText "label" c
|
||||
refYear <- getInlineText "year" c
|
||||
refVolume <- getInlineText "volume" c
|
||||
refIssue <- getInlineText "issue" c
|
||||
refFirstPage <- getInlineText "fpage" c
|
||||
refLastPage <- getInlineText "lpage" c
|
||||
refPublisher <- getInlineText "publisher-name" c
|
||||
refPublisherPlace <- getInlineText "publisher-loc" c
|
||||
let refPages = refFirstPage <> (if refLastPage == mempty
|
||||
then mempty
|
||||
else text "\x2013" <> refLastPage)
|
||||
let personGroups' = filterChildren (named "person-group") c
|
||||
let getName nm = do
|
||||
given <- maybe (return mempty) getInlines
|
||||
$ filterChild (named "given-names") nm
|
||||
family <- maybe (return mempty) getInlines
|
||||
$ filterChild (named "surname") nm
|
||||
return $ toMetaValue $ Map.fromList [
|
||||
("given" :: Text, given)
|
||||
, ("family", family)
|
||||
]
|
||||
let extractObjectId e' =
|
||||
let v = toMetaValue (strContent e')
|
||||
in case attrValue "pub-id-type" e' of
|
||||
"doi" -> Just ("DOI", v)
|
||||
"pmid" -> Just ("PMID", v)
|
||||
_ -> Nothing
|
||||
let objectIds = mapMaybe extractObjectId $
|
||||
(filterChildren (\e' -> named "object-id" e' ||
|
||||
named "pub-id" e') c)
|
||||
personGroups <- mapM (\pg ->
|
||||
do names <- mapM getName
|
||||
(filterChildren (named "name") pg)
|
||||
return (attrValue "person-group-type" pg,
|
||||
toMetaValue names))
|
||||
personGroups'
|
||||
return $ Map.fromList $
|
||||
[ ("id" :: Text, toMetaValue refId)
|
||||
, ("type", toMetaValue refType)
|
||||
, ("title", toMetaValue refTitle)
|
||||
, ("container-title", toMetaValue refContainerTitle)
|
||||
, ("publisher", toMetaValue refPublisher)
|
||||
, ("publisher-place", toMetaValue refPublisherPlace)
|
||||
, ("title", toMetaValue refTitle)
|
||||
, ("issued", toMetaValue
|
||||
$ Map.fromList [
|
||||
("year" :: Text, refYear)
|
||||
])
|
||||
, ("volume", toMetaValue refVolume)
|
||||
, ("issue", toMetaValue refIssue)
|
||||
, ("page", toMetaValue refPages)
|
||||
, ("citation-label", toMetaValue refLabel)
|
||||
] ++ objectIds ++ personGroups
|
||||
Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
|
||||
-- TODO handle mixed-citation
|
||||
let combineWithDash x y = x <> text "-" <> y
|
||||
let getName nm = do
|
||||
given <- maybe (return mempty) getInlines
|
||||
$ filterChild (named "given-names") nm
|
||||
family <- maybe (return mempty) getInlines
|
||||
$ filterChild (named "surname") nm
|
||||
return $ toMetaValue $ Map.fromList [
|
||||
("given" :: Text, given)
|
||||
, ("family", family)
|
||||
]
|
||||
let refElement :: PandocMonad m
|
||||
=> Element -> Element -> JATS m (Maybe (Text, MetaValue))
|
||||
refElement c el =
|
||||
case qName (elName el) of
|
||||
"article-title" -> Just . ("title",) . toMetaValue <$> getInlines el
|
||||
"source" ->
|
||||
case filterChild (named "article-title") c of
|
||||
Just _ -> Just . ("container-title",) . toMetaValue <$> getInlines el
|
||||
Nothing -> Just . ("title",) . toMetaValue <$> getInlines el
|
||||
"label" -> Just . ("citation-label",) . toMetaValue <$> getInlines el
|
||||
"year" -> case filterChild (named "month") c of
|
||||
Just m -> Just . ("issued",) . toMetaValue <$>
|
||||
(combineWithDash
|
||||
<$> getInlines el <*> getInlines m)
|
||||
Nothing -> Just . ("issued",) . toMetaValue <$> getInlines el
|
||||
"volume" -> Just . ("volume",) . toMetaValue <$> getInlines el
|
||||
"issue" -> Just . ("issue",) . toMetaValue <$> getInlines el
|
||||
"isbn" -> Just . ("ISBN",) . toMetaValue <$> getInlines el
|
||||
"issn" -> Just . ("ISSN",) . toMetaValue <$> getInlines el
|
||||
"fpage" ->
|
||||
case filterChild (named "lpage") c of
|
||||
Just lp -> Just . ("page",) . toMetaValue <$>
|
||||
(combineWithDash <$> getInlines el <*> getInlines lp)
|
||||
Nothing -> Just . ("page-first",) . toMetaValue <$> getInlines el
|
||||
"publisher-name" -> Just . ("publisher",) . toMetaValue <$> getInlines el
|
||||
"publisher-loc" -> Just . ("publisher-place",) . toMetaValue
|
||||
<$> getInlines el
|
||||
"person-group" -> do names <- mapM getName
|
||||
(filterChildren (named "name") el)
|
||||
pure $ Just (attrValue "person-group-type" el,
|
||||
toMetaValue names)
|
||||
"pub-id" -> case attrValue "pub-id-type" el of
|
||||
"doi" -> pure $ Just ("DOI", toMetaValue $ strContent el)
|
||||
"pmid" -> pure $ Just ("PMID", toMetaValue $ strContent el)
|
||||
_ -> pure Nothing
|
||||
"object-id" -> case attrValue "pub-id-type" el of
|
||||
"doi" -> pure $ Just ("DOI", toMetaValue $ strContent el)
|
||||
"pmid" -> pure $ Just ("PMID", toMetaValue $ strContent el)
|
||||
_ -> pure Nothing
|
||||
|
||||
|
||||
_ -> pure Nothing
|
||||
refVariables <-
|
||||
case filterChild (named "element-citation") e of
|
||||
Just c -> (("type", toMetaValue $ case attrValue "publication-type" c of
|
||||
"journal" -> "article-journal"
|
||||
x -> x) :) .
|
||||
catMaybes <$> mapM (refElement c) (elChildren c)
|
||||
Nothing -> pure [] -- TODO handle mixed-citation
|
||||
return $ Map.fromList (("id", toMetaValue $ attrValue "id" e) : refVariables)
|
||||
|
||||
textContent :: Element -> Text
|
||||
textContent = strContent
|
||||
|
|
Loading…
Add table
Reference in a new issue