diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index 516bf2f14..98cf5bdf5 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE OverloadedStrings #-} {- | @@ -21,7 +22,7 @@ import Data.Default import Data.Generics import Data.List (foldl', intersperse) import qualified Data.Map as Map -import Data.Maybe (maybeToList, fromMaybe, mapMaybe) +import Data.Maybe (maybeToList, fromMaybe, catMaybes) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL @@ -400,75 +401,66 @@ parseRefList e = do parseRef :: PandocMonad m => Element -> JATS m (Map.Map Text MetaValue) parseRef e = do - let refId = text $ attrValue "id" e - let getInlineText n = maybe (return mempty) getInlines . filterChild (named n) - case filterChild (named "element-citation") e of - Just c -> do - let refType = text $ - case attrValue "publication-type" c of - "journal" -> "article-journal" - x -> x - (refTitle, refContainerTitle) <- do - t <- getInlineText "article-title" c - ct <- getInlineText "source" c - if t == mempty - then return (ct, mempty) - else return (t, ct) - refLabel <- getInlineText "label" c - refYear <- getInlineText "year" c - refVolume <- getInlineText "volume" c - refIssue <- getInlineText "issue" c - refFirstPage <- getInlineText "fpage" c - refLastPage <- getInlineText "lpage" c - refPublisher <- getInlineText "publisher-name" c - refPublisherPlace <- getInlineText "publisher-loc" c - let refPages = refFirstPage <> (if refLastPage == mempty - then mempty - else text "\x2013" <> refLastPage) - let personGroups' = filterChildren (named "person-group") c - let getName nm = do - given <- maybe (return mempty) getInlines - $ filterChild (named "given-names") nm - family <- maybe (return mempty) getInlines - $ filterChild (named "surname") nm - return $ toMetaValue $ Map.fromList [ - ("given" :: Text, given) - , ("family", family) - ] - let extractObjectId e' = - let v = toMetaValue (strContent e') - in case attrValue "pub-id-type" e' of - "doi" -> Just ("DOI", v) - "pmid" -> Just ("PMID", v) - _ -> Nothing - let objectIds = mapMaybe extractObjectId $ - (filterChildren (\e' -> named "object-id" e' || - named "pub-id" e') c) - personGroups <- mapM (\pg -> - do names <- mapM getName - (filterChildren (named "name") pg) - return (attrValue "person-group-type" pg, - toMetaValue names)) - personGroups' - return $ Map.fromList $ - [ ("id" :: Text, toMetaValue refId) - , ("type", toMetaValue refType) - , ("title", toMetaValue refTitle) - , ("container-title", toMetaValue refContainerTitle) - , ("publisher", toMetaValue refPublisher) - , ("publisher-place", toMetaValue refPublisherPlace) - , ("title", toMetaValue refTitle) - , ("issued", toMetaValue - $ Map.fromList [ - ("year" :: Text, refYear) - ]) - , ("volume", toMetaValue refVolume) - , ("issue", toMetaValue refIssue) - , ("page", toMetaValue refPages) - , ("citation-label", toMetaValue refLabel) - ] ++ objectIds ++ personGroups - Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty - -- TODO handle mixed-citation + let combineWithDash x y = x <> text "-" <> y + let getName nm = do + given <- maybe (return mempty) getInlines + $ filterChild (named "given-names") nm + family <- maybe (return mempty) getInlines + $ filterChild (named "surname") nm + return $ toMetaValue $ Map.fromList [ + ("given" :: Text, given) + , ("family", family) + ] + let refElement :: PandocMonad m + => Element -> Element -> JATS m (Maybe (Text, MetaValue)) + refElement c el = + case qName (elName el) of + "article-title" -> Just . ("title",) . toMetaValue <$> getInlines el + "source" -> + case filterChild (named "article-title") c of + Just _ -> Just . ("container-title",) . toMetaValue <$> getInlines el + Nothing -> Just . ("title",) . toMetaValue <$> getInlines el + "label" -> Just . ("citation-label",) . toMetaValue <$> getInlines el + "year" -> case filterChild (named "month") c of + Just m -> Just . ("issued",) . toMetaValue <$> + (combineWithDash + <$> getInlines el <*> getInlines m) + Nothing -> Just . ("issued",) . toMetaValue <$> getInlines el + "volume" -> Just . ("volume",) . toMetaValue <$> getInlines el + "issue" -> Just . ("issue",) . toMetaValue <$> getInlines el + "isbn" -> Just . ("ISBN",) . toMetaValue <$> getInlines el + "issn" -> Just . ("ISSN",) . toMetaValue <$> getInlines el + "fpage" -> + case filterChild (named "lpage") c of + Just lp -> Just . ("page",) . toMetaValue <$> + (combineWithDash <$> getInlines el <*> getInlines lp) + Nothing -> Just . ("page-first",) . toMetaValue <$> getInlines el + "publisher-name" -> Just . ("publisher",) . toMetaValue <$> getInlines el + "publisher-loc" -> Just . ("publisher-place",) . toMetaValue + <$> getInlines el + "person-group" -> do names <- mapM getName + (filterChildren (named "name") el) + pure $ Just (attrValue "person-group-type" el, + toMetaValue names) + "pub-id" -> case attrValue "pub-id-type" el of + "doi" -> pure $ Just ("DOI", toMetaValue $ strContent el) + "pmid" -> pure $ Just ("PMID", toMetaValue $ strContent el) + _ -> pure Nothing + "object-id" -> case attrValue "pub-id-type" el of + "doi" -> pure $ Just ("DOI", toMetaValue $ strContent el) + "pmid" -> pure $ Just ("PMID", toMetaValue $ strContent el) + _ -> pure Nothing + + + _ -> pure Nothing + refVariables <- + case filterChild (named "element-citation") e of + Just c -> (("type", toMetaValue $ case attrValue "publication-type" c of + "journal" -> "article-journal" + x -> x) :) . + catMaybes <$> mapM (refElement c) (elChildren c) + Nothing -> pure [] -- TODO handle mixed-citation + return $ Map.fromList (("id", toMetaValue $ attrValue "id" e) : refVariables) textContent :: Element -> Text textContent = strContent