JATS writer: allow to use element-citation

This commit is contained in:
Albert Krewinkel 2021-01-20 19:09:36 +01:00
parent 87083bd1d6
commit b4b3560191
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
7 changed files with 346 additions and 7 deletions

View file

@ -3093,6 +3093,13 @@ output format.
Some aspects of [Pandoc's Markdown citation syntax](#citations)
are also accepted in `org` input.
#### Extension: `element_citations` ####
In the `jats` output formats, this causes reference items to
be replaced with `<element-citation>` elements. These
elements are not influenced by CSL styles, but all information
on the item is included in tags.
#### Extension: `ntb` ####
In the `context` output format this enables the use of [Natural Tables

View file

@ -638,6 +638,7 @@ library
Text.Pandoc.Readers.Metadata,
Text.Pandoc.Readers.Roff,
Text.Pandoc.Writers.Docx.StyleMap,
Text.Pandoc.Writers.JATS.References,
Text.Pandoc.Writers.JATS.Table,
Text.Pandoc.Writers.JATS.Types,
Text.Pandoc.Writers.LaTeX.Caption,

View file

@ -88,6 +88,7 @@ data Extension =
-- does not affect readers/writers directly; it causes
-- the eastAsianLineBreakFilter to be applied after
-- parsing, in Text.Pandoc.App.convertWithOpts.
| Ext_element_citations -- ^ Use element-citation elements for JATS citations
| Ext_emoji -- ^ Support emoji like :smile:
| Ext_empty_paragraphs -- ^ Allow empty paragraphs
| Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML
@ -412,6 +413,11 @@ getDefaultExtensions "textile" = extensionsFromList
Ext_smart,
Ext_raw_html,
Ext_auto_identifiers]
getDefaultExtensions "jats" = extensionsFromList
[Ext_auto_identifiers]
getDefaultExtensions "jats_archiving" = getDefaultExtensions "jats"
getDefaultExtensions "jats_publishing" = getDefaultExtensions "jats"
getDefaultExtensions "jats_articleauthoring" = getDefaultExtensions "jats"
getDefaultExtensions "opml" = pandocExtensions -- affects notes
getDefaultExtensions _ = extensionsFromList
[Ext_auto_identifiers]
@ -554,6 +560,14 @@ getAllExtensions f = universalExtensions <> getAll f
, Ext_smart
, Ext_raw_tex
]
getAll "jats" =
extensionsFromList
[ Ext_auto_identifiers
, Ext_element_citations
]
getAll "jats_archiving" = getAll "jats"
getAll "jats_publishing" = getAll "jats"
getAll "jats_articleauthoring" = getAll "jats"
getAll "opml" = allMarkdownExtensions -- affects notes
getAll "twiki" = autoIdExtensions <>
extensionsFromList

View file

@ -29,6 +29,7 @@ import Data.Maybe (fromMaybe)
import Data.Time (toGregorian, Day, parseTimeM, defaultTimeLocale, formatTime)
import qualified Data.Text as T
import Data.Text (Text)
import Text.Pandoc.Citeproc (getReferences)
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
import Text.Pandoc.Definition
import Text.Pandoc.Highlighting (languages, languagesByExtension)
@ -40,6 +41,7 @@ import Text.DocLayout
import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate)
import Text.DocTemplates (Context(..), Val(..))
import Text.Pandoc.Writers.JATS.References (referencesToJATS)
import Text.Pandoc.Writers.JATS.Table (tableToJATS)
import Text.Pandoc.Writers.JATS.Types
import Text.Pandoc.Writers.Math
@ -71,15 +73,19 @@ writeJATS = writeJatsArchiving
-- | Convert a @'Pandoc'@ document to JATS.
writeJats :: PandocMonad m => JATSTagSet -> WriterOptions -> Pandoc -> m Text
writeJats tagSet opts d =
runReaderT (evalStateT (docToJATS opts d) initialState)
environment
where initialState = JATSState { jatsNotes = [] }
environment = JATSEnv
writeJats tagSet opts d = do
refs <- if extensionEnabled Ext_element_citations $ writerExtensions opts
then getReferences Nothing d
else pure []
let environment = JATSEnv
{ jatsTagSet = tagSet
, jatsInlinesWriter = inlinesToJATS
, jatsBlockWriter = blockToJATS
, jatsReferences = refs
}
let initialState = JATSState { jatsNotes = [] }
runReaderT (evalStateT (docToJATS opts d) initialState)
environment
-- | Convert Pandoc document to string in JATS format.
docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> JATS m Text
@ -258,7 +264,10 @@ blockToJATS opts (Div (ident,_,_) [Para lst]) | "ref-" `T.isPrefixOf` ident =
inTagsSimple "mixed-citation" <$>
inlinesToJATS opts lst
blockToJATS opts (Div ("refs",_,_) xs) = do
contents <- blocksToJATS opts xs
refs <- asks jatsReferences
contents <- if null refs
then blocksToJATS opts xs
else referencesToJATS opts refs
return $ inTagsIndented "ref-list" contents
blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do
contents <- blocksToJATS opts bs

View file

@ -0,0 +1,160 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Writers.JATS.References
Copyright : © 2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb@zeitkraut.de>
Stability : alpha
Portability : portable
Creation of a bibliography list using @<element-citation>@ elements in
reference items.
-}
module Text.Pandoc.Writers.JATS.References
( referencesToJATS
, referenceToJATS
) where
import Citeproc.Pandoc ()
import Citeproc.Types
( Date (..), DateParts (..), ItemId (..), Name (..), Reference (..)
, Val (..) , lookupVariable, valToText
)
import Data.Text (Text)
import Text.DocLayout (Doc, empty, isEmpty, literal, vcat)
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Builder (Inlines)
import Text.Pandoc.Options (WriterOptions)
import Text.Pandoc.Shared (tshow)
import Text.Pandoc.Writers.JATS.Types
import Text.Pandoc.XML (inTags)
import qualified Data.Text as T
referencesToJATS :: PandocMonad m
=> WriterOptions
-> [Reference Inlines]
-> JATS m (Doc Text)
referencesToJATS opts =
fmap (inTags True "ref-list" [] . vcat) . mapM (referenceToJATS opts)
referenceToJATS :: PandocMonad m
=> WriterOptions
-> Reference Inlines
-> JATS m (Doc Text)
referenceToJATS _opts ref = do
let refType = referenceType ref
let pubType = [("publication-type", refType) | not (T.null refType)]
let wrap = inTags True "ref" [("id", "ref-" <> unItemId (referenceId ref))]
. inTags True "element-citation" pubType
return . wrap . vcat $
[ authors
, "title" `varInTag`
if refType == "book"
then "source"
else "article-title"
, if refType == "book"
then empty
else "container-title" `varInTag` "source"
, editors
, "publisher" `varInTag` "publisher-name"
, "publisher-place" `varInTag` "publisher-loc"
, yearTag
, accessed
, "volume" `varInTag` "volume"
, "issue" `varInTag` "issue"
, "page-first" `varInTag` "fpage"
, "page-last" `varInTag` "lpage"
, "pages" `varInTag` "page-range"
, "ISBN" `varInTag` "isbn"
, "ISSN" `varInTag` "issn"
, varInTagWith "doi" "pub-id" [("pub-id-type", "doi")]
, varInTagWith "pmid" "pub-id" [("pub-id-type", "pmid")]
]
where
varInTag var tagName = varInTagWith var tagName []
varInTagWith var tagName tagAttribs =
case lookupVariable var ref >>= valToText of
Nothing -> mempty
Just val -> inTags' tagName tagAttribs $ literal val
authors = case lookupVariable "author" ref of
Just (NamesVal names) ->
inTags True "person-group" [("person-group-type", "author")] . vcat $
map toNameElements names
_ -> empty
editors = case lookupVariable "editor" ref of
Just (NamesVal names) ->
inTags True "person-group" [("person-group-type", "editor")] . vcat $
map toNameElements names
_ -> empty
yearTag =
case lookupVariable "issued" ref of
Just (DateVal date) -> toDateElements date
_ -> empty
accessed =
case lookupVariable "accessed" ref of
Just (DateVal d) -> inTags' "date-in-citation"
[("content-type", "access-date")]
(toDateElements d)
_ -> empty
toDateElements :: Date -> Doc Text
toDateElements date =
case dateParts date of
dp@(DateParts (y:m:d:_)):_ -> yearElement y dp <>
monthElement m <>
dayElement d
dp@(DateParts (y:m:_)):_ -> yearElement y dp <> monthElement m
dp@(DateParts (y:_)):_ -> yearElement y dp
_ -> empty
yearElement :: Int -> DateParts -> Doc Text
yearElement year dp =
inTags' "year" [("iso-8601-date", iso8601 dp)] $ literal (fourDigits year)
monthElement :: Int -> Doc Text
monthElement month = inTags' "month" [] . literal $ twoDigits month
dayElement :: Int -> Doc Text
dayElement day = inTags' "day" [] . literal $ twoDigits day
iso8601 :: DateParts -> Text
iso8601 = T.intercalate "-" . \case
DateParts (y:m:d:_) -> [fourDigits y, twoDigits m, twoDigits d]
DateParts (y:m:_) -> [fourDigits y, twoDigits m]
DateParts (y:_) -> [fourDigits y]
_ -> []
twoDigits :: Int -> Text
twoDigits n = T.takeEnd 2 $ '0' `T.cons` tshow n
fourDigits :: Int -> Text
fourDigits n = T.takeEnd 4 $ "000" <> tshow n
toNameElements :: Name -> Doc Text
toNameElements name =
if not (isEmpty nameTags)
then inTags' "name" [] nameTags
else nameLiteral name `inNameTag` "string-name"
where
inNameTag val tag = maybe empty (inTags' tag [] . literal) val
surnamePrefix = maybe mempty (`T.snoc` ' ') $
nameNonDroppingParticle name
givenSuffix = maybe mempty (T.cons ' ') $
nameDroppingParticle name
nameTags = mconcat
[ ((surnamePrefix <>) <$> nameFamily name) `inNameTag` "surname"
, ((<> givenSuffix) <$> nameGiven name) `inNameTag` "given-names"
, nameSuffix name `inNameTag` "suffix"
]
-- | Put the supplied contents between start and end tags of tagType,
-- with specified attributes.
inTags' :: Text -> [(Text, Text)] -> Doc Text -> Doc Text
inTags' = inTags False

View file

@ -17,11 +17,12 @@ module Text.Pandoc.Writers.JATS.Types
)
where
import Citeproc.Types (Reference)
import Control.Monad.Reader (ReaderT)
import Control.Monad.State (StateT)
import Data.Text (Text)
import Text.DocLayout (Doc)
import Text.Pandoc.Definition (Block, Inline)
import Text.Pandoc.Builder (Block, Inline, Inlines)
import Text.Pandoc.Options (WriterOptions)
-- | JATS tag set variant
@ -40,6 +41,7 @@ data JATSEnv m = JATSEnv
{ jatsTagSet :: JATSTagSet
, jatsInlinesWriter :: WriterOptions -> [Inline] -> JATS m (Doc Text)
, jatsBlockWriter :: WriterOptions -> Block -> JATS m (Doc Text)
, jatsReferences :: [Reference Inlines]
}
-- | JATS writer type

146
test/command/7042.md Normal file
View file

@ -0,0 +1,146 @@
```
% pandoc -f markdown -t jats_publishing+element_citations --citeproc -s
---
nocite: "[@*]"
references:
- author:
- family: Jane
given: Doe
container-title: Public Library of Tests
id: year-month
issued: 1999-08
title: Year and month
type: article-journal
- accessed: 1999-01-22
author:
- family: Negidius
given: Numerius
container-title: Public Library of Tests
id: access-date
issued: 1911-10-03
title: Entry with access date
type: article-journal
- author:
- family: Beethoven
given: Ludwig
dropping-particle: van
- family: Bray
given: Jan
non-dropping-particle: de
container-title: Public Library of Tests
id: name-particles
issued: 1820
title: Name particles, dropping and non-dropping
type: article-journal
- author:
- 宮水 三葉
- 立花 瀧
title: Big Book of Tests
id: book-with-japanese-authors
issued: 2016
type: book
- author:
- family: Watson
given: J. D.
- family: Crick
given: F. H. C.
container-title: Nature
doi: '10.1038/171737a0'
id: full-journal-article-entry
issue: 4356
issued: '1953-04-01'
pages: 737-738
pmid: 13054692
title: 'Molecular Structure of Nucleic Acids: A Structure for Deoxyribose Nucleic Acid'
type: article-journal
volume: 171
...
^D
<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN"
"JATS-publishing1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.2" article-type="other">
<front>
<journal-meta>
<journal-id></journal-id>
<journal-title-group>
</journal-title-group>
<issn></issn>
<publisher>
<publisher-name></publisher-name>
</publisher>
</journal-meta>
<article-meta>
</article-meta>
</front>
<body>
</body>
<back>
<ref-list>
<ref-list>
<ref id="ref-year-month">
<element-citation publication-type="article-journal">
<person-group person-group-type="author">
<name><surname>Jane</surname><given-names>Doe</given-names></name>
</person-group>
<article-title>Year and month</article-title>
<source>Public Library of Tests</source>
<year iso-8601-date="1999-08">1999</year><month>08</month>
</element-citation>
</ref>
<ref id="ref-access-date">
<element-citation publication-type="article-journal">
<person-group person-group-type="author">
<name><surname>Negidius</surname><given-names>Numerius</given-names></name>
</person-group>
<article-title>Entry with access date</article-title>
<source>Public Library of Tests</source>
<year iso-8601-date="1911-10-03">1911</year><month>10</month><day>03</day>
<date-in-citation content-type="access-date"><year iso-8601-date="1999-01-22">1999</year><month>01</month><day>22</day></date-in-citation>
</element-citation>
</ref>
<ref id="ref-name-particles">
<element-citation publication-type="article-journal">
<person-group person-group-type="author">
<name><surname>Beethoven</surname><given-names>Ludwig van</given-names></name>
<name><surname>de Bray</surname><given-names>Jan</given-names></name>
</person-group>
<article-title>Name particles, dropping and non-dropping</article-title>
<source>Public Library of Tests</source>
<year iso-8601-date="1820">1820</year>
</element-citation>
</ref>
<ref id="ref-book-with-japanese-authors">
<element-citation publication-type="book">
<person-group person-group-type="author">
<string-name>宮水 三葉</string-name>
<string-name>立花 瀧</string-name>
</person-group>
<source>Big Book of Tests</source>
<year iso-8601-date="2016">2016</year>
</element-citation>
</ref>
<ref id="ref-full-journal-article-entry">
<element-citation publication-type="article-journal">
<person-group person-group-type="author">
<name><surname>Watson</surname><given-names>J. D.</given-names></name>
<name><surname>Crick</surname><given-names>F. H. C.</given-names></name>
</person-group>
<article-title>Molecular Structure of Nucleic Acids: A Structure for Deoxyribose Nucleic Acid</article-title>
<source>Nature</source>
<year iso-8601-date="1953-04-01">1953</year><month>04</month><day>01</day>
<volume>171</volume>
<issue>4356</issue>
<fpage>737</fpage>
<page-range>737-738</page-range>
<pub-id pub-id-type="doi">10.1038/171737a0</pub-id>
<pub-id pub-id-type="pmid">13054692</pub-id>
</element-citation>
</ref>
</ref-list>
</ref-list>
</back>
</article>
```