lang
variable is now in BCP47 format
strings are converted for LaTeX and ConTeXt output, closes #1614
This commit is contained in:
parent
a0ddabb206
commit
622df7034c
3 changed files with 192 additions and 16 deletions
20
README
20
README
|
@ -945,7 +945,19 @@ as `title`, `author`, and `date`) as well as the following:
|
||||||
: body of document
|
: body of document
|
||||||
|
|
||||||
`lang`
|
`lang`
|
||||||
: language code for HTML or LaTeX documents
|
: The `lang` variable should be set by the user to a language
|
||||||
|
code according to [BCP 47] (e.g. `en` or `en-GB`).
|
||||||
|
For some output formats, pandoc will convert it to an appropriate
|
||||||
|
format stored in the additional variables `babel-lang`,
|
||||||
|
`polyglossia-lang`, `polyglossia-variant` (LaTeX)
|
||||||
|
and `context-lang` (ConTeXt).
|
||||||
|
|
||||||
|
`otherlangs`
|
||||||
|
: Should be set to a list of other languages used in the document
|
||||||
|
in the YAML metadata, according to [BCP 47]. For example:
|
||||||
|
`otherlangs: [en-GB, fr]`.
|
||||||
|
Currently only used by XeTeX through the generated
|
||||||
|
`polyglossia-otherlangs` variable.
|
||||||
|
|
||||||
`slidy-url`
|
`slidy-url`
|
||||||
: base URL for Slidy documents (defaults to
|
: base URL for Slidy documents (defaults to
|
||||||
|
@ -3264,8 +3276,8 @@ The following fields are recognized:
|
||||||
~ A string value in `YYYY-MM-DD` format. (Only the year is necessary.)
|
~ A string value in `YYYY-MM-DD` format. (Only the year is necessary.)
|
||||||
Pandoc will attempt to convert other common date formats.
|
Pandoc will attempt to convert other common date formats.
|
||||||
|
|
||||||
`language`
|
`lang` (or legacy: `language`)
|
||||||
~ A string value in [RFC5646] format. Pandoc will default to the local
|
~ A string value in [BCP 47] format. Pandoc will default to the local
|
||||||
language if nothing is specified.
|
language if nothing is specified.
|
||||||
|
|
||||||
`subject`
|
`subject`
|
||||||
|
@ -3549,7 +3561,7 @@ Xavier Olive.
|
||||||
[FictionBook2]: http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1
|
[FictionBook2]: http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1
|
||||||
[lua]: http://www.lua.org
|
[lua]: http://www.lua.org
|
||||||
[marc relators]: http://www.loc.gov/marc/relators/relaterm.html
|
[marc relators]: http://www.loc.gov/marc/relators/relaterm.html
|
||||||
[RFC5646]: http://tools.ietf.org/html/rfc5646
|
[BCP 47]: https://tools.ietf.org/html/bcp47
|
||||||
[InDesign ICML]: https://www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf
|
[InDesign ICML]: https://www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf
|
||||||
[txt2tags]: http://txt2tags.org/
|
[txt2tags]: http://txt2tags.org/
|
||||||
[EPUB]: http://idpf.org/epub
|
[EPUB]: http://idpf.org/epub
|
||||||
|
|
|
@ -80,12 +80,12 @@ pandocToConTeXt options (Pandoc meta blocks) = do
|
||||||
"subsubsubsection","subsubsubsubsection"])
|
"subsubsubsection","subsubsubsubsection"])
|
||||||
$ defField "body" main
|
$ defField "body" main
|
||||||
$ defField "number-sections" (writerNumberSections options)
|
$ defField "number-sections" (writerNumberSections options)
|
||||||
$ defField "mainlang" (maybe ""
|
|
||||||
(reverse . takeWhile (/=',') . reverse)
|
|
||||||
(lookup "lang" $ writerVariables options))
|
|
||||||
$ metadata
|
$ metadata
|
||||||
|
let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $
|
||||||
|
getField "lang" context)
|
||||||
|
context
|
||||||
return $ if writerStandalone options
|
return $ if writerStandalone options
|
||||||
then renderTemplate' (writerTemplate options) context
|
then renderTemplate' (writerTemplate options) context'
|
||||||
else main
|
else main
|
||||||
|
|
||||||
-- escape things as needed for ConTeXt
|
-- escape things as needed for ConTeXt
|
||||||
|
@ -362,3 +362,35 @@ sectionHeader (ident,classes,_) hdrLevel lst = do
|
||||||
then char '\\' <> chapter <> braces contents
|
then char '\\' <> chapter <> braces contents
|
||||||
else contents <> blankline
|
else contents <> blankline
|
||||||
|
|
||||||
|
-- | Convert the constituents of a BCP 47 language code (the tag split on
-- @-@, e.g. @["en","GB"]@ for @en-GB@) into a ConTeXt language code.
--
-- Tags that have a dedicated ConTeXt code are recognised by their first
-- two subtags (or by the primary subtag alone for @grc@). Every other tag
-- falls back to its primary language subtag, rewritten by @fromIso@ for the
-- handful of codes listed there. An empty list yields the empty string.
-- Matching is case-sensitive: region subtags must be upper case as written
-- in the patterns below.
--
-- https://tools.ietf.org/html/bcp47#section-2.1
-- http://wiki.contextgarden.net/Language_Codes
fromBcp47 :: [String] -> String
fromBcp47 []                = ""
fromBcp47 ("ar":"SY":_)     = "ar-sy"
fromBcp47 ("ar":"IQ":_)     = "ar-iq"
fromBcp47 ("ar":"JO":_)     = "ar-jo"
fromBcp47 ("ar":"LB":_)     = "ar-lb"
fromBcp47 ("ar":"DZ":_)     = "ar-dz"
fromBcp47 ("ar":"MA":_)     = "ar-ma"
fromBcp47 ("de":"1901":_)   = "deo"
fromBcp47 ("de":"DE":_)     = "de-de"
fromBcp47 ("de":"AT":_)     = "de-at"
fromBcp47 ("de":"CH":_)     = "de-ch"
fromBcp47 ("el":"poly":_)   = "agr"
fromBcp47 ("en":"US":_)     = "en-us"
fromBcp47 ("en":"GB":_)     = "en-gb"
fromBcp47 ("grc":_)         = "agr"
-- Bind the primary subtag by pattern rather than with the partial 'head',
-- so totality no longer depends on the order of the equations above.
fromBcp47 (primary:_)       = fromIso primary
  where
    -- Primary subtags whose ConTeXt code differs; anything else is
    -- passed through unchanged.
    fromIso "cz" = "cs"
    fromIso "el" = "gr"
    fromIso "eu" = "ba"
    fromIso "he" = "il"
    fromIso "jp" = "ja"
    fromIso "uk" = "ua"
    fromIso "vi" = "vn"
    fromIso "zh" = "cn"
    fromIso l    = l
|
||||||
|
|
||||||
|
|
|
@ -144,11 +144,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do
|
||||||
st <- get
|
st <- get
|
||||||
titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
|
titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
|
||||||
authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
|
authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
|
||||||
let (mainlang, otherlang) =
|
|
||||||
case (reverse . splitBy (==',') . filter (/=' ')) `fmap`
|
|
||||||
getField "lang" metadata of
|
|
||||||
Just (m:os) -> (m, reverse os)
|
|
||||||
_ -> ("", [])
|
|
||||||
let context = defField "toc" (writerTableOfContents options) $
|
let context = defField "toc" (writerTableOfContents options) $
|
||||||
defField "toc-depth" (show (writerTOCDepth options -
|
defField "toc-depth" (show (writerTOCDepth options -
|
||||||
if stBook st
|
if stBook st
|
||||||
|
@ -173,8 +168,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do
|
||||||
defField "euro" (stUsesEuro st) $
|
defField "euro" (stUsesEuro st) $
|
||||||
defField "listings" (writerListings options || stLHS st) $
|
defField "listings" (writerListings options || stLHS st) $
|
||||||
defField "beamer" (writerBeamer options) $
|
defField "beamer" (writerBeamer options) $
|
||||||
defField "mainlang" mainlang $
|
|
||||||
defField "otherlang" otherlang $
|
|
||||||
(if stHighlighting st
|
(if stHighlighting st
|
||||||
then defField "highlighting-macros" (styleToLaTeX
|
then defField "highlighting-macros" (styleToLaTeX
|
||||||
$ writerHighlightStyle options )
|
$ writerHighlightStyle options )
|
||||||
|
@ -186,8 +179,18 @@ pandocToLaTeX options (Pandoc meta blocks) = do
|
||||||
defField "biblatex" True
|
defField "biblatex" True
|
||||||
_ -> id) $
|
_ -> id) $
|
||||||
metadata
|
metadata
|
||||||
|
let lang = maybe [] (splitBy (=='-')) $ getField "lang" context
|
||||||
|
(polyLang, polyVar) = toPolyglossia lang
|
||||||
|
let context' =
|
||||||
|
defField "babel-lang" (toBabel lang)
|
||||||
|
$ defField "polyglossia-lang" polyLang
|
||||||
|
$ defField "polyglossia-variant" polyVar
|
||||||
|
$ defField "polyglossia-otherlangs"
|
||||||
|
(maybe [] (map $ fst . toPolyglossia . splitBy (=='-')) $
|
||||||
|
getField "otherlangs" context)
|
||||||
|
$ context
|
||||||
return $ if writerStandalone options
|
return $ if writerStandalone options
|
||||||
then renderTemplate' template context
|
then renderTemplate' template context'
|
||||||
else main
|
else main
|
||||||
|
|
||||||
-- | Convert Elements to LaTeX
|
-- | Convert Elements to LaTeX
|
||||||
|
@ -980,3 +983,132 @@ citationsToBiblatex _ = return empty
|
||||||
getListingsLanguage :: [String] -> Maybe String
|
getListingsLanguage :: [String] -> Maybe String
|
||||||
getListingsLanguage [] = Nothing
|
getListingsLanguage [] = Nothing
|
||||||
getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs
|
getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs
|
||||||
|
|
||||||
|
-- | Map the constituents of a BCP 47 language code (the tag split on @-@)
-- to a Polyglossia @(language, variant)@ pair.
--
-- The variant is the empty string when no variant applies. Tags without a
-- dedicated entry here are resolved through 'commonFromBcp47' and get an
-- empty variant.
--
-- http://mirrors.concertpass.com/tex-archive/macros/latex/contrib/polyglossia/polyglossia.pdf
toPolyglossia :: [String] -> (String, String)
toPolyglossia subtags =
  case subtags of
    ("de":"AT":_)   -> ("german", "austrian")
    ("de":"CH":_)   -> ("german", "swiss")
    ("de":_)        -> ("german", "")
    ("dsb":_)       -> ("lsorbian", "")
    ("el":"poly":_) -> ("greek", "poly")
    ("en":"AU":_)   -> ("english", "australian")
    ("en":"CA":_)   -> ("english", "canadian")
    ("en":"GB":_)   -> ("english", "british")
    ("en":"NZ":_)   -> ("english", "newzealand")
    ("en":"UK":_)   -> ("english", "british")
    ("en":"US":_)   -> ("english", "american")
    ("grc":_)       -> ("greek", "ancient")
    ("hsb":_)       -> ("usorbian", "")
    ("sl":_)        -> ("slovenian", "")
    -- No Polyglossia-specific entry: use the name shared with Babel.
    _               -> (commonFromBcp47 subtags, "")
|
||||||
|
|
||||||
|
-- | Map the constituents of a BCP 47 language code (the tag split on @-@)
-- to a Babel language name.
--
-- Alternatives are tried top to bottom, so the more specific @de@ patterns
-- must stay ahead of the general @de@ one. Tags without a dedicated entry
-- here are resolved through 'commonFromBcp47'.
--
-- http://mirrors.concertpass.com/tex-archive/macros/latex/required/babel/base/babel.pdf
-- Note that the PDF unfortunately does not contain a complete list of
-- supported languages.
toBabel :: [String] -> String
toBabel subtags =
  case subtags of
    ("de":"1901":_)      -> "german"
    ("de":"AT":"1901":_) -> "austrian"
    ("de":"AT":_)        -> "naustrian"
    ("de":_)             -> "ngerman"
    ("dsb":_)            -> "lowersorbian"
    ("el":"poly":_)      -> "polutonikogreek"
    ("en":"AU":_)        -> "australian"
    ("en":"CA":_)        -> "canadian"
    ("en":"GB":_)        -> "british"
    ("en":"NZ":_)        -> "newzealand"
    ("en":"UK":_)        -> "british"
    ("en":"US":_)        -> "american"
    ("fr":"CA":_)        -> "canadien"
    ("fra":"aca":_)      -> "acadian"
    ("grc":_)            -> "polutonikogreek"
    ("hsb":_)            -> "uppersorbian"
    ("sl":_)             -> "slovene"
    -- No Babel-specific entry: use the name shared with Polyglossia.
    _                    -> commonFromBcp47 subtags
|
||||||
|
|
||||||
|
-- | Map the constituents of a BCP 47 language code (the tag split on @-@)
-- to the language name shared by Babel and Polyglossia.
--
-- Only @pt-BR@ is distinguished by region; every other tag is looked up by
-- its primary language subtag alone. An empty list, or a primary subtag
-- with no entry in the table, yields the empty string.
--
-- https://tools.ietf.org/html/bcp47#section-2.1
commonFromBcp47 :: [String] -> String
commonFromBcp47 []            = ""
commonFromBcp47 ("pt":"BR":_) = "brazilian"
-- Bind the primary subtag by pattern rather than with the partial 'head'.
commonFromBcp47 (primary:_)   = fromIso primary
  where
    fromIso "af"  = "afrikaans"
    fromIso "am"  = "amharic"
    fromIso "ar"  = "arabic"
    fromIso "ast" = "asturian"
    fromIso "bg"  = "bulgarian"
    fromIso "bn"  = "bengali"
    fromIso "bo"  = "tibetan"
    fromIso "br"  = "breton"
    fromIso "ca"  = "catalan"
    fromIso "cy"  = "welsh"
    -- "cs" is the registered language subtag for Czech; "cz" is kept so
    -- documents that relied on the previous spelling keep working.
    fromIso "cs"  = "czech"
    fromIso "cz"  = "czech"
    fromIso "cop" = "coptic"
    fromIso "da"  = "danish"
    fromIso "dv"  = "divehi"
    fromIso "el"  = "greek"
    fromIso "en"  = "english"
    fromIso "eo"  = "esperanto"
    fromIso "es"  = "spanish"
    fromIso "et"  = "estonian"
    fromIso "eu"  = "basque"
    fromIso "fa"  = "farsi"
    fromIso "fi"  = "finnish"
    fromIso "fr"  = "french"
    fromIso "fur" = "friulan"
    fromIso "ga"  = "irish"
    fromIso "gd"  = "scottish"
    fromIso "gl"  = "galician"
    fromIso "he"  = "hebrew"
    fromIso "hi"  = "hindi"
    fromIso "hr"  = "croatian"
    fromIso "hy"  = "armenian"
    fromIso "hu"  = "magyar"
    fromIso "ia"  = "interlingua"
    fromIso "id"  = "indonesian"
    fromIso "ie"  = "interlingua"
    fromIso "is"  = "icelandic"
    fromIso "it"  = "italian"
    -- "ja" is the registered language subtag for Japanese; "jp" is kept so
    -- documents that relied on the previous spelling keep working.
    fromIso "ja"  = "japanese"
    fromIso "jp"  = "japanese"
    fromIso "km"  = "khmer"
    fromIso "kn"  = "kannada"
    fromIso "ko"  = "korean"
    fromIso "la"  = "latin"
    fromIso "lo"  = "lao"
    fromIso "lt"  = "lithuanian"
    fromIso "lv"  = "latvian"
    fromIso "ml"  = "malayalam"
    fromIso "mn"  = "mongolian"
    fromIso "mr"  = "marathi"
    fromIso "nb"  = "norsk"
    fromIso "nl"  = "dutch"
    fromIso "nn"  = "nynorsk"
    fromIso "no"  = "norsk"
    fromIso "nqo" = "nko"
    fromIso "oc"  = "occitan"
    fromIso "pl"  = "polish"
    fromIso "pms" = "piedmontese"
    fromIso "pt"  = "portuguese"
    fromIso "rm"  = "romansh"
    fromIso "ro"  = "romanian"
    fromIso "ru"  = "russian"
    fromIso "sa"  = "sanskrit"
    fromIso "se"  = "samin"
    fromIso "sk"  = "slovak"
    fromIso "sq"  = "albanian"
    fromIso "sr"  = "serbian"
    fromIso "sv"  = "swedish"
    fromIso "syr" = "syriac"
    fromIso "ta"  = "tamil"
    fromIso "te"  = "telugu"
    fromIso "th"  = "thai"
    fromIso "tk"  = "turkmen"
    fromIso "tr"  = "turkish"
    fromIso "uk"  = "ukrainian"
    fromIso "ur"  = "urdu"
    fromIso "vi"  = "vietnamese"
    -- Unknown language: the callers treat "" as "no language set".
    fromIso _     = ""
|
||||||
|
|
Loading…
Add table
Reference in a new issue