LaTeX writer: use BCP47 parser.
This commit is contained in:
parent
700a0843b2
commit
b2fe009d8f
1 changed files with 105 additions and 89 deletions
|
@ -39,12 +39,13 @@ import Control.Monad.State.Strict
|
|||
import Data.Aeson (FromJSON, object, (.=))
|
||||
import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord,
|
||||
toLower)
|
||||
import Data.List (foldl', intercalate, intersperse, isInfixOf, nub, nubBy,
|
||||
import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy,
|
||||
stripPrefix, (\\))
|
||||
import Data.Maybe (catMaybes, fromMaybe, isJust)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Network.URI (unEscapeString)
|
||||
import Text.Pandoc.BCP47 (Lang(..), toLang, getLang, renderLang)
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
|
||||
|
@ -188,7 +189,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do
|
|||
st <- get
|
||||
titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
|
||||
authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
|
||||
let docLangs = nub $ query (extract "lang") blocks
|
||||
docLangs <- catMaybes <$>
|
||||
mapM (toLang . Just) (ordNub (query (extract "lang") blocks))
|
||||
let hasStringValue x = isJust (getField x metadata :: Maybe String)
|
||||
let geometryFromMargins = intercalate [','] $ catMaybes $
|
||||
map (\(x,y) ->
|
||||
|
@ -198,6 +200,18 @@ pandocToLaTeX options (Pandoc meta blocks) = do
|
|||
,("tmargin","margin-top")
|
||||
,("bmargin","margin-bottom")
|
||||
]
|
||||
let toPolyObj lang = object [ "name" .= T.pack name
|
||||
, "options" .= T.pack opts ]
|
||||
where
|
||||
(name, opts) = toPolyglossia lang
|
||||
mblang <- toLang $ case getLang options meta of
|
||||
Just l -> Just l
|
||||
Nothing | null docLangs -> Nothing
|
||||
| otherwise -> Just "en"
|
||||
-- we need a default here since lang is used in template conditionals
|
||||
|
||||
let dirs = query (extract "dir") blocks
|
||||
|
||||
let context = defField "toc" (writerTableOfContents options) $
|
||||
defField "toc-depth" (show (writerTOCDepth options -
|
||||
if stBook st
|
||||
|
@ -235,26 +249,20 @@ pandocToLaTeX options (Pandoc meta blocks) = do
|
|||
Biblatex -> defField "biblio-title" biblioTitle .
|
||||
defField "biblatex" True
|
||||
_ -> id) $
|
||||
-- set lang to something so polyglossia/babel is included
|
||||
defField "lang" (if null docLangs then ""::String else "en") $
|
||||
defField "otherlangs" docLangs $
|
||||
defField "colorlinks" (any hasStringValue
|
||||
["citecolor", "urlcolor", "linkcolor", "toccolor"]) $
|
||||
defField "dir" (if (null $ query (extract "dir") blocks)
|
||||
then ""::String
|
||||
else "ltr") $
|
||||
(if null dirs
|
||||
then id
|
||||
else defField "dir" ("ltr" :: String)) $
|
||||
defField "section-titles" True $
|
||||
defField "geometry" geometryFromMargins $
|
||||
metadata
|
||||
let toPolyObj lang = object [ "name" .= T.pack name
|
||||
, "options" .= T.pack opts ]
|
||||
where
|
||||
(name, opts) = toPolyglossia lang
|
||||
let lang = maybe [] (splitBy (=='-')) $ getField "lang" context
|
||||
otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context
|
||||
let context' =
|
||||
defField "babel-lang" (toBabel lang)
|
||||
$ defField "babel-otherlangs" (map toBabel otherlangs)
|
||||
-- note: lang is used in some conditionals in the template,
|
||||
-- so we need to set it if we have any babel/polyglossia:
|
||||
maybe id (defField "lang" . renderLang) mblang
|
||||
$ maybe id (defField "babel-lang" . toBabel) mblang
|
||||
$ defField "babel-otherlangs" (map toBabel docLangs)
|
||||
$ defField "babel-newcommands" (concatMap (\(poly, babel) ->
|
||||
-- \textspanish and \textgalician are already used by babel
|
||||
-- save them as \oritext... and let babel use that
|
||||
|
@ -274,16 +282,12 @@ pandocToLaTeX options (Pandoc meta blocks) = do
|
|||
-- eliminate duplicates that have same polyglossia name
|
||||
$ nubBy (\a b -> fst a == fst b)
|
||||
-- find polyglossia and babel names of languages used in the document
|
||||
$ map (\l ->
|
||||
let lng = splitBy (=='-') l
|
||||
in (fst $ toPolyglossia lng, toBabel lng)
|
||||
)
|
||||
docLangs )
|
||||
$ defField "polyglossia-lang" (toPolyObj lang)
|
||||
$ defField "polyglossia-otherlangs" (map toPolyObj otherlangs)
|
||||
$ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of
|
||||
Just "rtl" -> True
|
||||
_ -> False)
|
||||
$ map (\l -> (fst $ toPolyglossia l, toBabel l)) docLangs
|
||||
)
|
||||
$ maybe id (defField "polyglossia-lang" . toPolyObj) mblang
|
||||
$ defField "polyglossia-otherlangs" (map toPolyObj docLangs)
|
||||
$ defField "latex-dir-rtl"
|
||||
(getField "dir" context == Just ("rtl" :: String))
|
||||
$ context
|
||||
case writerTemplate options of
|
||||
Nothing -> return main
|
||||
|
@ -443,11 +447,12 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do
|
|||
-> "\\leavevmode" <> linkAnchor' <> "%"
|
||||
_ -> linkAnchor'
|
||||
let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
|
||||
lang <- toLang $ lookup "lang" kvs
|
||||
let wrapDir = case lookup "dir" kvs of
|
||||
Just "rtl" -> align "RTL"
|
||||
Just "ltr" -> align "LTR"
|
||||
_ -> id
|
||||
wrapLang txt = case lookup "lang" kvs of
|
||||
wrapLang txt = case lang of
|
||||
Just lng -> let (l, o) = toPolyglossiaEnv lng
|
||||
ops = if null o
|
||||
then ""
|
||||
|
@ -918,13 +923,14 @@ inlineToLaTeX :: PandocMonad m
|
|||
-> LW m Doc
|
||||
inlineToLaTeX (Span (id',classes,kvs) ils) = do
|
||||
linkAnchor <- hypertarget False id' empty
|
||||
lang <- toLang $ lookup "lang" kvs
|
||||
let cmds = ["textup" | "csl-no-emph" `elem` classes] ++
|
||||
["textnormal" | "csl-no-strong" `elem` classes ||
|
||||
"csl-no-smallcaps" `elem` classes] ++
|
||||
["RL" | ("dir", "rtl") `elem` kvs] ++
|
||||
["LR" | ("dir", "ltr") `elem` kvs] ++
|
||||
(case lookup "lang" kvs of
|
||||
Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng
|
||||
(case lang of
|
||||
Just lng -> let (l, o) = toPolyglossia lng
|
||||
ops = if null o then "" else ("[" ++ o ++ "]")
|
||||
in ["text" ++ l ++ ops]
|
||||
Nothing -> [])
|
||||
|
@ -1254,85 +1260,95 @@ lookKey :: String -> Attr -> [String]
|
|||
lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs
|
||||
|
||||
-- In environments \Arabic instead of \arabic is used
|
||||
toPolyglossiaEnv :: String -> (String, String)
|
||||
toPolyglossiaEnv :: Lang -> (String, String)
|
||||
toPolyglossiaEnv l =
|
||||
case toPolyglossia $ (splitBy (=='-')) l of
|
||||
case toPolyglossia l of
|
||||
("arabic", o) -> ("Arabic", o)
|
||||
x -> x
|
||||
|
||||
-- Takes a list of the constituents of a BCP 47 language code and
|
||||
-- converts it to a Polyglossia (language, options) tuple
|
||||
-- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf
|
||||
toPolyglossia :: [String] -> (String, String)
|
||||
toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria")
|
||||
toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq")
|
||||
toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq")
|
||||
toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq")
|
||||
toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya")
|
||||
toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco")
|
||||
toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania")
|
||||
toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq")
|
||||
toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq")
|
||||
toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia")
|
||||
toPolyglossia ("de":"1901":_) = ("german", "spelling=old")
|
||||
toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old")
|
||||
toPolyglossia ("de":"AT":_) = ("german", "variant=austrian")
|
||||
toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old")
|
||||
toPolyglossia ("de":"CH":_) = ("german", "variant=swiss")
|
||||
toPolyglossia ("de":_) = ("german", "")
|
||||
toPolyglossia ("dsb":_) = ("lsorbian", "")
|
||||
toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly")
|
||||
toPolyglossia ("en":"AU":_) = ("english", "variant=australian")
|
||||
toPolyglossia ("en":"CA":_) = ("english", "variant=canadian")
|
||||
toPolyglossia ("en":"GB":_) = ("english", "variant=british")
|
||||
toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand")
|
||||
toPolyglossia ("en":"UK":_) = ("english", "variant=british")
|
||||
toPolyglossia ("en":"US":_) = ("english", "variant=american")
|
||||
toPolyglossia ("grc":_) = ("greek", "variant=ancient")
|
||||
toPolyglossia ("hsb":_) = ("usorbian", "")
|
||||
toPolyglossia ("la":"x":"classic":_) = ("latin", "variant=classic")
|
||||
toPolyglossia ("sl":_) = ("slovenian", "")
|
||||
toPolyglossia x = (commonFromBcp47 x, "")
|
||||
toPolyglossia :: Lang -> (String, String)
|
||||
toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria")
|
||||
toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq")
|
||||
toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq")
|
||||
toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq")
|
||||
toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya")
|
||||
toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco")
|
||||
toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania")
|
||||
toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq")
|
||||
toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq")
|
||||
toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia")
|
||||
toPolyglossia (Lang "de" _ _ vars)
|
||||
| "1901" `elem` vars = ("german", "spelling=old")
|
||||
toPolyglossia (Lang "de" _ "AT" vars)
|
||||
| "1901" `elem` vars = ("german", "variant=austrian, spelling=old")
|
||||
toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian")
|
||||
toPolyglossia (Lang "de" _ "CH" vars)
|
||||
| "1901" `elem` vars = ("german", "variant=swiss, spelling=old")
|
||||
toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss")
|
||||
toPolyglossia (Lang "de" _ _ _) = ("german", "")
|
||||
toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "")
|
||||
toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly")
|
||||
toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian")
|
||||
toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian")
|
||||
toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british")
|
||||
toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand")
|
||||
toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british")
|
||||
toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american")
|
||||
toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient")
|
||||
toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "")
|
||||
toPolyglossia (Lang "la" _ _ vars)
|
||||
| "x-classic" `elem` vars = ("latin", "variant=classic")
|
||||
toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "")
|
||||
toPolyglossia x = (commonFromBcp47 x, "")
|
||||
|
||||
-- Takes a list of the constituents of a BCP 47 language code and
|
||||
-- converts it to a Babel language string.
|
||||
-- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf
|
||||
-- List of supported languages (slightly outdated):
|
||||
-- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf
|
||||
toBabel :: [String] -> String
|
||||
toBabel ("de":"1901":_) = "german"
|
||||
toBabel ("de":"AT":"1901":_) = "austrian"
|
||||
toBabel ("de":"AT":_) = "naustrian"
|
||||
toBabel ("de":"CH":"1901":_) = "swissgerman"
|
||||
toBabel ("de":"CH":_) = "nswissgerman"
|
||||
toBabel ("de":_) = "ngerman"
|
||||
toBabel ("dsb":_) = "lowersorbian"
|
||||
toBabel ("el":"polyton":_) = "polutonikogreek"
|
||||
toBabel ("en":"AU":_) = "australian"
|
||||
toBabel ("en":"CA":_) = "canadian"
|
||||
toBabel ("en":"GB":_) = "british"
|
||||
toBabel ("en":"NZ":_) = "newzealand"
|
||||
toBabel ("en":"UK":_) = "british"
|
||||
toBabel ("en":"US":_) = "american"
|
||||
toBabel ("fr":"CA":_) = "canadien"
|
||||
toBabel ("fra":"aca":_) = "acadian"
|
||||
toBabel ("grc":_) = "polutonikogreek"
|
||||
toBabel ("hsb":_) = "uppersorbian"
|
||||
toBabel ("la":"x":"classic":_) = "classiclatin"
|
||||
toBabel ("sl":_) = "slovene"
|
||||
toBabel x = commonFromBcp47 x
|
||||
toBabel :: Lang -> String
|
||||
toBabel (Lang "de" _ "AT" vars)
|
||||
| "1901" `elem` vars = "austrian"
|
||||
| otherwise = "naustrian"
|
||||
toBabel (Lang "de" _ "CH" vars)
|
||||
| "1901" `elem` vars = "swissgerman"
|
||||
| otherwise = "nswissgerman"
|
||||
toBabel (Lang "de" _ _ vars)
|
||||
| "1901" `elem` vars = "german"
|
||||
| otherwise = "ngerman"
|
||||
toBabel (Lang "dsb" _ _ _) = "lowersorbian"
|
||||
toBabel (Lang "el" _ _ vars)
|
||||
| "polyton" `elem` vars = "polutonikogreek"
|
||||
toBabel (Lang "en" _ "AU" _) = "australian"
|
||||
toBabel (Lang "en" _ "CA" _) = "canadian"
|
||||
toBabel (Lang "en" _ "GB" _) = "british"
|
||||
toBabel (Lang "en" _ "NZ" _) = "newzealand"
|
||||
toBabel (Lang "en" _ "UK" _) = "british"
|
||||
toBabel (Lang "en" _ "US" _) = "american"
|
||||
toBabel (Lang "fr" _ "CA" _) = "canadien"
|
||||
toBabel (Lang "fra" _ _ vars)
|
||||
| "aca" `elem` vars = "acadian"
|
||||
toBabel (Lang "grc" _ _ _) = "polutonikogreek"
|
||||
toBabel (Lang "hsb" _ _ _) = "uppersorbian"
|
||||
toBabel (Lang "la" _ _ vars)
|
||||
| "x-classic" `elem` vars = "classiclatin"
|
||||
toBabel (Lang "sl" _ _ _) = "slovene"
|
||||
toBabel x = commonFromBcp47 x
|
||||
|
||||
-- Takes a list of the constituents of a BCP 47 language code
|
||||
-- and converts it to a string shared by Babel and Polyglossia.
|
||||
-- https://tools.ietf.org/html/bcp47#section-2.1
|
||||
commonFromBcp47 :: [String] -> String
|
||||
commonFromBcp47 [] = ""
|
||||
commonFromBcp47 ("pt":"BR":_) = "brazil"
|
||||
commonFromBcp47 :: Lang -> String
|
||||
commonFromBcp47 (Lang "pt" _ "BR" _) = "brazil"
|
||||
-- Note: documentation says "brazilian" works too, but it doesn't seem to work
|
||||
-- on some systems. See #2953.
|
||||
commonFromBcp47 ("sr":"Cyrl":_) = "serbianc"
|
||||
commonFromBcp47 ("zh":"Latn":"pinyin":_) = "pinyin"
|
||||
commonFromBcp47 x = fromIso $ head x
|
||||
commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc"
|
||||
commonFromBcp47 (Lang "zh" "Latn" _ vars)
|
||||
| "pinyin" `elem` vars = "pinyin"
|
||||
commonFromBcp47 (Lang l _ _ _) = fromIso l
|
||||
where
|
||||
fromIso "af" = "afrikaans"
|
||||
fromIso "am" = "amharic"
|
||||
|
|
Loading…
Add table
Reference in a new issue