Refactored ConTeXt writer to use BCP47.

BCP47 - consistent case for BCP47 fields (e.g. uppercase
for region).
This commit is contained in:
John MacFarlane 2017-06-25 21:56:29 +02:00
parent ac9423eccc
commit d0d2443f2e
2 changed files with 39 additions and 39 deletions

View file

@ -35,7 +35,7 @@ module Text.Pandoc.BCP47 (
)
where
import Control.Monad (guard)
import Data.Char (isAscii, isLetter, isUpper, isLower)
import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower)
import Data.List (intercalate)
import Text.Pandoc.Definition
import Text.Pandoc.Class (PandocMonad, report)
@ -93,19 +93,19 @@ parseBCP47 lang =
cs <- P.many1 asciiLetter
let lcs = length cs
guard $ lcs == 2 || lcs == 3
return cs
return $ map toLower cs
pScript = P.try $ do
P.char '-'
x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
xs <- P.count 3
(P.satisfy (\c -> isAscii c && isLetter c && isLower c))
return (x:xs)
return $ map toLower (x:xs)
pRegion = P.try $ do
P.char '-'
cs <- P.many1 asciiLetter
let lcs = length cs
guard $ lcs == 2 || lcs == 3
return cs
return $ map toUpper cs
pVariant = P.try $ do
P.char '-'
ds <- P.option "" (P.count 1 P.digit)
@ -114,4 +114,4 @@ parseBCP47 lang =
guard $ if null ds
then length var >= 5 && length var <= 8
else length var == 4
return var
return $ map toLower var

View file

@ -35,6 +35,7 @@ import Data.List (intercalate, intersperse)
import Data.Maybe (catMaybes)
import Data.Text (Text)
import Network.URI (unEscapeString)
import Text.Pandoc.BCP47
import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Logging
import Text.Pandoc.Definition
@ -88,6 +89,7 @@ pandocToConTeXt options (Pandoc meta blocks) = do
,("top","margin-top")
,("bottom","margin-bottom")
]
lang <- maybe "" fromBCP47 <$> getLang options meta
let context = defField "toc" (writerTableOfContents options)
$ defField "placelist" (intercalate ("," :: String) $
take (writerTOCDepth options +
@ -100,11 +102,10 @@ pandocToConTeXt options (Pandoc meta blocks) = do
$ defField "body" main
$ defField "layout" layoutFromMargins
$ defField "number-sections" (writerNumberSections options)
$ defField "context-lang" lang
$ metadata
let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $
getField "lang" context)
$ defField "context-dir" (toContextDir $ getField "dir" context)
$ context
let context' = defField "context-dir" (toContextDir
$ getField "dir" context) context
case writerTemplate options of
Nothing -> return main
Just tpl -> renderTemplate' tpl context'
@ -196,7 +197,7 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do
_ -> id
wrapLang txt = case lookup "lang" kvs of
Just lng -> "\\start\\language["
<> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop"
<> text (fromBCP47' lng) <> "]" $$ txt $$ "\\stop"
Nothing -> txt
wrapBlank txt = blankline <> txt <> blankline
fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs
@ -421,7 +422,7 @@ inlineToConTeXt (Span (_,_,kvs) ils) = do
Just "ltr" -> braces $ "\\lefttoright " <> txt
_ -> txt
wrapLang txt = case lookup "lang" kvs of
Just lng -> "\\start\\language[" <> text (fromBcp47' lng)
Just lng -> "\\start\\language[" <> text (fromBCP47' lng)
<> "]" <> txt <> "\\stop "
Nothing -> txt
fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils
@ -458,36 +459,35 @@ sectionHeader (ident,classes,_) hdrLevel lst = do
<> blankline
_ -> contents <> blankline
fromBcp47' :: String -> String
fromBcp47' = fromBcp47 . splitBy (=='-')
fromBCP47' :: String -> String
fromBCP47' s = case parseBCP47 s of
Right r -> fromBCP47 r
Left _ -> ""
-- Takes a list of the constituents of a BCP 47 language code
-- and irons out ConTeXt's exceptions
-- https://tools.ietf.org/html/bcp47#section-2.1
-- http://wiki.contextgarden.net/Language_Codes
fromBcp47 :: [String] -> String
fromBcp47 [] = ""
fromBcp47 ("ar":"SY":_) = "ar-sy"
fromBcp47 ("ar":"IQ":_) = "ar-iq"
fromBcp47 ("ar":"JO":_) = "ar-jo"
fromBcp47 ("ar":"LB":_) = "ar-lb"
fromBcp47 ("ar":"DZ":_) = "ar-dz"
fromBcp47 ("ar":"MA":_) = "ar-ma"
fromBcp47 ("de":"1901":_) = "deo"
fromBcp47 ("de":"DE":_) = "de-de"
fromBcp47 ("de":"AT":_) = "de-at"
fromBcp47 ("de":"CH":_) = "de-ch"
fromBcp47 ("el":"poly":_) = "agr"
fromBcp47 ("en":"US":_) = "en-us"
fromBcp47 ("en":"GB":_) = "en-gb"
fromBcp47 ("grc":_) = "agr"
fromBcp47 x = fromIso $ head x
where
fromIso "el" = "gr"
fromIso "eu" = "ba"
fromIso "he" = "il"
fromIso "jp" = "ja"
fromIso "uk" = "ua"
fromIso "vi" = "vn"
fromIso "zh" = "cn"
fromIso l = l
fromBCP47 :: Lang -> String
fromBCP47 (Lang "ar" _ "SY" _) = "ar-sy"
fromBCP47 (Lang "ar" _ "IQ" _) = "ar-iq"
fromBCP47 (Lang "ar" _ "JO" _) = "ar-jo"
fromBCP47 (Lang "ar" _ "LB" _) = "ar-lb"
fromBCP47 (Lang "ar" _ "DZ" _) = "ar-dz"
fromBCP47 (Lang "ar" _ "MA" _) = "ar-ma"
fromBCP47 (Lang "de" _ _ ["1901"]) = "deo"
fromBCP47 (Lang "de" _ "DE" _) = "de-de"
fromBCP47 (Lang "de" _ "AT" _) = "de-at"
fromBCP47 (Lang "de" _ "CH" _) = "de-ch"
fromBCP47 (Lang "el" _ _ ["poly"]) = "agr"
fromBCP47 (Lang "en" _ "US" _) = "en-us"
fromBCP47 (Lang "en" _ "GB" _) = "en-gb"
fromBCP47 (Lang "grc"_ _ _) = "agr"
fromBCP47 (Lang "el" _ _ _) = "gr"
fromBCP47 (Lang "eu" _ _ _) = "ba"
fromBCP47 (Lang "he" _ _ _) = "il"
fromBCP47 (Lang "jp" _ _ _) = "ja"
fromBCP47 (Lang "uk" _ _ _) = "ua"
fromBCP47 (Lang "vi" _ _ _) = "vn"
fromBCP47 (Lang "zh" _ _ _) = "cn"
fromBCP47 (Lang l _ _ _) = l