Refactored ConTeXt writer to use BCP47.

BCP47 - consistent case for BCP47 fields (e.g. uppercase
for region).
This commit is contained in:
John MacFarlane 2017-06-25 21:56:29 +02:00
parent ac9423eccc
commit d0d2443f2e
2 changed files with 39 additions and 39 deletions

View file

@ -35,7 +35,7 @@ module Text.Pandoc.BCP47 (
) )
where where
import Control.Monad (guard) import Control.Monad (guard)
import Data.Char (isAscii, isLetter, isUpper, isLower) import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower)
import Data.List (intercalate) import Data.List (intercalate)
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Class (PandocMonad, report)
@ -93,19 +93,19 @@ parseBCP47 lang =
cs <- P.many1 asciiLetter cs <- P.many1 asciiLetter
let lcs = length cs let lcs = length cs
guard $ lcs == 2 || lcs == 3 guard $ lcs == 2 || lcs == 3
return cs return $ map toLower cs
pScript = P.try $ do pScript = P.try $ do
P.char '-' P.char '-'
x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
xs <- P.count 3 xs <- P.count 3
(P.satisfy (\c -> isAscii c && isLetter c && isLower c)) (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
return (x:xs) return $ map toLower (x:xs)
pRegion = P.try $ do pRegion = P.try $ do
P.char '-' P.char '-'
cs <- P.many1 asciiLetter cs <- P.many1 asciiLetter
let lcs = length cs let lcs = length cs
guard $ lcs == 2 || lcs == 3 guard $ lcs == 2 || lcs == 3
return cs return $ map toUpper cs
pVariant = P.try $ do pVariant = P.try $ do
P.char '-' P.char '-'
ds <- P.option "" (P.count 1 P.digit) ds <- P.option "" (P.count 1 P.digit)
@ -114,4 +114,4 @@ parseBCP47 lang =
guard $ if null ds guard $ if null ds
then length var >= 5 && length var <= 8 then length var >= 5 && length var <= 8
else length var == 4 else length var == 4
return var return $ map toLower var

View file

@ -35,6 +35,7 @@ import Data.List (intercalate, intersperse)
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes)
import Data.Text (Text) import Data.Text (Text)
import Network.URI (unEscapeString) import Network.URI (unEscapeString)
import Text.Pandoc.BCP47
import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Logging import Text.Pandoc.Logging
import Text.Pandoc.Definition import Text.Pandoc.Definition
@ -88,6 +89,7 @@ pandocToConTeXt options (Pandoc meta blocks) = do
,("top","margin-top") ,("top","margin-top")
,("bottom","margin-bottom") ,("bottom","margin-bottom")
] ]
lang <- maybe "" fromBCP47 <$> getLang options meta
let context = defField "toc" (writerTableOfContents options) let context = defField "toc" (writerTableOfContents options)
$ defField "placelist" (intercalate ("," :: String) $ $ defField "placelist" (intercalate ("," :: String) $
take (writerTOCDepth options + take (writerTOCDepth options +
@ -100,11 +102,10 @@ pandocToConTeXt options (Pandoc meta blocks) = do
$ defField "body" main $ defField "body" main
$ defField "layout" layoutFromMargins $ defField "layout" layoutFromMargins
$ defField "number-sections" (writerNumberSections options) $ defField "number-sections" (writerNumberSections options)
$ defField "context-lang" lang
$ metadata $ metadata
let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $ let context' = defField "context-dir" (toContextDir
getField "lang" context) $ getField "dir" context) context
$ defField "context-dir" (toContextDir $ getField "dir" context)
$ context
case writerTemplate options of case writerTemplate options of
Nothing -> return main Nothing -> return main
Just tpl -> renderTemplate' tpl context' Just tpl -> renderTemplate' tpl context'
@ -196,7 +197,7 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do
_ -> id _ -> id
wrapLang txt = case lookup "lang" kvs of wrapLang txt = case lookup "lang" kvs of
Just lng -> "\\start\\language[" Just lng -> "\\start\\language["
<> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop" <> text (fromBCP47' lng) <> "]" $$ txt $$ "\\stop"
Nothing -> txt Nothing -> txt
wrapBlank txt = blankline <> txt <> blankline wrapBlank txt = blankline <> txt <> blankline
fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs
@ -421,7 +422,7 @@ inlineToConTeXt (Span (_,_,kvs) ils) = do
Just "ltr" -> braces $ "\\lefttoright " <> txt Just "ltr" -> braces $ "\\lefttoright " <> txt
_ -> txt _ -> txt
wrapLang txt = case lookup "lang" kvs of wrapLang txt = case lookup "lang" kvs of
Just lng -> "\\start\\language[" <> text (fromBcp47' lng) Just lng -> "\\start\\language[" <> text (fromBCP47' lng)
<> "]" <> txt <> "\\stop " <> "]" <> txt <> "\\stop "
Nothing -> txt Nothing -> txt
fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils
@ -458,36 +459,35 @@ sectionHeader (ident,classes,_) hdrLevel lst = do
<> blankline <> blankline
_ -> contents <> blankline _ -> contents <> blankline
fromBcp47' :: String -> String fromBCP47' :: String -> String
fromBcp47' = fromBcp47 . splitBy (=='-') fromBCP47' s = case parseBCP47 s of
Right r -> fromBCP47 r
Left _ -> ""
-- Takes a list of the constituents of a BCP 47 language code -- Takes a list of the constituents of a BCP 47 language code
-- and irons out ConTeXt's exceptions -- and irons out ConTeXt's exceptions
-- https://tools.ietf.org/html/bcp47#section-2.1 -- https://tools.ietf.org/html/bcp47#section-2.1
-- http://wiki.contextgarden.net/Language_Codes -- http://wiki.contextgarden.net/Language_Codes
fromBcp47 :: [String] -> String fromBCP47 :: Lang -> String
fromBcp47 [] = "" fromBCP47 (Lang "ar" _ "SY" _) = "ar-sy"
fromBcp47 ("ar":"SY":_) = "ar-sy" fromBCP47 (Lang "ar" _ "IQ" _) = "ar-iq"
fromBcp47 ("ar":"IQ":_) = "ar-iq" fromBCP47 (Lang "ar" _ "JO" _) = "ar-jo"
fromBcp47 ("ar":"JO":_) = "ar-jo" fromBCP47 (Lang "ar" _ "LB" _) = "ar-lb"
fromBcp47 ("ar":"LB":_) = "ar-lb" fromBCP47 (Lang "ar" _ "DZ" _) = "ar-dz"
fromBcp47 ("ar":"DZ":_) = "ar-dz" fromBCP47 (Lang "ar" _ "MA" _) = "ar-ma"
fromBcp47 ("ar":"MA":_) = "ar-ma" fromBCP47 (Lang "de" _ _ ["1901"]) = "deo"
fromBcp47 ("de":"1901":_) = "deo" fromBCP47 (Lang "de" _ "DE" _) = "de-de"
fromBcp47 ("de":"DE":_) = "de-de" fromBCP47 (Lang "de" _ "AT" _) = "de-at"
fromBcp47 ("de":"AT":_) = "de-at" fromBCP47 (Lang "de" _ "CH" _) = "de-ch"
fromBcp47 ("de":"CH":_) = "de-ch" fromBCP47 (Lang "el" _ _ ["poly"]) = "agr"
fromBcp47 ("el":"poly":_) = "agr" fromBCP47 (Lang "en" _ "US" _) = "en-us"
fromBcp47 ("en":"US":_) = "en-us" fromBCP47 (Lang "en" _ "GB" _) = "en-gb"
fromBcp47 ("en":"GB":_) = "en-gb" fromBCP47 (Lang "grc"_ _ _) = "agr"
fromBcp47 ("grc":_) = "agr" fromBCP47 (Lang "el" _ _ _) = "gr"
fromBcp47 x = fromIso $ head x fromBCP47 (Lang "eu" _ _ _) = "ba"
where fromBCP47 (Lang "he" _ _ _) = "il"
fromIso "el" = "gr" fromBCP47 (Lang "jp" _ _ _) = "ja"
fromIso "eu" = "ba" fromBCP47 (Lang "uk" _ _ _) = "ua"
fromIso "he" = "il" fromBCP47 (Lang "vi" _ _ _) = "vn"
fromIso "jp" = "ja" fromBCP47 (Lang "zh" _ _ _) = "cn"
fromIso "uk" = "ua" fromBCP47 (Lang l _ _ _) = l
fromIso "vi" = "vn"
fromIso "zh" = "cn"
fromIso l = l