From d0d2443f2e069c9aa4510579f10ed8fe0b5f20ab Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 21:56:29 +0200 Subject: [PATCH] Refactored ConTeXt writer to use BCP47. BCP47 - consistent case for BCP47 fields (e.g. uppercase for region). --- src/Text/Pandoc/BCP47.hs | 10 ++--- src/Text/Pandoc/Writers/ConTeXt.hs | 68 +++++++++++++++--------------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs index ae7f54473..956130fb7 100644 --- a/src/Text/Pandoc/BCP47.hs +++ b/src/Text/Pandoc/BCP47.hs @@ -35,7 +35,7 @@ module Text.Pandoc.BCP47 ( ) where import Control.Monad (guard) -import Data.Char (isAscii, isLetter, isUpper, isLower) +import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower) import Data.List (intercalate) import Text.Pandoc.Definition import Text.Pandoc.Class (PandocMonad, report) @@ -93,19 +93,19 @@ parseBCP47 lang = cs <- P.many1 asciiLetter let lcs = length cs guard $ lcs == 2 || lcs == 3 - return cs + return $ map toLower cs pScript = P.try $ do P.char '-' x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) xs <- P.count 3 (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) - return (x:xs) + return $ map toLower (x:xs) pRegion = P.try $ do P.char '-' cs <- P.many1 asciiLetter let lcs = length cs guard $ lcs == 2 || lcs == 3 - return cs + return $ map toUpper cs pVariant = P.try $ do P.char '-' ds <- P.option "" (P.count 1 P.digit) @@ -114,4 +114,4 @@ parseBCP47 lang = guard $ if null ds then length var >= 5 && length var <= 8 else length var == 4 - return var + return $ map toLower var diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 5a81aa8a0..ae6cb482f 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -35,6 +35,7 @@ import Data.List (intercalate, intersperse) import Data.Maybe (catMaybes) import Data.Text (Text) import Network.URI (unEscapeString) +import Text.Pandoc.BCP47 import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition @@ -88,6 +89,7 @@ pandocToConTeXt options (Pandoc meta blocks) = do ,("top","margin-top") ,("bottom","margin-bottom") ] + lang <- maybe "" fromBCP47 <$> getLang options meta let context = defField "toc" (writerTableOfContents options) $ defField "placelist" (intercalate ("," :: String) $ take (writerTOCDepth options + @@ -100,11 +102,10 @@ pandocToConTeXt options (Pandoc meta blocks) = do $ defField "body" main $ defField "layout" layoutFromMargins $ defField "number-sections" (writerNumberSections options) + $ defField "context-lang" lang $ metadata - let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $ - getField "lang" context) - $ defField "context-dir" (toContextDir $ getField "dir" context) - $ context + let context' = defField "context-dir" (toContextDir + $ getField "dir" context) context case writerTemplate options of Nothing -> return main Just tpl -> renderTemplate' tpl context' @@ -196,7 +197,7 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do _ -> id wrapLang txt = case lookup "lang" kvs of Just lng -> "\\start\\language[" - <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop" + <> text (fromBCP47' lng) <> "]" $$ txt $$ "\\stop" Nothing -> txt wrapBlank txt = blankline <> txt <> blankline fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs @@ -421,7 +422,7 @@ inlineToConTeXt (Span (_,_,kvs) ils) = do Just "ltr" -> braces $ "\\lefttoright " <> txt _ -> txt wrapLang txt = case lookup "lang" kvs of - Just lng -> "\\start\\language[" <> text (fromBcp47' lng) + Just lng -> "\\start\\language[" <> text (fromBCP47' lng) <> "]" <> txt <> "\\stop " Nothing -> txt fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils @@ -458,36 +459,35 @@ sectionHeader (ident,classes,_) hdrLevel lst = do <> blankline _ -> contents <> blankline -fromBcp47' :: String -> String -fromBcp47' = fromBcp47 . splitBy (=='-') +fromBCP47' :: String -> String +fromBCP47' s = case parseBCP47 s of + Right r -> fromBCP47 r + Left _ -> "" -- Takes a list of the constituents of a BCP 47 language code -- and irons out ConTeXt's exceptions -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes -fromBcp47 :: [String] -> String -fromBcp47 [] = "" -fromBcp47 ("ar":"SY":_) = "ar-sy" -fromBcp47 ("ar":"IQ":_) = "ar-iq" -fromBcp47 ("ar":"JO":_) = "ar-jo" -fromBcp47 ("ar":"LB":_) = "ar-lb" -fromBcp47 ("ar":"DZ":_) = "ar-dz" -fromBcp47 ("ar":"MA":_) = "ar-ma" -fromBcp47 ("de":"1901":_) = "deo" -fromBcp47 ("de":"DE":_) = "de-de" -fromBcp47 ("de":"AT":_) = "de-at" -fromBcp47 ("de":"CH":_) = "de-ch" -fromBcp47 ("el":"poly":_) = "agr" -fromBcp47 ("en":"US":_) = "en-us" -fromBcp47 ("en":"GB":_) = "en-gb" -fromBcp47 ("grc":_) = "agr" -fromBcp47 x = fromIso $ head x - where - fromIso "el" = "gr" - fromIso "eu" = "ba" - fromIso "he" = "il" - fromIso "jp" = "ja" - fromIso "uk" = "ua" - fromIso "vi" = "vn" - fromIso "zh" = "cn" - fromIso l = l +fromBCP47 :: Lang -> String +fromBCP47 (Lang "ar" _ "SY" _) = "ar-sy" +fromBCP47 (Lang "ar" _ "IQ" _) = "ar-iq" +fromBCP47 (Lang "ar" _ "JO" _) = "ar-jo" +fromBCP47 (Lang "ar" _ "LB" _) = "ar-lb" +fromBCP47 (Lang "ar" _ "DZ" _) = "ar-dz" +fromBCP47 (Lang "ar" _ "MA" _) = "ar-ma" +fromBCP47 (Lang "de" _ _ ["1901"]) = "deo" +fromBCP47 (Lang "de" _ "DE" _) = "de-de" +fromBCP47 (Lang "de" _ "AT" _) = "de-at" +fromBCP47 (Lang "de" _ "CH" _) = "de-ch" +fromBCP47 (Lang "el" _ _ ["poly"]) = "agr" +fromBCP47 (Lang "en" _ "US" _) = "en-us" +fromBCP47 (Lang "en" _ "GB" _) = "en-gb" +fromBCP47 (Lang "grc"_ _ _) = "agr" +fromBCP47 (Lang "el" _ _ _) = "gr" +fromBCP47 (Lang "eu" _ _ _) = "ba" +fromBCP47 (Lang "he" _ _ _) = "il" +fromBCP47 (Lang "jp" _ _ _) = "ja" +fromBCP47 (Lang "uk" _ _ _) = "ua" +fromBCP47 (Lang "vi" _ _ _) = "vn" +fromBCP47 (Lang "zh" _ _ _) = "cn" +fromBCP47 (Lang l _ _ _) = l