pandoc/src/Text/Pandoc/BCP47.hs
John MacFarlane f3080c0c22 Remove license boilerplate.
The haddock module header contains essentially the
same information, so the boilerplate is redundant and
just one more thing to get out of sync.
2019-03-01 10:27:06 -08:00

109 lines
3.8 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE NoImplicitPrelude #-}
{- |
Module : Text.Pandoc.BCP47
Copyright : Copyright (C) 20172019 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Functions for parsing and rendering BCP47 language identifiers.
-}
module Text.Pandoc.BCP47 (
getLang
, parseBCP47
, Lang(..)
, renderLang
)
where
import Prelude
import Control.Monad (guard)
import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper, toLower,
toUpper)
import Data.List (intercalate)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import qualified Text.Parsec as P
-- | Represents BCP 47 language/country code.
data Lang = Lang{ langLanguage :: String
, langScript :: String
, langRegion :: String
, langVariants :: [String] }
deriving (Eq, Ord, Show)
-- | Render a Lang as BCP 47.
renderLang :: Lang -> String
renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
([langScript lang, langRegion lang] ++ langVariants lang))
-- | Get the contents of the `lang` metadata field or variable.
getLang :: WriterOptions -> Meta -> Maybe String
getLang opts meta =
case lookup "lang" (writerVariables opts) of
Just s -> Just s
_ ->
case lookupMeta "lang" meta of
Just (MetaInlines [Str s]) -> Just s
Just (MetaString s) -> Just s
_ -> Nothing
-- | Parse a BCP 47 string as a Lang. Currently we parse
-- extensions and private-use fields as "variants," even
-- though officially they aren't.
parseBCP47 :: String -> Either String Lang
parseBCP47 lang =
case P.parse bcp47 "lang" lang of
Right r -> Right r
Left e -> Left $ show e
where bcp47 = do
language <- pLanguage
script <- P.option "" pScript
region <- P.option "" pRegion
variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
P.eof
return Lang{ langLanguage = language
, langScript = script
, langRegion = region
, langVariants = variants }
asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
pLanguage = do
cs <- P.many1 asciiLetter
let lcs = length cs
guard $ lcs == 2 || lcs == 3
return $ map toLower cs
pScript = P.try $ do
P.char '-'
x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
xs <- P.count 3
(P.satisfy (\c -> isAscii c && isLetter c && isLower c))
return $ map toLower (x:xs)
pRegion = P.try $ do
P.char '-'
cs <- P.many1 asciiLetter
let lcs = length cs
guard $ lcs == 2 || lcs == 3
return $ map toUpper cs
pVariant = P.try $ do
P.char '-'
ds <- P.option "" (P.count 1 P.digit)
cs <- P.many1 asciiLetter
let var = ds ++ cs
guard $ if null ds
then length var >= 5 && length var <= 8
else length var == 4
return $ map toLower var
pExtension = P.try $ do
P.char '-'
cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
guard $ length cs >= 2 && length cs <= 8
return $ map toLower cs
pPrivateUse = P.try $ do
P.char '-'
P.char 'x'
P.char '-'
cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
guard $ not (null cs) && length cs <= 8
let var = "x-" ++ cs
return $ map toLower var