pandoc/src/Text/Pandoc/BCP47.hs
John MacFarlane 7e389cb3db Use NoImplicitPrelude and explicitly import Prelude.
This seems to be necessary if we are to use our custom Prelude
with ghci.

Closes #4464.
2018-03-18 10:46:28 -07:00

127 lines
4.5 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE NoImplicitPrelude #-}
{-
Copyright (C) 20172018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.BCP47
Copyright : Copyright (C) 20172018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Functions for parsing and rendering BCP47 language identifiers.
-}
module Text.Pandoc.BCP47 (
getLang
, parseBCP47
, Lang(..)
, renderLang
)
where
import Prelude
import Control.Monad (guard)
import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper, toLower,
toUpper)
import Data.List (intercalate)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import qualified Text.Parsec as P
-- | Represents BCP 47 language/country code.
data Lang = Lang{ langLanguage :: String
, langScript :: String
, langRegion :: String
, langVariants :: [String] }
deriving (Eq, Ord, Show)
-- | Render a Lang as BCP 47.
renderLang :: Lang -> String
renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
([langScript lang, langRegion lang] ++ langVariants lang))
-- | Get the contents of the `lang` metadata field or variable.
getLang :: WriterOptions -> Meta -> Maybe String
getLang opts meta =
case lookup "lang" (writerVariables opts) of
Just s -> Just s
_ ->
case lookupMeta "lang" meta of
Just (MetaInlines [Str s]) -> Just s
Just (MetaString s) -> Just s
_ -> Nothing
-- | Parse a BCP 47 string as a Lang. Currently we parse
-- extensions and private-use fields as "variants," even
-- though officially they aren't.
parseBCP47 :: String -> Either String Lang
parseBCP47 lang =
case P.parse bcp47 "lang" lang of
Right r -> Right r
Left e -> Left $ show e
where bcp47 = do
language <- pLanguage
script <- P.option "" pScript
region <- P.option "" pRegion
variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
P.eof
return Lang{ langLanguage = language
, langScript = script
, langRegion = region
, langVariants = variants }
asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
pLanguage = do
cs <- P.many1 asciiLetter
let lcs = length cs
guard $ lcs == 2 || lcs == 3
return $ map toLower cs
pScript = P.try $ do
P.char '-'
x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
xs <- P.count 3
(P.satisfy (\c -> isAscii c && isLetter c && isLower c))
return $ map toLower (x:xs)
pRegion = P.try $ do
P.char '-'
cs <- P.many1 asciiLetter
let lcs = length cs
guard $ lcs == 2 || lcs == 3
return $ map toUpper cs
pVariant = P.try $ do
P.char '-'
ds <- P.option "" (P.count 1 P.digit)
cs <- P.many1 asciiLetter
let var = ds ++ cs
guard $ if null ds
then length var >= 5 && length var <= 8
else length var == 4
return $ map toLower var
pExtension = P.try $ do
P.char '-'
cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
guard $ length cs >= 2 && length cs <= 8
return $ map toLower cs
pPrivateUse = P.try $ do
P.char '-'
P.char 'x'
P.char '-'
cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
guard $ not (null cs) && length cs <= 8
let var = "x-" ++ cs
return $ map toLower var