Moved BCP47 specific functions from Writers.Shared to new module.
Text.Pandoc.BCP47 (unexported, internal module). `getLang`, `Lang(..)`, `parseBCP47`.
This commit is contained in:
parent
643cbdf104
commit
ac9423eccc
6 changed files with 126 additions and 87 deletions
|
@ -463,6 +463,7 @@ Library
|
|||
Text.Pandoc.Lua.Util,
|
||||
Text.Pandoc.CSS,
|
||||
Text.Pandoc.UUID,
|
||||
Text.Pandoc.BCP47
|
||||
Text.Pandoc.Slides,
|
||||
Text.Pandoc.Compat.Time,
|
||||
Paths_pandoc
|
||||
|
|
117
src/Text/Pandoc/BCP47.hs
Normal file
117
src/Text/Pandoc/BCP47.hs
Normal file
|
@ -0,0 +1,117 @@
|
|||
{-
|
||||
Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.BCP47
|
||||
Copyright : Copyright (C) 2017 John MacFarlane
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Functions for parsing and rendering BCP47 language identifiers.
|
||||
-}
|
||||
module Text.Pandoc.BCP47 (
|
||||
getLang
|
||||
, parseBCP47
|
||||
, Lang(..)
|
||||
, renderLang
|
||||
)
|
||||
where
|
||||
import Control.Monad (guard)
|
||||
import Data.Char (isAscii, isLetter, isUpper, isLower)
|
||||
import Data.List (intercalate)
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.Options
|
||||
import qualified Text.Parsec as P
|
||||
|
||||
-- | Represents BCP 47 language/country code.
|
||||
data Lang = Lang{ langLanguage :: String
|
||||
, langScript :: String
|
||||
, langRegion :: String
|
||||
, langVariants :: [String] }
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
-- | Render a Lang as BCP 47.
|
||||
renderLang :: Lang -> String
|
||||
renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
|
||||
([langScript lang, langRegion lang] ++ langVariants lang))
|
||||
|
||||
-- | Get the contents of the `lang` metadata field or variable.
|
||||
getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang)
|
||||
getLang opts meta = case
|
||||
(case lookup "lang" (writerVariables opts) of
|
||||
Just s -> Just s
|
||||
_ ->
|
||||
case lookupMeta "lang" meta of
|
||||
Just (MetaInlines [Str s]) -> Just s
|
||||
Just (MetaString s) -> Just s
|
||||
_ -> Nothing) of
|
||||
Nothing -> return Nothing
|
||||
Just s -> case parseBCP47 s of
|
||||
Left _ -> do
|
||||
report $ InvalidLang s
|
||||
return Nothing
|
||||
Right l -> return (Just l)
|
||||
|
||||
-- | Parse a BCP 47 string as a Lang.
|
||||
parseBCP47 :: String -> Either String Lang
|
||||
parseBCP47 lang =
|
||||
case P.parse bcp47 "lang" lang of
|
||||
Right r -> Right r
|
||||
Left e -> Left $ show e
|
||||
where bcp47 = do
|
||||
language <- pLanguage
|
||||
script <- P.option "" pScript
|
||||
region <- P.option "" pRegion
|
||||
variants <- P.many pVariant
|
||||
() <$ P.char '-' P.<|> P.eof
|
||||
return $ Lang{ langLanguage = language
|
||||
, langScript = script
|
||||
, langRegion = region
|
||||
, langVariants = variants }
|
||||
asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
|
||||
pLanguage = do
|
||||
cs <- P.many1 asciiLetter
|
||||
let lcs = length cs
|
||||
guard $ lcs == 2 || lcs == 3
|
||||
return cs
|
||||
pScript = P.try $ do
|
||||
P.char '-'
|
||||
x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
|
||||
xs <- P.count 3
|
||||
(P.satisfy (\c -> isAscii c && isLetter c && isLower c))
|
||||
return (x:xs)
|
||||
pRegion = P.try $ do
|
||||
P.char '-'
|
||||
cs <- P.many1 asciiLetter
|
||||
let lcs = length cs
|
||||
guard $ lcs == 2 || lcs == 3
|
||||
return cs
|
||||
pVariant = P.try $ do
|
||||
P.char '-'
|
||||
ds <- P.option "" (P.count 1 P.digit)
|
||||
cs <- P.many1 asciiLetter
|
||||
let var = ds ++ cs
|
||||
guard $ if null ds
|
||||
then length var >= 5 && length var <= 8
|
||||
else length var == 4
|
||||
return var
|
|
@ -67,7 +67,8 @@ import Text.Pandoc.Shared hiding (Element)
|
|||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import Text.Pandoc.Walk
|
||||
import Text.Pandoc.Writers.Math
|
||||
import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, renderLang)
|
||||
import Text.Pandoc.Writers.Shared (fixDisplayMath)
|
||||
import Text.Pandoc.BCP47 (getLang, renderLang)
|
||||
import Text.Printf (printf)
|
||||
import Text.TeXMath
|
||||
import Text.XML.Light as XML
|
||||
|
|
|
@ -50,8 +50,8 @@ import Text.Pandoc.Shared (stringify)
|
|||
import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
|
||||
import Text.Pandoc.Walk
|
||||
import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
|
||||
import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, Lang(..),
|
||||
renderLang)
|
||||
import Text.Pandoc.Writers.Shared (fixDisplayMath)
|
||||
import Text.Pandoc.BCP47 (getLang, Lang(..), renderLang)
|
||||
import Text.Pandoc.XML
|
||||
import Text.TeXMath
|
||||
import Text.XML.Light
|
||||
|
|
|
@ -50,6 +50,7 @@ import Text.Pandoc.Templates (renderTemplate')
|
|||
import Text.Pandoc.Writers.Math
|
||||
import Text.Pandoc.Writers.Shared
|
||||
import Text.Pandoc.XML
|
||||
import Text.Pandoc.BCP47 (parseBCP47, Lang(..))
|
||||
import Text.Printf (printf)
|
||||
|
||||
-- | Auxiliary function to convert Plain block to Para.
|
||||
|
|
|
@ -29,11 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
Shared utility functions for pandoc writers.
|
||||
-}
|
||||
module Text.Pandoc.Writers.Shared (
|
||||
getLang
|
||||
, parseBCP47
|
||||
, Lang(..)
|
||||
, renderLang
|
||||
, metaToJSON
|
||||
metaToJSON
|
||||
, metaToJSON'
|
||||
, addVariablesToJSON
|
||||
, getField
|
||||
|
@ -46,97 +42,20 @@ module Text.Pandoc.Writers.Shared (
|
|||
, gridTable
|
||||
)
|
||||
where
|
||||
import Control.Monad (liftM, zipWithM, guard)
|
||||
import Control.Monad (liftM, zipWithM)
|
||||
import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object),
|
||||
encode, fromJSON)
|
||||
import Data.Char (isAscii, isLetter, isUpper, isLower)
|
||||
import qualified Data.HashMap.Strict as H
|
||||
import Data.List (groupBy, intersperse, transpose, intercalate)
|
||||
import Data.List (groupBy, intersperse, transpose)
|
||||
import qualified Data.Map as M
|
||||
import Data.Maybe (isJust)
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Traversable as Traversable
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Pretty
|
||||
import Text.Pandoc.UTF8 (toStringLazy)
|
||||
import Text.Pandoc.XML (escapeStringForXML)
|
||||
import qualified Text.Parsec as P
|
||||
|
||||
-- | Represents BCP 47 language/country code.
|
||||
data Lang = Lang{ langLanguage :: String
|
||||
, langScript :: String
|
||||
, langRegion :: String
|
||||
, langVariants :: [String] }
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
-- | Render a Lang as BCP 47.
|
||||
renderLang :: Lang -> String
|
||||
renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
|
||||
([langScript lang, langRegion lang] ++ langVariants lang))
|
||||
|
||||
-- | Get the contents of the `lang` metadata field or variable.
|
||||
getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang)
|
||||
getLang opts meta = case
|
||||
(case lookup "lang" (writerVariables opts) of
|
||||
Just s -> Just s
|
||||
_ ->
|
||||
case lookupMeta "lang" meta of
|
||||
Just (MetaInlines [Str s]) -> Just s
|
||||
Just (MetaString s) -> Just s
|
||||
_ -> Nothing) of
|
||||
Nothing -> return Nothing
|
||||
Just s -> case parseBCP47 s of
|
||||
Left _ -> do
|
||||
report $ InvalidLang s
|
||||
return Nothing
|
||||
Right l -> return (Just l)
|
||||
|
||||
-- | Parse a BCP 47 string as a Lang.
|
||||
parseBCP47 :: String -> Either String Lang
|
||||
parseBCP47 lang =
|
||||
case P.parse bcp47 "lang" lang of
|
||||
Right r -> Right r
|
||||
Left e -> Left $ show e
|
||||
where bcp47 = do
|
||||
language <- pLanguage
|
||||
script <- P.option "" pScript
|
||||
region <- P.option "" pRegion
|
||||
variants <- P.many pVariant
|
||||
() <$ P.char '-' P.<|> P.eof
|
||||
return $ Lang{ langLanguage = language
|
||||
, langScript = script
|
||||
, langRegion = region
|
||||
, langVariants = variants }
|
||||
asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
|
||||
pLanguage = do
|
||||
cs <- P.many1 asciiLetter
|
||||
let lcs = length cs
|
||||
guard $ lcs == 2 || lcs == 3
|
||||
return cs
|
||||
pScript = P.try $ do
|
||||
P.char '-'
|
||||
x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
|
||||
xs <- P.count 3
|
||||
(P.satisfy (\c -> isAscii c && isLetter c && isLower c))
|
||||
return (x:xs)
|
||||
pRegion = P.try $ do
|
||||
P.char '-'
|
||||
cs <- P.many1 asciiLetter
|
||||
let lcs = length cs
|
||||
guard $ lcs == 2 || lcs == 3
|
||||
return cs
|
||||
pVariant = P.try $ do
|
||||
P.char '-'
|
||||
ds <- P.option "" (P.count 1 P.digit)
|
||||
cs <- P.many1 asciiLetter
|
||||
let var = ds ++ cs
|
||||
guard $ if null ds
|
||||
then length var >= 5 && length var <= 8
|
||||
else length var == 4
|
||||
return var
|
||||
|
||||
-- | Create JSON value for template from a 'Meta' and an association list
|
||||
-- of variables, specified at the command line or in the writer.
|
||||
|
|
Loading…
Add table
Reference in a new issue