Moved BCP47 specific functions from Writers.Shared to new module.

Text.Pandoc.BCP47 (unexported, internal module).
`getLang`, `Lang(..)`, `parseBCP47`.
This commit is contained in:
John MacFarlane 2017-06-25 21:00:35 +02:00
parent 643cbdf104
commit ac9423eccc
6 changed files with 126 additions and 87 deletions

View file

@ -463,6 +463,7 @@ Library
Text.Pandoc.Lua.Util,
Text.Pandoc.CSS,
Text.Pandoc.UUID,
Text.Pandoc.BCP47
Text.Pandoc.Slides,
Text.Pandoc.Compat.Time,
Paths_pandoc

117
src/Text/Pandoc/BCP47.hs Normal file
View file

@ -0,0 +1,117 @@
{-
Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.BCP47
Copyright : Copyright (C) 2017 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Functions for parsing and rendering BCP47 language identifiers.
-}
module Text.Pandoc.BCP47 (
getLang
, parseBCP47
, Lang(..)
, renderLang
)
where
import Control.Monad (guard)
import Data.Char (isAscii, isLetter, isUpper, isLower)
import Data.List (intercalate)
import Text.Pandoc.Definition
import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Logging
import Text.Pandoc.Options
import qualified Text.Parsec as P
-- | Represents BCP 47 language/country code.
data Lang = Lang{ langLanguage :: String
, langScript :: String
, langRegion :: String
, langVariants :: [String] }
deriving (Eq, Ord, Show)
-- | Render a Lang as BCP 47.
renderLang :: Lang -> String
renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
([langScript lang, langRegion lang] ++ langVariants lang))
-- | Get the contents of the `lang` metadata field or variable.
getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang)
getLang opts meta = case
(case lookup "lang" (writerVariables opts) of
Just s -> Just s
_ ->
case lookupMeta "lang" meta of
Just (MetaInlines [Str s]) -> Just s
Just (MetaString s) -> Just s
_ -> Nothing) of
Nothing -> return Nothing
Just s -> case parseBCP47 s of
Left _ -> do
report $ InvalidLang s
return Nothing
Right l -> return (Just l)
-- | Parse a BCP 47 string as a Lang.
parseBCP47 :: String -> Either String Lang
parseBCP47 lang =
case P.parse bcp47 "lang" lang of
Right r -> Right r
Left e -> Left $ show e
where bcp47 = do
language <- pLanguage
script <- P.option "" pScript
region <- P.option "" pRegion
variants <- P.many pVariant
() <$ P.char '-' P.<|> P.eof
return $ Lang{ langLanguage = language
, langScript = script
, langRegion = region
, langVariants = variants }
asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
pLanguage = do
cs <- P.many1 asciiLetter
let lcs = length cs
guard $ lcs == 2 || lcs == 3
return cs
pScript = P.try $ do
P.char '-'
x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
xs <- P.count 3
(P.satisfy (\c -> isAscii c && isLetter c && isLower c))
return (x:xs)
pRegion = P.try $ do
P.char '-'
cs <- P.many1 asciiLetter
let lcs = length cs
guard $ lcs == 2 || lcs == 3
return cs
pVariant = P.try $ do
P.char '-'
ds <- P.option "" (P.count 1 P.digit)
cs <- P.many1 asciiLetter
let var = ds ++ cs
guard $ if null ds
then length var >= 5 && length var <= 8
else length var == 4
return var

View file

@ -67,7 +67,8 @@ import Text.Pandoc.Shared hiding (Element)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Walk
import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, renderLang)
import Text.Pandoc.Writers.Shared (fixDisplayMath)
import Text.Pandoc.BCP47 (getLang, renderLang)
import Text.Printf (printf)
import Text.TeXMath
import Text.XML.Light as XML

View file

@ -50,8 +50,8 @@ import Text.Pandoc.Shared (stringify)
import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
import Text.Pandoc.Walk
import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, Lang(..),
renderLang)
import Text.Pandoc.Writers.Shared (fixDisplayMath)
import Text.Pandoc.BCP47 (getLang, Lang(..), renderLang)
import Text.Pandoc.XML
import Text.TeXMath
import Text.XML.Light

View file

@ -50,6 +50,7 @@ import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared
import Text.Pandoc.XML
import Text.Pandoc.BCP47 (parseBCP47, Lang(..))
import Text.Printf (printf)
-- | Auxiliary function to convert Plain block to Para.

View file

@ -29,11 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Shared utility functions for pandoc writers.
-}
module Text.Pandoc.Writers.Shared (
getLang
, parseBCP47
, Lang(..)
, renderLang
, metaToJSON
metaToJSON
, metaToJSON'
, addVariablesToJSON
, getField
@ -46,97 +42,20 @@ module Text.Pandoc.Writers.Shared (
, gridTable
)
where
import Control.Monad (liftM, zipWithM, guard)
import Control.Monad (liftM, zipWithM)
import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object),
encode, fromJSON)
import Data.Char (isAscii, isLetter, isUpper, isLower)
import qualified Data.HashMap.Strict as H
import Data.List (groupBy, intersperse, transpose, intercalate)
import Data.List (groupBy, intersperse, transpose)
import qualified Data.Map as M
import Data.Maybe (isJust)
import qualified Data.Text as T
import qualified Data.Traversable as Traversable
import Text.Pandoc.Definition
import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Pretty
import Text.Pandoc.UTF8 (toStringLazy)
import Text.Pandoc.XML (escapeStringForXML)
import qualified Text.Parsec as P
-- | Represents BCP 47 language/country code.
data Lang = Lang{ langLanguage :: String
, langScript :: String
, langRegion :: String
, langVariants :: [String] }
deriving (Eq, Ord, Show)
-- | Render a Lang as BCP 47.
renderLang :: Lang -> String
renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
([langScript lang, langRegion lang] ++ langVariants lang))
-- | Get the contents of the `lang` metadata field or variable.
getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang)
getLang opts meta = case
(case lookup "lang" (writerVariables opts) of
Just s -> Just s
_ ->
case lookupMeta "lang" meta of
Just (MetaInlines [Str s]) -> Just s
Just (MetaString s) -> Just s
_ -> Nothing) of
Nothing -> return Nothing
Just s -> case parseBCP47 s of
Left _ -> do
report $ InvalidLang s
return Nothing
Right l -> return (Just l)
-- | Parse a BCP 47 string as a Lang.
parseBCP47 :: String -> Either String Lang
parseBCP47 lang =
case P.parse bcp47 "lang" lang of
Right r -> Right r
Left e -> Left $ show e
where bcp47 = do
language <- pLanguage
script <- P.option "" pScript
region <- P.option "" pRegion
variants <- P.many pVariant
() <$ P.char '-' P.<|> P.eof
return $ Lang{ langLanguage = language
, langScript = script
, langRegion = region
, langVariants = variants }
asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
pLanguage = do
cs <- P.many1 asciiLetter
let lcs = length cs
guard $ lcs == 2 || lcs == 3
return cs
pScript = P.try $ do
P.char '-'
x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
xs <- P.count 3
(P.satisfy (\c -> isAscii c && isLetter c && isLower c))
return (x:xs)
pRegion = P.try $ do
P.char '-'
cs <- P.many1 asciiLetter
let lcs = length cs
guard $ lcs == 2 || lcs == 3
return cs
pVariant = P.try $ do
P.char '-'
ds <- P.option "" (P.count 1 P.digit)
cs <- P.many1 asciiLetter
let var = ds ++ cs
guard $ if null ds
then length var >= 5 && length var <= 8
else length var == 4
return var
-- | Create JSON value for template from a 'Meta' and an association list
-- of variables, specified at the command line or in the writer.