101 lines
2.3 KiB
Haskell
101 lines
2.3 KiB
Haskell
{-# LANGUAGE NamedFieldPuns #-}
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
module Lang.En.Grapheme (
|
|
Grapheme(..)
|
|
, Output(..)
|
|
, Regular(..)
|
|
, graphemes
|
|
, parse
|
|
) where
|
|
|
|
import Data.Char (toLower, isLower)
|
|
import Data.List (groupBy)
|
|
import qualified Data.Map as Map (empty, insert, lookup)
|
|
import Data.String (IsString(..))
|
|
--import Graph (Graph(..))
|
|
import Stream (Stream)
|
|
import Transducer (Transducer(..), fromList, run)
|
|
--import Transducer (Transducer(..), RunState(..), Stack(..), fromList)
|
|
|
|
data Regular =
|
|
A
|
|
| Ae
|
|
| Ai
|
|
| Ay
|
|
| E
|
|
| Ea
|
|
| Ee
|
|
| Ei
|
|
| B
|
|
| C
|
|
| Ck
|
|
| D
|
|
| F
|
|
| G
|
|
| Gh
|
|
| L
|
|
| P
|
|
| S
|
|
| T
|
|
| Th
|
|
| W
|
|
| Wh
|
|
deriving (Bounded, Enum, Eq, Ord, Read, Show)
|
|
|
|
{-
|
|
card :: Int
|
|
card = fromEnum (maxBound :: Regular)
|
|
-}
|
|
|
|
data Grapheme = Grapheme Regular | Punctuation String deriving (Eq, Ord, Show)
|
|
|
|
instance Read Grapheme where
|
|
readsPrec _ next =
|
|
[(maybe (Punctuation next) Grapheme $ Map.lookup next stringToGrapheme, "")]
|
|
where
|
|
stringToGrapheme = foldr insert Map.empty [minBound .. maxBound]
|
|
insert regular = Map.insert (show regular) regular
|
|
|
|
|
|
instance {-# OVERLAPS #-} IsString [Grapheme] where
|
|
fromString = fmap read . groupBy (const isLower)
|
|
|
|
data Output = Output {
|
|
rawText :: String
|
|
, grapheme :: Grapheme
|
|
} deriving (Eq, Show)
|
|
|
|
{-
|
|
instance Enum Grapheme where
|
|
fromEnum (Regular r) = fromEnum r
|
|
fromEnum (Noise c) = card + fromEnum c
|
|
|
|
toEnum n
|
|
| n < card = Regular $ toEnum n
|
|
| otherwise = Noise $ toEnum (n - card)
|
|
-}
|
|
|
|
auto :: Regular -> ([Char], Either [Char] Grapheme)
|
|
auto r = (toLower <$> show r, Right (Grapheme r))
|
|
|
|
punctuation :: String -> ([Char], Either [Char] Grapheme)
|
|
punctuation signs = (signs, Right (Punctuation signs))
|
|
|
|
{-
|
|
groupSpaces :: RunState Char Char Grapheme -> Maybe (RunState Char Char Grapheme)
|
|
groupSpaces state@(RunState {transducer, stack = Stack (' ':ls)})
|
|
| focus (graph transducer) == 0 = Just $ state {stack = Stack ls}
|
|
groupSpaces _ = Nothing
|
|
-}
|
|
|
|
graphemes :: Transducer Char Char Grapheme
|
|
graphemes = (fromList $ punctuations ++ regulars) {
|
|
projector = toLower
|
|
}
|
|
where
|
|
regulars = auto <$> [minBound .. maxBound]
|
|
punctuations = punctuation <$> ["?", "!", ".", ",", ":", " "]
|
|
|
|
parse :: Stream Char -> Stream Output
|
|
parse = fmap (uncurry Output) . run graphemes
|