Mainate/src/Lang/En/Grapheme.hs

101 lines
2.3 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
module Lang.En.Grapheme (
Grapheme(..)
, Output(..)
, Regular(..)
, graphemes
, parse
) where
import Data.Char (toLower, isLower)
import Data.List (groupBy)
import qualified Data.Map as Map (empty, insert, lookup)
import Data.String (IsString(..))
--import Graph (Graph(..))
import Stream (Stream)
import Transducer (Transducer(..), fromList, run)
--import Transducer (Transducer(..), RunState(..), Stack(..), fromList)
data Regular =
A
| Ae
| Ai
| Ay
| E
| Ea
| Ee
| Ei
| B
| C
| Ck
| D
| F
| G
| Gh
| L
| P
| S
| T
| Th
| W
| Wh
deriving (Bounded, Enum, Eq, Ord, Read, Show)
{-
card :: Int
card = fromEnum (maxBound :: Regular)
-}
data Grapheme = Grapheme Regular | Punctuation String deriving (Eq, Ord, Show)
instance Read Grapheme where
readsPrec _ next =
[(maybe (Punctuation next) Grapheme $ Map.lookup next stringToGrapheme, "")]
where
stringToGrapheme = foldr insert Map.empty [minBound .. maxBound]
insert regular = Map.insert (show regular) regular
instance {-# OVERLAPS #-} IsString [Grapheme] where
fromString = fmap read . groupBy (const isLower)
data Output = Output {
rawText :: String
, grapheme :: Grapheme
} deriving (Eq, Show)
{-
instance Enum Grapheme where
fromEnum (Regular r) = fromEnum r
fromEnum (Noise c) = card + fromEnum c
toEnum n
| n < card = Regular $ toEnum n
| otherwise = Noise $ toEnum (n - card)
-}
auto :: Regular -> ([Char], Either [Char] Grapheme)
auto r = (toLower <$> show r, Right (Grapheme r))
punctuation :: String -> ([Char], Either [Char] Grapheme)
punctuation signs = (signs, Right (Punctuation signs))
{-
groupSpaces :: RunState Char Char Grapheme -> Maybe (RunState Char Char Grapheme)
groupSpaces state@(RunState {transducer, stack = Stack (' ':ls)})
| focus (graph transducer) == 0 = Just $ state {stack = Stack ls}
groupSpaces _ = Nothing
-}
graphemes :: Transducer Char Char Grapheme
graphemes = (fromList $ punctuations ++ regulars) {
projector = toLower
}
where
regulars = auto <$> [minBound .. maxBound]
punctuations = punctuation <$> ["?", "!", ".", ",", ":", " "]
parse :: Stream Char -> Stream Output
parse = fmap (uncurry Output) . run graphemes