{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE FlexibleInstances #-} module Lang.En.Grapheme ( Grapheme(..) , Output(..) , Regular(..) , graphemes , parse ) where import Data.Char (toLower, isLower) import Data.List (groupBy) import qualified Data.Map as Map (empty, insert, lookup) import Data.String (IsString(..)) --import Graph (Graph(..)) import Stream (Stream) import Transducer (Transducer(..), fromList, run) --import Transducer (Transducer(..), RunState(..), Stack(..), fromList) data Regular = A | Ae | Ai | Ay | E | Ea | Ee | Ei | B | C | Ck | D | F | G | Gh | L | P | S | T | Th | W | Wh deriving (Bounded, Enum, Eq, Ord, Read, Show) {- card :: Int card = fromEnum (maxBound :: Regular) -} data Grapheme = Grapheme Regular | Punctuation String deriving (Eq, Ord, Show) instance Read Grapheme where readsPrec _ next = [(maybe (Punctuation next) Grapheme $ Map.lookup next stringToGrapheme, "")] where stringToGrapheme = foldr insert Map.empty [minBound .. maxBound] insert regular = Map.insert (show regular) regular instance {-# OVERLAPS #-} IsString [Grapheme] where fromString = fmap read . groupBy (const isLower) data Output = Output { rawText :: String , grapheme :: Grapheme } deriving (Eq, Show) {- instance Enum Grapheme where fromEnum (Regular r) = fromEnum r fromEnum (Noise c) = card + fromEnum c toEnum n | n < card = Regular $ toEnum n | otherwise = Noise $ toEnum (n - card) -} auto :: Regular -> ([Char], Either [Char] Grapheme) auto r = (toLower <$> show r, Right (Grapheme r)) punctuation :: String -> ([Char], Either [Char] Grapheme) punctuation signs = (signs, Right (Punctuation signs)) {- groupSpaces :: RunState Char Char Grapheme -> Maybe (RunState Char Char Grapheme) groupSpaces state@(RunState {transducer, stack = Stack (' ':ls)}) | focus (graph transducer) == 0 = Just $ state {stack = Stack ls} groupSpaces _ = Nothing -} graphemes :: Transducer Char Char Grapheme graphemes = (fromList $ punctuations ++ regulars) { projector = toLower } where regulars = auto <$> [minBound .. maxBound] punctuations = punctuation <$> ["?", "!", ".", ",", ":", " "] parse :: Stream Char -> Stream Output parse = fmap (uncurry Output) . run graphemes