Try and implement basic linguistic stuff for english

This commit is contained in:
Tissevert 2019-06-02 22:06:57 +02:00
parent f44bf6259e
commit b5432a4358
4 changed files with 213 additions and 27 deletions

View file

@ -19,6 +19,8 @@ extra-source-files: CHANGELOG.md
library library
exposed-modules: Graph exposed-modules: Graph
, Lang.En.Grapheme
, Lang.En.Morpheme
, Stream , Stream
, Transducer , Transducer
, Tree , Tree

100
src/Lang/En/Grapheme.hs Normal file
View file

@ -0,0 +1,100 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
module Lang.En.Grapheme (
Grapheme(..)
, Output(..)
, Regular(..)
, graphemes
, parse
) where
import Data.Char (toLower, isLower)
import Data.List (groupBy)
import qualified Data.Map as Map (empty, insert, lookup)
import Data.String (IsString(..))
--import Graph (Graph(..))
import Stream (Stream)
import Transducer (Transducer(..), fromList, run)
--import Transducer (Transducer(..), RunState(..), Stack(..), fromList)
data Regular =
A
| Ae
| Ai
| Ay
| E
| Ea
| Ee
| Ei
| B
| C
| Ck
| D
| F
| G
| Gh
| L
| P
| S
| T
| Th
| W
| Wh
deriving (Bounded, Enum, Eq, Ord, Read, Show)
{-
card :: Int
card = fromEnum (maxBound :: Regular)
-}
data Grapheme = Grapheme Regular | Punctuation String deriving (Eq, Ord, Show)
instance Read Grapheme where
readsPrec _ next =
[(maybe (Punctuation next) Grapheme $ Map.lookup next stringToGrapheme, "")]
where
stringToGrapheme = foldr insert Map.empty [minBound .. maxBound]
insert regular = Map.insert (show regular) regular
instance {-# OVERLAPS #-} IsString [Grapheme] where
fromString = fmap read . groupBy (const isLower)
data Output = Output {
rawText :: String
, grapheme :: Grapheme
} deriving (Eq, Show)
{-
instance Enum Grapheme where
fromEnum (Regular r) = fromEnum r
fromEnum (Noise c) = card + fromEnum c
toEnum n
| n < card = Regular $ toEnum n
| otherwise = Noise $ toEnum (n - card)
-}
auto :: Regular -> ([Char], Either [Char] Grapheme)
auto r = (toLower <$> show r, Right (Grapheme r))
punctuation :: String -> ([Char], Either [Char] Grapheme)
punctuation signs = (signs, Right (Punctuation signs))
{-
groupSpaces :: RunState Char Char Grapheme -> Maybe (RunState Char Char Grapheme)
groupSpaces state@(RunState {transducer, stack = Stack (' ':ls)})
| focus (graph transducer) == 0 = Just $ state {stack = Stack ls}
groupSpaces _ = Nothing
-}
graphemes :: Transducer Char Char Grapheme
graphemes = (fromList $ punctuations ++ regulars) {
projector = toLower
}
where
regulars = auto <$> [minBound .. maxBound]
punctuations = punctuation <$> ["?", "!", ".", ",", ":", " "]
parse :: Stream Char -> Stream Output
parse = fmap (uncurry Output) . run graphemes

53
src/Lang/En/Morpheme.hs Normal file
View file

@ -0,0 +1,53 @@
{-# LANGUAGE NamedFieldPuns #-}
module Lang.En.Morpheme (
Morpheme(..)
, Output(..)
, morphemes
, parse
) where
import Data.String (fromString)
import Lang.En.Grapheme (Grapheme(..))
import qualified Lang.En.Grapheme as Grapheme (Output(..))
import Stream (Stream)
import Transducer (Transducer(..), fromList, run)
data Regular =
LY
| ED
| UN
| CAT
| DOG
| EaT
| SLEeP
| A
| ThE
| GOoD
| KINd
| ANd
| Space
deriving (Bounded, Enum, Eq, Ord, Show)
data Morpheme = Morpheme Regular | Punctuation String deriving (Eq, Ord, Show)
auto :: Morpheme -> ([Grapheme], Either [Grapheme] Morpheme)
auto morpheme = (fromString $ show morpheme, Right morpheme)
morphemes :: Transducer Grapheme.Output Grapheme Morpheme
morphemes = (fromList $ auto <$> [minBound .. maxBound]) {
projector = Grapheme.grapheme
, rules = []
}
data Output = Output {
rawText :: String
, morpheme :: Morpheme
} deriving (Eq, Show)
parse :: Stream Grapheme.Output -> Stream Output
parse = fmap fusionGraphemes . run morphemes
where
fusionGraphemes (graphemes, morpheme) = Output {
rawText = concat $ Grapheme.rawText <$> graphemes
, morpheme
}

View file

@ -1,34 +1,35 @@
{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NamedFieldPuns #-}
module Transducer ( module Transducer (
Transducer RunState(..)
, Stack(..)
, Transducer(..)
, fromList , fromList
, run , run
) where ) where
import Data.List (find)
import Data.Map ((!), insert) import Data.Map ((!), insert)
import Data.Maybe (isJust)
import Graph (Graph(..), Vertex(..), editVertex, follow, rewind, singleton, weave) import Graph (Graph(..), Vertex(..), editVertex, follow, rewind, singleton, weave)
import Stream (Stream(..), merge) import Stream (Stream(..), merge)
import qualified Stream (empty) import qualified Stream (empty)
type Transducer input output = Graph input [output] data Transducer input label output = Transducer {
newtype Stack a = Stack [a] graph :: Graph label [output]
, rules :: [RunState input label output -> Maybe (RunState input label output)]
, projector :: input -> label
}
push :: a -> Stack a -> Stack a empty :: Graph edge [label]
push a (Stack s) = Stack (a:s)
content :: Stack a -> [a]
content (Stack s) = reverse s
empty :: Transducer input output
empty = singleton [] empty = singleton []
entry :: Ord input => Transducer input output -> ([input], output) -> Transducer input output entry :: Ord edge => Graph edge [label] -> ([edge], label) -> Graph edge [label]
entry transducer (path, output) = entry transducer (path, output) =
rewind $ editVertex (weave transducer path) pushLabel rewind $ editVertex (weave transducer path) pushLabel
where where
pushLabel vertex = vertex {label = output:(label vertex)} pushLabel vertex = vertex {label = output:(label vertex)}
loop :: Ord input => Transducer input output -> ([input], [input]) -> Transducer input output loop :: Ord edge => Graph edge [label] -> ([edge], [edge]) -> Graph edge [label]
loop transducer ([], loopPath) = loop transducer ([], loopPath) =
case splitAt (length loopPath - 1) loopPath of case splitAt (length loopPath - 1) loopPath of
(_, []) -> transducer (_, []) -> transducer
@ -41,23 +42,53 @@ loop transducer ([], loopPath) =
vertex {edges = insert input (focus transducer) $ edges vertex} vertex {edges = insert input (focus transducer) $ edges vertex}
loop transducer (prefix, loopPath) = loop (weave transducer prefix) ([], loopPath) loop transducer (prefix, loopPath) = loop (weave transducer prefix) ([], loopPath)
fromList :: Ord input => [([input], Either [input] output)] -> Transducer input output fromList :: Ord input => [([input], Either [input] output)] -> Transducer input input output
fromList = foldl add empty fromList l = Transducer {graph = foldl add empty l, projector = id, rules = []}
where where
add transducer (path, Left loopPath) = loop transducer (path, loopPath) add transducer (path, Left loopPath) = loop transducer (path, loopPath)
add transducer (path, Right output) = entry transducer (path, output) add transducer (path, Right output) = entry transducer (path, output)
run :: (Ord input, Eq output) => Transducer input output -> Stream input -> Stream ([input], output) data RunState input label output = RunState {
run transducer = runWithStack initState transducer :: Transducer input label output
where , stack :: Stack input
initState = (transducer, Stack []) }
emit stack stream output = ((content stack, output), runWithStack initState stream)
runWithStack (state, stack) (Stream inputs) = foldl (\outputsStream (input, stream) -> initState :: Transducer input label output -> RunState input label output
case follow state input of initState transducer = RunState {
transducer = transducer {graph = rewind $ graph transducer}
, stack = Stack []
}
newtype Stack a = Stack [a]
push :: a -> Stack a -> Stack a
push a (Stack s) = Stack (a:s)
content :: Stack a -> [a]
content (Stack s) = reverse s
tryInput :: (Eq input, Ord label, Eq output) => RunState input label output -> Stream ([input], output) -> (input, Stream input) -> Stream ([input], output)
tryInput state@(RunState {transducer, stack}) outputsStream (input, stream) =
case follow (graph transducer) (projector transducer $ input) of
Nothing -> Stream.empty Nothing -> Stream.empty
Just newState@(Graph {vertices, focus}) -> Just newGraph@(Graph {vertices, focus}) ->
let newStack = push input stack in let newStack = push input stack in
let emitted = Stream $ emit newStack stream <$> label (vertices ! focus) in let emitted = Stream $ emit newStack <$> label (vertices ! focus) in
let continue = runWithStack (newState, newStack) stream in let continue = runState (state {
transducer = transducer {graph = newGraph}
, stack = newStack
}) stream in
emitted `merge` continue `merge` outputsStream emitted `merge` continue `merge` outputsStream
) Stream.empty inputs where
emit aStack output = (
(content aStack, output), runState (initState transducer) stream
)
runState :: (Eq input, Ord label, Eq output) => RunState input label output -> Stream input -> Stream ([input], output)
runState state@(RunState {transducer}) (Stream inputs) =
case find isJust $ ($ state) <$> rules transducer of
Just (Just newState) -> runState newState (Stream inputs)
_ -> foldl (tryInput state) Stream.empty inputs
run :: (Eq input, Ord label, Eq output) => Transducer input label output -> Stream input -> Stream ([input], output)
run transducer = runState $ initState transducer