Try and implement basic linguistic stuff for english

This commit is contained in:
Tissevert 2019-06-02 22:06:57 +02:00
parent f44bf6259e
commit b5432a4358
4 changed files with 213 additions and 27 deletions

View File

@ -19,6 +19,8 @@ extra-source-files: CHANGELOG.md
library
exposed-modules: Graph
, Lang.En.Grapheme
, Lang.En.Morpheme
, Stream
, Transducer
, Tree

100
src/Lang/En/Grapheme.hs Normal file
View File

@ -0,0 +1,100 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
module Lang.En.Grapheme (
Grapheme(..)
, Output(..)
, Regular(..)
, graphemes
, parse
) where
import Data.Char (toLower, isLower)
import Data.List (groupBy)
import qualified Data.Map as Map (empty, insert, lookup)
import Data.String (IsString(..))
--import Graph (Graph(..))
import Stream (Stream)
import Transducer (Transducer(..), fromList, run)
--import Transducer (Transducer(..), RunState(..), Stack(..), fromList)
data Regular =
A
| Ae
| Ai
| Ay
| E
| Ea
| Ee
| Ei
| B
| C
| Ck
| D
| F
| G
| Gh
| L
| P
| S
| T
| Th
| W
| Wh
deriving (Bounded, Enum, Eq, Ord, Read, Show)
{-
card :: Int
card = fromEnum (maxBound :: Regular)
-}
data Grapheme = Grapheme Regular | Punctuation String deriving (Eq, Ord, Show)
instance Read Grapheme where
readsPrec _ next =
[(maybe (Punctuation next) Grapheme $ Map.lookup next stringToGrapheme, "")]
where
stringToGrapheme = foldr insert Map.empty [minBound .. maxBound]
insert regular = Map.insert (show regular) regular
instance {-# OVERLAPS #-} IsString [Grapheme] where
fromString = fmap read . groupBy (const isLower)
data Output = Output {
rawText :: String
, grapheme :: Grapheme
} deriving (Eq, Show)
{-
instance Enum Grapheme where
fromEnum (Regular r) = fromEnum r
fromEnum (Noise c) = card + fromEnum c
toEnum n
| n < card = Regular $ toEnum n
| otherwise = Noise $ toEnum (n - card)
-}
auto :: Regular -> ([Char], Either [Char] Grapheme)
auto r = (toLower <$> show r, Right (Grapheme r))
punctuation :: String -> ([Char], Either [Char] Grapheme)
punctuation signs = (signs, Right (Punctuation signs))
{-
groupSpaces :: RunState Char Char Grapheme -> Maybe (RunState Char Char Grapheme)
groupSpaces state@(RunState {transducer, stack = Stack (' ':ls)})
| focus (graph transducer) == 0 = Just $ state {stack = Stack ls}
groupSpaces _ = Nothing
-}
graphemes :: Transducer Char Char Grapheme
graphemes = (fromList $ punctuations ++ regulars) {
projector = toLower
}
where
regulars = auto <$> [minBound .. maxBound]
punctuations = punctuation <$> ["?", "!", ".", ",", ":", " "]
parse :: Stream Char -> Stream Output
parse = fmap (uncurry Output) . run graphemes

53
src/Lang/En/Morpheme.hs Normal file
View File

@ -0,0 +1,53 @@
{-# LANGUAGE NamedFieldPuns #-}
module Lang.En.Morpheme (
Morpheme(..)
, Output(..)
, morphemes
, parse
) where
import Data.String (fromString)
import Lang.En.Grapheme (Grapheme(..))
import qualified Lang.En.Grapheme as Grapheme (Output(..))
import Stream (Stream)
import Transducer (Transducer(..), fromList, run)
data Regular =
LY
| ED
| UN
| CAT
| DOG
| EaT
| SLEeP
| A
| ThE
| GOoD
| KINd
| ANd
| Space
deriving (Bounded, Enum, Eq, Ord, Show)
data Morpheme = Morpheme Regular | Punctuation String deriving (Eq, Ord, Show)
auto :: Morpheme -> ([Grapheme], Either [Grapheme] Morpheme)
auto morpheme = (fromString $ show morpheme, Right morpheme)
morphemes :: Transducer Grapheme.Output Grapheme Morpheme
morphemes = (fromList $ auto <$> [minBound .. maxBound]) {
projector = Grapheme.grapheme
, rules = []
}
data Output = Output {
rawText :: String
, morpheme :: Morpheme
} deriving (Eq, Show)
parse :: Stream Grapheme.Output -> Stream Output
parse = fmap fusionGraphemes . run morphemes
where
fusionGraphemes (graphemes, morpheme) = Output {
rawText = concat $ Grapheme.rawText <$> graphemes
, morpheme
}

View File

@ -1,34 +1,35 @@
{-# LANGUAGE NamedFieldPuns #-}
module Transducer (
Transducer
RunState(..)
, Stack(..)
, Transducer(..)
, fromList
, run
) where
import Data.List (find)
import Data.Map ((!), insert)
import Data.Maybe (isJust)
import Graph (Graph(..), Vertex(..), editVertex, follow, rewind, singleton, weave)
import Stream (Stream(..), merge)
import qualified Stream (empty)
type Transducer input output = Graph input [output]
newtype Stack a = Stack [a]
data Transducer input label output = Transducer {
graph :: Graph label [output]
, rules :: [RunState input label output -> Maybe (RunState input label output)]
, projector :: input -> label
}
push :: a -> Stack a -> Stack a
push a (Stack s) = Stack (a:s)
content :: Stack a -> [a]
content (Stack s) = reverse s
empty :: Transducer input output
empty :: Graph edge [label]
empty = singleton []
entry :: Ord input => Transducer input output -> ([input], output) -> Transducer input output
entry :: Ord edge => Graph edge [label] -> ([edge], label) -> Graph edge [label]
entry transducer (path, output) =
rewind $ editVertex (weave transducer path) pushLabel
where
pushLabel vertex = vertex {label = output:(label vertex)}
loop :: Ord input => Transducer input output -> ([input], [input]) -> Transducer input output
loop :: Ord edge => Graph edge [label] -> ([edge], [edge]) -> Graph edge [label]
loop transducer ([], loopPath) =
case splitAt (length loopPath - 1) loopPath of
(_, []) -> transducer
@ -41,23 +42,53 @@ loop transducer ([], loopPath) =
vertex {edges = insert input (focus transducer) $ edges vertex}
loop transducer (prefix, loopPath) = loop (weave transducer prefix) ([], loopPath)
fromList :: Ord input => [([input], Either [input] output)] -> Transducer input output
fromList = foldl add empty
fromList :: Ord input => [([input], Either [input] output)] -> Transducer input input output
fromList l = Transducer {graph = foldl add empty l, projector = id, rules = []}
where
add transducer (path, Left loopPath) = loop transducer (path, loopPath)
add transducer (path, Right output) = entry transducer (path, output)
run :: (Ord input, Eq output) => Transducer input output -> Stream input -> Stream ([input], output)
run transducer = runWithStack initState
data RunState input label output = RunState {
transducer :: Transducer input label output
, stack :: Stack input
}
initState :: Transducer input label output -> RunState input label output
initState transducer = RunState {
transducer = transducer {graph = rewind $ graph transducer}
, stack = Stack []
}
newtype Stack a = Stack [a]
push :: a -> Stack a -> Stack a
push a (Stack s) = Stack (a:s)
content :: Stack a -> [a]
content (Stack s) = reverse s
tryInput :: (Eq input, Ord label, Eq output) => RunState input label output -> Stream ([input], output) -> (input, Stream input) -> Stream ([input], output)
tryInput state@(RunState {transducer, stack}) outputsStream (input, stream) =
case follow (graph transducer) (projector transducer $ input) of
Nothing -> Stream.empty
Just newGraph@(Graph {vertices, focus}) ->
let newStack = push input stack in
let emitted = Stream $ emit newStack <$> label (vertices ! focus) in
let continue = runState (state {
transducer = transducer {graph = newGraph}
, stack = newStack
}) stream in
emitted `merge` continue `merge` outputsStream
where
initState = (transducer, Stack [])
emit stack stream output = ((content stack, output), runWithStack initState stream)
runWithStack (state, stack) (Stream inputs) = foldl (\outputsStream (input, stream) ->
case follow state input of
Nothing -> Stream.empty
Just newState@(Graph {vertices, focus}) ->
let newStack = push input stack in
let emitted = Stream $ emit newStack stream <$> label (vertices ! focus) in
let continue = runWithStack (newState, newStack) stream in
emitted `merge` continue `merge` outputsStream
) Stream.empty inputs
emit aStack output = (
(content aStack, output), runState (initState transducer) stream
)
runState :: (Eq input, Ord label, Eq output) => RunState input label output -> Stream input -> Stream ([input], output)
runState state@(RunState {transducer}) (Stream inputs) =
case find isJust $ ($ state) <$> rules transducer of
Just (Just newState) -> runState newState (Stream inputs)
_ -> foldl (tryInput state) Stream.empty inputs
run :: (Eq input, Ord label, Eq output) => Transducer input label output -> Stream input -> Stream ([input], output)
run transducer = runState $ initState transducer