Try and implement basic linguistic stuff for english
This commit is contained in:
parent
f44bf6259e
commit
b5432a4358
4 changed files with 213 additions and 27 deletions
|
@ -19,6 +19,8 @@ extra-source-files: CHANGELOG.md
|
||||||
|
|
||||||
library
|
library
|
||||||
exposed-modules: Graph
|
exposed-modules: Graph
|
||||||
|
, Lang.En.Grapheme
|
||||||
|
, Lang.En.Morpheme
|
||||||
, Stream
|
, Stream
|
||||||
, Transducer
|
, Transducer
|
||||||
, Tree
|
, Tree
|
||||||
|
|
100
src/Lang/En/Grapheme.hs
Normal file
100
src/Lang/En/Grapheme.hs
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
|
module Lang.En.Grapheme (
|
||||||
|
Grapheme(..)
|
||||||
|
, Output(..)
|
||||||
|
, Regular(..)
|
||||||
|
, graphemes
|
||||||
|
, parse
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Char (toLower, isLower)
|
||||||
|
import Data.List (groupBy)
|
||||||
|
import qualified Data.Map as Map (empty, insert, lookup)
|
||||||
|
import Data.String (IsString(..))
|
||||||
|
--import Graph (Graph(..))
|
||||||
|
import Stream (Stream)
|
||||||
|
import Transducer (Transducer(..), fromList, run)
|
||||||
|
--import Transducer (Transducer(..), RunState(..), Stack(..), fromList)
|
||||||
|
|
||||||
|
data Regular =
|
||||||
|
A
|
||||||
|
| Ae
|
||||||
|
| Ai
|
||||||
|
| Ay
|
||||||
|
| E
|
||||||
|
| Ea
|
||||||
|
| Ee
|
||||||
|
| Ei
|
||||||
|
| B
|
||||||
|
| C
|
||||||
|
| Ck
|
||||||
|
| D
|
||||||
|
| F
|
||||||
|
| G
|
||||||
|
| Gh
|
||||||
|
| L
|
||||||
|
| P
|
||||||
|
| S
|
||||||
|
| T
|
||||||
|
| Th
|
||||||
|
| W
|
||||||
|
| Wh
|
||||||
|
deriving (Bounded, Enum, Eq, Ord, Read, Show)
|
||||||
|
|
||||||
|
{-
|
||||||
|
card :: Int
|
||||||
|
card = fromEnum (maxBound :: Regular)
|
||||||
|
-}
|
||||||
|
|
||||||
|
data Grapheme = Grapheme Regular | Punctuation String deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
instance Read Grapheme where
|
||||||
|
readsPrec _ next =
|
||||||
|
[(maybe (Punctuation next) Grapheme $ Map.lookup next stringToGrapheme, "")]
|
||||||
|
where
|
||||||
|
stringToGrapheme = foldr insert Map.empty [minBound .. maxBound]
|
||||||
|
insert regular = Map.insert (show regular) regular
|
||||||
|
|
||||||
|
|
||||||
|
instance {-# OVERLAPS #-} IsString [Grapheme] where
|
||||||
|
fromString = fmap read . groupBy (const isLower)
|
||||||
|
|
||||||
|
data Output = Output {
|
||||||
|
rawText :: String
|
||||||
|
, grapheme :: Grapheme
|
||||||
|
} deriving (Eq, Show)
|
||||||
|
|
||||||
|
{-
|
||||||
|
instance Enum Grapheme where
|
||||||
|
fromEnum (Regular r) = fromEnum r
|
||||||
|
fromEnum (Noise c) = card + fromEnum c
|
||||||
|
|
||||||
|
toEnum n
|
||||||
|
| n < card = Regular $ toEnum n
|
||||||
|
| otherwise = Noise $ toEnum (n - card)
|
||||||
|
-}
|
||||||
|
|
||||||
|
auto :: Regular -> ([Char], Either [Char] Grapheme)
|
||||||
|
auto r = (toLower <$> show r, Right (Grapheme r))
|
||||||
|
|
||||||
|
punctuation :: String -> ([Char], Either [Char] Grapheme)
|
||||||
|
punctuation signs = (signs, Right (Punctuation signs))
|
||||||
|
|
||||||
|
{-
|
||||||
|
groupSpaces :: RunState Char Char Grapheme -> Maybe (RunState Char Char Grapheme)
|
||||||
|
groupSpaces state@(RunState {transducer, stack = Stack (' ':ls)})
|
||||||
|
| focus (graph transducer) == 0 = Just $ state {stack = Stack ls}
|
||||||
|
groupSpaces _ = Nothing
|
||||||
|
-}
|
||||||
|
|
||||||
|
graphemes :: Transducer Char Char Grapheme
|
||||||
|
graphemes = (fromList $ punctuations ++ regulars) {
|
||||||
|
projector = toLower
|
||||||
|
}
|
||||||
|
where
|
||||||
|
regulars = auto <$> [minBound .. maxBound]
|
||||||
|
punctuations = punctuation <$> ["?", "!", ".", ",", ":", " "]
|
||||||
|
|
||||||
|
parse :: Stream Char -> Stream Output
|
||||||
|
parse = fmap (uncurry Output) . run graphemes
|
53
src/Lang/En/Morpheme.hs
Normal file
53
src/Lang/En/Morpheme.hs
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
|
module Lang.En.Morpheme (
|
||||||
|
Morpheme(..)
|
||||||
|
, Output(..)
|
||||||
|
, morphemes
|
||||||
|
, parse
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.String (fromString)
|
||||||
|
import Lang.En.Grapheme (Grapheme(..))
|
||||||
|
import qualified Lang.En.Grapheme as Grapheme (Output(..))
|
||||||
|
import Stream (Stream)
|
||||||
|
import Transducer (Transducer(..), fromList, run)
|
||||||
|
|
||||||
|
data Regular =
|
||||||
|
LY
|
||||||
|
| ED
|
||||||
|
| UN
|
||||||
|
| CAT
|
||||||
|
| DOG
|
||||||
|
| EaT
|
||||||
|
| SLEeP
|
||||||
|
| A
|
||||||
|
| ThE
|
||||||
|
| GOoD
|
||||||
|
| KINd
|
||||||
|
| ANd
|
||||||
|
| Space
|
||||||
|
deriving (Bounded, Enum, Eq, Ord, Show)
|
||||||
|
|
||||||
|
data Morpheme = Morpheme Regular | Punctuation String deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
auto :: Morpheme -> ([Grapheme], Either [Grapheme] Morpheme)
|
||||||
|
auto morpheme = (fromString $ show morpheme, Right morpheme)
|
||||||
|
|
||||||
|
morphemes :: Transducer Grapheme.Output Grapheme Morpheme
|
||||||
|
morphemes = (fromList $ auto <$> [minBound .. maxBound]) {
|
||||||
|
projector = Grapheme.grapheme
|
||||||
|
, rules = []
|
||||||
|
}
|
||||||
|
|
||||||
|
data Output = Output {
|
||||||
|
rawText :: String
|
||||||
|
, morpheme :: Morpheme
|
||||||
|
} deriving (Eq, Show)
|
||||||
|
|
||||||
|
parse :: Stream Grapheme.Output -> Stream Output
|
||||||
|
parse = fmap fusionGraphemes . run morphemes
|
||||||
|
where
|
||||||
|
fusionGraphemes (graphemes, morpheme) = Output {
|
||||||
|
rawText = concat $ Grapheme.rawText <$> graphemes
|
||||||
|
, morpheme
|
||||||
|
}
|
|
@ -1,34 +1,35 @@
|
||||||
{-# LANGUAGE NamedFieldPuns #-}
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
module Transducer (
|
module Transducer (
|
||||||
Transducer
|
RunState(..)
|
||||||
|
, Stack(..)
|
||||||
|
, Transducer(..)
|
||||||
, fromList
|
, fromList
|
||||||
, run
|
, run
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
import Data.List (find)
|
||||||
import Data.Map ((!), insert)
|
import Data.Map ((!), insert)
|
||||||
|
import Data.Maybe (isJust)
|
||||||
import Graph (Graph(..), Vertex(..), editVertex, follow, rewind, singleton, weave)
|
import Graph (Graph(..), Vertex(..), editVertex, follow, rewind, singleton, weave)
|
||||||
import Stream (Stream(..), merge)
|
import Stream (Stream(..), merge)
|
||||||
import qualified Stream (empty)
|
import qualified Stream (empty)
|
||||||
|
|
||||||
type Transducer input output = Graph input [output]
|
data Transducer input label output = Transducer {
|
||||||
newtype Stack a = Stack [a]
|
graph :: Graph label [output]
|
||||||
|
, rules :: [RunState input label output -> Maybe (RunState input label output)]
|
||||||
|
, projector :: input -> label
|
||||||
|
}
|
||||||
|
|
||||||
push :: a -> Stack a -> Stack a
|
empty :: Graph edge [label]
|
||||||
push a (Stack s) = Stack (a:s)
|
|
||||||
|
|
||||||
content :: Stack a -> [a]
|
|
||||||
content (Stack s) = reverse s
|
|
||||||
|
|
||||||
empty :: Transducer input output
|
|
||||||
empty = singleton []
|
empty = singleton []
|
||||||
|
|
||||||
entry :: Ord input => Transducer input output -> ([input], output) -> Transducer input output
|
entry :: Ord edge => Graph edge [label] -> ([edge], label) -> Graph edge [label]
|
||||||
entry transducer (path, output) =
|
entry transducer (path, output) =
|
||||||
rewind $ editVertex (weave transducer path) pushLabel
|
rewind $ editVertex (weave transducer path) pushLabel
|
||||||
where
|
where
|
||||||
pushLabel vertex = vertex {label = output:(label vertex)}
|
pushLabel vertex = vertex {label = output:(label vertex)}
|
||||||
|
|
||||||
loop :: Ord input => Transducer input output -> ([input], [input]) -> Transducer input output
|
loop :: Ord edge => Graph edge [label] -> ([edge], [edge]) -> Graph edge [label]
|
||||||
loop transducer ([], loopPath) =
|
loop transducer ([], loopPath) =
|
||||||
case splitAt (length loopPath - 1) loopPath of
|
case splitAt (length loopPath - 1) loopPath of
|
||||||
(_, []) -> transducer
|
(_, []) -> transducer
|
||||||
|
@ -41,23 +42,53 @@ loop transducer ([], loopPath) =
|
||||||
vertex {edges = insert input (focus transducer) $ edges vertex}
|
vertex {edges = insert input (focus transducer) $ edges vertex}
|
||||||
loop transducer (prefix, loopPath) = loop (weave transducer prefix) ([], loopPath)
|
loop transducer (prefix, loopPath) = loop (weave transducer prefix) ([], loopPath)
|
||||||
|
|
||||||
fromList :: Ord input => [([input], Either [input] output)] -> Transducer input output
|
fromList :: Ord input => [([input], Either [input] output)] -> Transducer input input output
|
||||||
fromList = foldl add empty
|
fromList l = Transducer {graph = foldl add empty l, projector = id, rules = []}
|
||||||
where
|
where
|
||||||
add transducer (path, Left loopPath) = loop transducer (path, loopPath)
|
add transducer (path, Left loopPath) = loop transducer (path, loopPath)
|
||||||
add transducer (path, Right output) = entry transducer (path, output)
|
add transducer (path, Right output) = entry transducer (path, output)
|
||||||
|
|
||||||
run :: (Ord input, Eq output) => Transducer input output -> Stream input -> Stream ([input], output)
|
data RunState input label output = RunState {
|
||||||
run transducer = runWithStack initState
|
transducer :: Transducer input label output
|
||||||
where
|
, stack :: Stack input
|
||||||
initState = (transducer, Stack [])
|
}
|
||||||
emit stack stream output = ((content stack, output), runWithStack initState stream)
|
|
||||||
runWithStack (state, stack) (Stream inputs) = foldl (\outputsStream (input, stream) ->
|
initState :: Transducer input label output -> RunState input label output
|
||||||
case follow state input of
|
initState transducer = RunState {
|
||||||
|
transducer = transducer {graph = rewind $ graph transducer}
|
||||||
|
, stack = Stack []
|
||||||
|
}
|
||||||
|
|
||||||
|
newtype Stack a = Stack [a]
|
||||||
|
|
||||||
|
push :: a -> Stack a -> Stack a
|
||||||
|
push a (Stack s) = Stack (a:s)
|
||||||
|
|
||||||
|
content :: Stack a -> [a]
|
||||||
|
content (Stack s) = reverse s
|
||||||
|
|
||||||
|
tryInput :: (Eq input, Ord label, Eq output) => RunState input label output -> Stream ([input], output) -> (input, Stream input) -> Stream ([input], output)
|
||||||
|
tryInput state@(RunState {transducer, stack}) outputsStream (input, stream) =
|
||||||
|
case follow (graph transducer) (projector transducer $ input) of
|
||||||
Nothing -> Stream.empty
|
Nothing -> Stream.empty
|
||||||
Just newState@(Graph {vertices, focus}) ->
|
Just newGraph@(Graph {vertices, focus}) ->
|
||||||
let newStack = push input stack in
|
let newStack = push input stack in
|
||||||
let emitted = Stream $ emit newStack stream <$> label (vertices ! focus) in
|
let emitted = Stream $ emit newStack <$> label (vertices ! focus) in
|
||||||
let continue = runWithStack (newState, newStack) stream in
|
let continue = runState (state {
|
||||||
|
transducer = transducer {graph = newGraph}
|
||||||
|
, stack = newStack
|
||||||
|
}) stream in
|
||||||
emitted `merge` continue `merge` outputsStream
|
emitted `merge` continue `merge` outputsStream
|
||||||
) Stream.empty inputs
|
where
|
||||||
|
emit aStack output = (
|
||||||
|
(content aStack, output), runState (initState transducer) stream
|
||||||
|
)
|
||||||
|
|
||||||
|
runState :: (Eq input, Ord label, Eq output) => RunState input label output -> Stream input -> Stream ([input], output)
|
||||||
|
runState state@(RunState {transducer}) (Stream inputs) =
|
||||||
|
case find isJust $ ($ state) <$> rules transducer of
|
||||||
|
Just (Just newState) -> runState newState (Stream inputs)
|
||||||
|
_ -> foldl (tryInput state) Stream.empty inputs
|
||||||
|
|
||||||
|
run :: (Eq input, Ord label, Eq output) => Transducer input label output -> Stream input -> Stream ([input], output)
|
||||||
|
run transducer = runState $ initState transducer
|
||||||
|
|
Loading…
Reference in a new issue