> {-# OPTIONS_HADDOCK show-extensions #-}
> {-|
> Module    : LTK.Learn.SL
> Copyright : (c) 2019-2020,2023 Dakotah Lambert
> License   : MIT

> This module implements a string extension learner for the SL class.
>
> @since 0.3
> -}

> module LTK.Learn.SL (SLG(..), fSL) where

> import Data.Set (Set)
> import qualified Data.Set as Set

> import LTK.Factors
> import LTK.FSA
> import LTK.Learn.StringExt

> -- |Return the set of \(k\)-factors under successor in the given word.
> -- Factors are triples, where the first and last components are
> -- Booleans that indicate whether the factor is anchored at
> -- its head or tail, respectively, and the central component is
> -- the factor itself.
> -- If a word is short enough to not contain any \(k\)-factors,
> -- the entire word, appropriately anchored, is included in the set.
> fSL :: Ord a => Int -> [a] -> SLG a
> fSL :: forall a. Ord a => Int -> [a] -> SLG a
fSL = forall a. Ord a => Bool -> Int -> [a] -> SLG a
fSL' Bool
True

> fSL' :: Ord a => Bool -> Int -> [a] -> SLG a
> fSL' :: forall a. Ord a => Bool -> Int -> [a] -> SLG a
fSL' Bool
h Int
k [a]
w
>     | forall (t :: * -> *) a. Foldable t => t a -> Bool
null (forall a. Int -> [a] -> [a]
drop (Int
k' forall a. Num a => a -> a -> a
- Int
1) [a]
w)  =  forall a. Ord a => Int -> (Bool, [a], Bool) -> SLG a
mkSLG Int
k (Bool
h, [a]
w, Bool
True)
>     | Bool
otherwise               =  forall (g :: * -> *) a. (Grammar g, Ord a) => g a -> g a -> g a
augmentG (forall a. Ord a => Int -> (Bool, [a], Bool) -> SLG a
mkSLG Int
k (Bool
h, forall a. Int -> [a] -> [a]
take Int
k' [a]
w, Bool
False)) forall a b. (a -> b) -> a -> b
$
>                                  forall a. Ord a => Bool -> Int -> [a] -> SLG a
fSL' Bool
False Int
k [a]
w'
>     where k' :: Int
k' = if Bool
h then Int
k forall a. Num a => a -> a -> a
- Int
1 else Int
k
>           w' :: [a]
w' = if Bool
h then [a]
w else forall a. Int -> [a] -> [a]
drop Int
1 [a]
w

> -- |A representation of an SL grammar.
> data SLG a = SLG { forall a. SLG a -> Set a
slgAlpha :: Set a
>                  , forall a. SLG a -> Int
slgK :: Int
>                  , forall a. SLG a -> Set (Bool, [a], Bool)
slg :: Set (Bool, [a], Bool)
>                  }
>              deriving (SLG a -> SLG a -> Bool
forall a. Eq a => SLG a -> SLG a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SLG a -> SLG a -> Bool
$c/= :: forall a. Eq a => SLG a -> SLG a -> Bool
== :: SLG a -> SLG a -> Bool
$c== :: forall a. Eq a => SLG a -> SLG a -> Bool
Eq, SLG a -> SLG a -> Bool
SLG a -> SLG a -> Ordering
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall {a}. Ord a => Eq (SLG a)
forall a. Ord a => SLG a -> SLG a -> Bool
forall a. Ord a => SLG a -> SLG a -> Ordering
forall a. Ord a => SLG a -> SLG a -> SLG a
min :: SLG a -> SLG a -> SLG a
$cmin :: forall a. Ord a => SLG a -> SLG a -> SLG a
max :: SLG a -> SLG a -> SLG a
$cmax :: forall a. Ord a => SLG a -> SLG a -> SLG a
>= :: SLG a -> SLG a -> Bool
$c>= :: forall a. Ord a => SLG a -> SLG a -> Bool
> :: SLG a -> SLG a -> Bool
$c> :: forall a. Ord a => SLG a -> SLG a -> Bool
<= :: SLG a -> SLG a -> Bool
$c<= :: forall a. Ord a => SLG a -> SLG a -> Bool
< :: SLG a -> SLG a -> Bool
$c< :: forall a. Ord a => SLG a -> SLG a -> Bool
compare :: SLG a -> SLG a -> Ordering
$ccompare :: forall a. Ord a => SLG a -> SLG a -> Ordering
Ord, ReadPrec [SLG a]
ReadPrec (SLG a)
ReadS [SLG a]
forall a. (Read a, Ord a) => ReadPrec [SLG a]
forall a. (Read a, Ord a) => ReadPrec (SLG a)
forall a. (Read a, Ord a) => Int -> ReadS (SLG a)
forall a. (Read a, Ord a) => ReadS [SLG a]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [SLG a]
$creadListPrec :: forall a. (Read a, Ord a) => ReadPrec [SLG a]
readPrec :: ReadPrec (SLG a)
$creadPrec :: forall a. (Read a, Ord a) => ReadPrec (SLG a)
readList :: ReadS [SLG a]
$creadList :: forall a. (Read a, Ord a) => ReadS [SLG a]
readsPrec :: Int -> ReadS (SLG a)
$creadsPrec :: forall a. (Read a, Ord a) => Int -> ReadS (SLG a)
Read, Int -> SLG a -> ShowS
forall a. Show a => Int -> SLG a -> ShowS
forall a. Show a => [SLG a] -> ShowS
forall a. Show a => SLG a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SLG a] -> ShowS
$cshowList :: forall a. Show a => [SLG a] -> ShowS
show :: SLG a -> String
$cshow :: forall a. Show a => SLG a -> String
showsPrec :: Int -> SLG a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> SLG a -> ShowS
Show)

> mkSLG :: Ord a => Int -> (Bool, [a], Bool) -> SLG a
> mkSLG :: forall a. Ord a => Int -> (Bool, [a], Bool) -> SLG a
mkSLG Int
k x :: (Bool, [a], Bool)
x@(Bool
_,[a]
b,Bool
_) = SLG { slgAlpha :: Set a
slgAlpha  =  forall a. Ord a => [a] -> Set a
Set.fromList [a]
b
>                         , slgK :: Int
slgK      =  Int
k
>                         , slg :: Set (Bool, [a], Bool)
slg       =  forall c a. Container c a => a -> c
singleton (Bool, [a], Bool)
x
>                         }

> instance HasAlphabet SLG
>     where alphabet :: forall a. SLG a -> Set a
alphabet = forall a. SLG a -> Set a
slgAlpha

> instance Grammar SLG
>     where emptyG :: forall a. Ord a => SLG a
emptyG = forall a. Set a -> Int -> Set (Bool, [a], Bool) -> SLG a
SLG forall c a. Container c a => c
empty Int
0 forall c a. Container c a => c
empty
>           augmentG :: forall a. Ord a => SLG a -> SLG a -> SLG a
augmentG SLG a
g1 SLG a
g2
>               = SLG { slgAlpha :: Set a
slgAlpha = forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SLG a
g1 forall c a. Container c a => c -> c -> c
`union` forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SLG a
g2
>                     , slgK :: Int
slgK = forall a. Ord a => a -> a -> a
max (forall a. SLG a -> Int
slgK SLG a
g1) (forall a. SLG a -> Int
slgK SLG a
g2)
>                     , slg :: Set (Bool, [a], Bool)
slg = forall a. SLG a -> Set (Bool, [a], Bool)
slg SLG a
g1 forall c a. Container c a => c -> c -> c
`union` forall a. SLG a -> Set (Bool, [a], Bool)
slg SLG a
g2
>                     }
>           isSubGOf :: forall a. Ord a => SLG a -> SLG a -> Bool
isSubGOf SLG a
g1 SLG a
g2 = forall c a. (Container c a, Eq a) => c -> c -> Bool
isSubsetOf (forall a. SLG a -> Set (Bool, [a], Bool)
slg SLG a
g1) (forall a. SLG a -> Set (Bool, [a], Bool)
slg SLG a
g2)
>           genFSA :: forall a. (NFData a, Ord a) => SLG a -> FSA Integer a
genFSA SLG a
g = forall {e}. Ord e => FSA Integer e -> FSA Integer e
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall n e.
(Enum n, Ord n, NFData n, Ord e, NFData e) =>
[FSA n e] -> FSA n e
flatIntersection forall b c a. (b -> c) -> (a -> b) -> a -> c
. (FSA Integer a
free forall a. a -> [a] -> [a]
:)
>                      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (forall n e. (Enum n, Ord n, Ord e) => Set e -> Literal e -> FSA n e
buildLiteral (forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SLG a
g) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e. Factor e -> Literal e
forbidden forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {a}. Ord a => (Bool, [a], Bool) -> Factor a
f)
>                      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Set a -> [a]
Set.toList forall a b. (a -> b) -> a -> b
$ forall a. Ord a => SLG a -> Set (Bool, [a], Bool)
complG SLG a
g
>               where f :: (Bool, [a], Bool) -> Factor a
f (Bool
h, [a]
b, Bool
t) = forall e. [Set e] -> Bool -> Bool -> Factor e
Substring (forall a b. (a -> b) -> [a] -> [b]
map forall c a. Container c a => a -> c
singleton [a]
b) Bool
h Bool
t
>                     n :: FSA Integer e -> FSA Integer e
n FSA Integer e
x = forall e n. (Ord e, Ord n) => FSA n e -> FSA Integer e
normalize FSA Integer e
x forall a. a -> a -> a
`asTypeOf` FSA Integer e
x
>                     free :: FSA Integer a
free = forall e n. (Ord e, Enum n, Ord n) => Set e -> FSA n e
totalWithAlphabet forall a b. (a -> b) -> a -> b
$ forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SLG a
g

> complG :: Ord a => SLG a -> Set (Bool, [a], Bool)
> complG :: forall a. Ord a => SLG a -> Set (Bool, [a], Bool)
complG SLG a
g = forall c a. (Container c a, Eq a) => c -> c -> c
difference (forall a. Ord a => Int -> Set a -> Set (Bool, [a], Bool)
allFs (forall a. SLG a -> Int
slgK SLG a
g) (forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SLG a
g)) (forall a. SLG a -> Set (Bool, [a], Bool)
slg SLG a
g)

> astrings :: Int -> [a] -> [(Bool, [a], Bool)]
> astrings :: forall a. Int -> [a] -> [(Bool, [a], Bool)]
astrings Int
k = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap forall {t :: * -> *} {a}. Foldable t => t a -> [(Bool, t a, Bool)]
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
takeWhile ((forall a. Ord a => a -> a -> Bool
<= Int
k) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Int
length) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [[a]]
sequencesOver
>     where f :: t a -> [(Bool, t a, Bool)]
f t a
s = case forall a. Ord a => a -> a -> Ordering
compare (forall (t :: * -> *) a. Foldable t => t a -> Int
length t a
s) (Int
k forall a. Num a => a -> a -> a
- Int
1)
>                 of Ordering
LT -> [(Bool
True, t a
s, Bool
True)]
>                    Ordering
EQ -> [(Bool
True, t a
s, Bool
False), (Bool
False, t a
s, Bool
True)]
>                    Ordering
GT -> [(Bool
False, t a
s, Bool
False)]

> -- |All possible factors of width \(k\) under adjacency,
> -- as well as shorter fully-anchored factors.
> allFs :: Ord a => Int -> Set a -> Set (Bool, [a], Bool)
> allFs :: forall a. Ord a => Int -> Set a -> Set (Bool, [a], Bool)
allFs Int
k = forall a. Ord a => [a] -> Set a
Set.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Int -> [a] -> [(Bool, [a], Bool)]
astrings Int
k forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Set a -> [a]
Set.toList