> {-# OPTIONS_HADDOCK show-extensions #-}
> {-|
> Module    : LTK.Learn.SP
> Copyright : (c) 2019-2020,2023 Dakotah Lambert
> License   : MIT

> This module implements a string extension learner for the SP class.
>
> @since 0.3
> -}

> module LTK.Learn.SP (SPG, fSP) where

> import Data.Set (Set)
> import qualified Data.Set as Set

> import LTK.Factors
> import LTK.FSA
> import LTK.Learn.StringExt

When gathering subsequences of words to build a positive grammar,
we should keep in mind that if a given subsequence is considered
acceptable, the definition of SP guarantees that in turn all of
its subsequences are also acceptable.  Therefore unlike for SL, it
makes sense to also gather the factors of width less than \(k\)
when generating a grammar from positive data.

> -- |Return the set of factors under precedence of length \(k\) or less
> -- in the given word.
> fSP :: Ord a => Int -> [a] -> SPG a
> fSP :: forall a. Ord a => Int -> [a] -> SPG a
fSP Int
k = (Set a, Set [a]) -> SPG a
forall {a}. (Set a, Set [a]) -> SPG a
f ((Set a, Set [a]) -> SPG a)
-> ([a] -> (Set a, Set [a])) -> [a] -> SPG a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> Int -> [a] -> (Set a, Set [a])
forall a. Ord a => Bool -> Int -> [a] -> (Set a, Set [a])
fSP' Bool
True Int
k
>     where f :: (Set a, Set [a]) -> SPG a
f (Set a
s, Set [a]
g) = SPG { spgAlpha :: Set a
spgAlpha  =  Set a
s
>                          , spgK :: Int
spgK      =  Int
k
>                          , spg :: Set [a]
spg       =  Set [a]
g
>                          }

> -- |Auxiliary function to gather subsequences.
> -- If the first argument is True,
> -- gather those of length less than or equal to \(k\).
> -- Otherwise, only gather those of length exactly \(k\).
> fSP' :: Ord a => Bool -> Int -> [a] -> (Set a, Set [a])
> fSP' :: forall a. Ord a => Bool -> Int -> [a] -> (Set a, Set [a])
fSP' Bool
lt Int
k = ([a] -> (Set a, Set [a]) -> (Set a, Set [a]))
-> (Set a, Set [a]) -> [[a]] -> (Set a, Set [a])
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr [a] -> (Set a, Set [a]) -> (Set a, Set [a])
forall {a}. Ord a => [a] -> (Set a, Set [a]) -> (Set a, Set [a])
g (Set a
forall c a. Container c a => c
empty, Set [a]
forall c a. Container c a => c
empty) ([[a]] -> (Set a, Set [a]))
-> ([a] -> [[a]]) -> [a] -> (Set a, Set [a])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> [[a]]
forall a. [a] -> [[a]]
ssqs
>     where f :: [a] -> Bool
f = if Bool
lt then [a] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null ([a] -> Bool) -> ([a] -> [a]) -> [a] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [a] -> [a]
forall a. Int -> [a] -> [a]
drop Int
k else Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
(==) Int
k (Int -> Bool) -> ([a] -> Int) -> [a] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length
>           g :: [a] -> (Set a, Set [a]) -> (Set a, Set [a])
g [a]
x (Set a
xs, Set [a]
ys)
>             = ( case [a]
x
>                 of [a
s]  -> a -> Set a -> Set a
forall a. Ord a => a -> Set a -> Set a
Set.insert a
s Set a
xs
>                    [a]
_    -> Set a
xs
>               , (if [a] -> Bool
forall a. [a] -> Bool
f [a]
x then [a] -> Set [a] -> Set [a]
forall a. Ord a => a -> Set a -> Set a
Set.insert [a]
x else Set [a] -> Set [a]
forall a. a -> a
id) Set [a]
ys
>               )

> -- |A representation of an SP grammar.
> data SPG a = SPG { forall a. SPG a -> Set a
spgAlpha :: Set a
>                  , forall a. SPG a -> Int
spgK :: Int
>                  , forall a. SPG a -> Set [a]
spg :: Set [a]
>                  }
>              deriving (SPG a -> SPG a -> Bool
(SPG a -> SPG a -> Bool) -> (SPG a -> SPG a -> Bool) -> Eq (SPG a)
forall a. Eq a => SPG a -> SPG a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall a. Eq a => SPG a -> SPG a -> Bool
== :: SPG a -> SPG a -> Bool
$c/= :: forall a. Eq a => SPG a -> SPG a -> Bool
/= :: SPG a -> SPG a -> Bool
Eq, Eq (SPG a)
Eq (SPG a) =>
(SPG a -> SPG a -> Ordering)
-> (SPG a -> SPG a -> Bool)
-> (SPG a -> SPG a -> Bool)
-> (SPG a -> SPG a -> Bool)
-> (SPG a -> SPG a -> Bool)
-> (SPG a -> SPG a -> SPG a)
-> (SPG a -> SPG a -> SPG a)
-> Ord (SPG a)
SPG a -> SPG a -> Bool
SPG a -> SPG a -> Ordering
SPG a -> SPG a -> SPG a
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall a. Ord a => Eq (SPG a)
forall a. Ord a => SPG a -> SPG a -> Bool
forall a. Ord a => SPG a -> SPG a -> Ordering
forall a. Ord a => SPG a -> SPG a -> SPG a
$ccompare :: forall a. Ord a => SPG a -> SPG a -> Ordering
compare :: SPG a -> SPG a -> Ordering
$c< :: forall a. Ord a => SPG a -> SPG a -> Bool
< :: SPG a -> SPG a -> Bool
$c<= :: forall a. Ord a => SPG a -> SPG a -> Bool
<= :: SPG a -> SPG a -> Bool
$c> :: forall a. Ord a => SPG a -> SPG a -> Bool
> :: SPG a -> SPG a -> Bool
$c>= :: forall a. Ord a => SPG a -> SPG a -> Bool
>= :: SPG a -> SPG a -> Bool
$cmax :: forall a. Ord a => SPG a -> SPG a -> SPG a
max :: SPG a -> SPG a -> SPG a
$cmin :: forall a. Ord a => SPG a -> SPG a -> SPG a
min :: SPG a -> SPG a -> SPG a
Ord, ReadPrec [SPG a]
ReadPrec (SPG a)
Int -> ReadS (SPG a)
ReadS [SPG a]
(Int -> ReadS (SPG a))
-> ReadS [SPG a]
-> ReadPrec (SPG a)
-> ReadPrec [SPG a]
-> Read (SPG a)
forall a. (Read a, Ord a) => ReadPrec [SPG a]
forall a. (Read a, Ord a) => ReadPrec (SPG a)
forall a. (Read a, Ord a) => Int -> ReadS (SPG a)
forall a. (Read a, Ord a) => ReadS [SPG a]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: forall a. (Read a, Ord a) => Int -> ReadS (SPG a)
readsPrec :: Int -> ReadS (SPG a)
$creadList :: forall a. (Read a, Ord a) => ReadS [SPG a]
readList :: ReadS [SPG a]
$creadPrec :: forall a. (Read a, Ord a) => ReadPrec (SPG a)
readPrec :: ReadPrec (SPG a)
$creadListPrec :: forall a. (Read a, Ord a) => ReadPrec [SPG a]
readListPrec :: ReadPrec [SPG a]
Read, Int -> SPG a -> ShowS
[SPG a] -> ShowS
SPG a -> String
(Int -> SPG a -> ShowS)
-> (SPG a -> String) -> ([SPG a] -> ShowS) -> Show (SPG a)
forall a. Show a => Int -> SPG a -> ShowS
forall a. Show a => [SPG a] -> ShowS
forall a. Show a => SPG a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a. Show a => Int -> SPG a -> ShowS
showsPrec :: Int -> SPG a -> ShowS
$cshow :: forall a. Show a => SPG a -> String
show :: SPG a -> String
$cshowList :: forall a. Show a => [SPG a] -> ShowS
showList :: [SPG a] -> ShowS
Show)

> instance HasAlphabet SPG
>     where alphabet :: forall a. SPG a -> Set a
alphabet = SPG e -> Set e
forall a. SPG a -> Set a
spgAlpha

> instance Grammar SPG
>     where emptyG :: forall a. Ord a => SPG a
emptyG = Set a -> Int -> Set [a] -> SPG a
forall a. Set a -> Int -> Set [a] -> SPG a
SPG Set a
forall c a. Container c a => c
empty Int
0 Set [a]
forall c a. Container c a => c
empty
>           augmentG :: forall a. Ord a => SPG a -> SPG a -> SPG a
augmentG SPG a
g1 SPG a
g2
>               = SPG { spgAlpha :: Set a
spgAlpha = SPG a -> Set a
forall a. SPG a -> Set a
forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SPG a
g1 Set a -> Set a -> Set a
forall c a. Container c a => c -> c -> c
`union` SPG a -> Set a
forall a. SPG a -> Set a
forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SPG a
g2
>                     , spgK :: Int
spgK = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (SPG a -> Int
forall a. SPG a -> Int
spgK SPG a
g1) (SPG a -> Int
forall a. SPG a -> Int
spgK SPG a
g2)
>                     , spg :: Set [a]
spg = SPG a -> Set [a]
forall a. SPG a -> Set [a]
spg SPG a
g1 Set [a] -> Set [a] -> Set [a]
forall c a. Container c a => c -> c -> c
`union` SPG a -> Set [a]
forall a. SPG a -> Set [a]
spg SPG a
g2
>                     }
>           isSubGOf :: forall a. Ord a => SPG a -> SPG a -> Bool
isSubGOf SPG a
g1 SPG a
g2 = Set [a] -> Set [a] -> Bool
forall c a. (Container c a, Eq a) => c -> c -> Bool
isSubsetOf (SPG a -> Set [a]
forall a. SPG a -> Set [a]
spg SPG a
g1) (SPG a -> Set [a]
forall a. SPG a -> Set [a]
spg SPG a
g2)
>           genFSA :: forall a. (NFData a, Ord a) => SPG a -> FSA Integer a
genFSA SPG a
g = FSA Integer a -> FSA Integer a
forall {e}. Ord e => FSA Integer e -> FSA Integer e
n (FSA Integer a -> FSA Integer a)
-> (Set [a] -> FSA Integer a) -> Set [a] -> FSA Integer a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [FSA Integer a] -> FSA Integer a
forall n e.
(Enum n, Ord n, NFData n, Ord e, NFData e) =>
[FSA n e] -> FSA n e
flatIntersection ([FSA Integer a] -> FSA Integer a)
-> (Set [a] -> [FSA Integer a]) -> Set [a] -> FSA Integer a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (FSA Integer a
free FSA Integer a -> [FSA Integer a] -> [FSA Integer a]
forall a. a -> [a] -> [a]
:) ([FSA Integer a] -> [FSA Integer a])
-> (Set [a] -> [FSA Integer a]) -> Set [a] -> [FSA Integer a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
>                      ([a] -> FSA Integer a) -> [[a]] -> [FSA Integer a]
forall a b. (a -> b) -> [a] -> [b]
map (Set a -> Literal a -> FSA Integer a
forall n e. (Enum n, Ord n, Ord e) => Set e -> Literal e -> FSA n e
buildLiteral (SPG a -> Set a
forall a. SPG a -> Set a
forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SPG a
g) (Literal a -> FSA Integer a)
-> ([a] -> Literal a) -> [a] -> FSA Integer a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Factor a -> Literal a
forall e. Factor e -> Literal e
forbidden (Factor a -> Literal a) -> ([a] -> Factor a) -> [a] -> Literal a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> Factor a
f) ([[a]] -> [FSA Integer a])
-> (Set [a] -> [[a]]) -> Set [a] -> [FSA Integer a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
>                      Set [a] -> [[a]]
forall a. Set a -> [a]
Set.toList (Set [a] -> FSA Integer a) -> Set [a] -> FSA Integer a
forall a b. (a -> b) -> a -> b
$ SPG a -> Set [a]
forall a. Ord a => SPG a -> Set [a]
complG SPG a
g
>               where f :: [a] -> Factor a
f = [Set a] -> Factor a
forall e. [Set e] -> Factor e
Subsequence ([Set a] -> Factor a) -> ([a] -> [Set a]) -> [a] -> Factor a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> Set a) -> [a] -> [Set a]
forall a b. (a -> b) -> [a] -> [b]
map a -> Set a
forall c a. Container c a => a -> c
singleton
>                     n :: FSA Integer e -> FSA Integer e
n FSA Integer e
x = FSA Integer e -> FSA Integer e
forall e n. (Ord e, Ord n) => FSA n e -> FSA Integer e
normalize FSA Integer e
x FSA Integer e -> FSA Integer e -> FSA Integer e
forall a. a -> a -> a
`asTypeOf` FSA Integer e
x
>                     free :: FSA Integer a
free = Set a -> FSA Integer a
forall e n. (Ord e, Enum n, Ord n) => Set e -> FSA n e
totalWithAlphabet (Set a -> FSA Integer a) -> Set a -> FSA Integer a
forall a b. (a -> b) -> a -> b
$ SPG a -> Set a
forall a. SPG a -> Set a
forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SPG a
g

> complG :: Ord a => SPG a -> Set [a]
> complG :: forall a. Ord a => SPG a -> Set [a]
complG SPG a
g = Set [a] -> Set [a] -> Set [a]
forall c a. (Container c a, Eq a) => c -> c -> c
difference (Int -> Set a -> Set [a]
forall a. Ord a => Int -> Set a -> Set [a]
allFs (SPG a -> Int
forall a. SPG a -> Int
spgK SPG a
g) (SPG a -> Set a
forall a. SPG a -> Set a
forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SPG a
g)) (SPG a -> Set [a]
forall a. SPG a -> Set [a]
spg SPG a
g)

> allFs :: Ord a => Int -> Set a -> Set [a]
> allFs :: forall a. Ord a => Int -> Set a -> Set [a]
allFs Int
k = [[a]] -> Set [a]
forall a. Ord a => [a] -> Set a
Set.fromList ([[a]] -> Set [a]) -> (Set a -> [[a]]) -> Set a -> Set [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([a] -> Bool) -> [[a]] -> [[a]]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
k) (Int -> Bool) -> ([a] -> Int) -> [a] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length)
>           ([[a]] -> [[a]]) -> (Set a -> [[a]]) -> Set a -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> [[a]]
forall a. [a] -> [[a]]
sequencesOver ([a] -> [[a]]) -> (Set a -> [a]) -> Set a -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set a -> [a]
forall a. Set a -> [a]
Set.toList


Efficient subsequence finding for omega-words
=============================================

The @ssqs'@ function computes non-empty subsequences with multiplicity.
For example, the sequence "a" appears twice for "aba".
We then add in the empty subsequence for @ssqs@

> ssqs' :: [a] -> [[a]]
> ssqs' :: forall a. [a] -> [[a]]
ssqs' [] = []
> ssqs' (a
x:[a]
xs) = [a
x] [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: [[a]] -> [[a]] -> [[a]]
forall (c :: * -> *) a.
(Linearizable c, Container (c a) a) =>
c a -> c a -> c a
interleave (([a] -> [a]) -> [[a]] -> [[a]]
forall a b. (a -> b) -> [a] -> [b]
map (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:) [[a]]
ys) [[a]]
ys
>     where ys :: [[a]]
ys = [a] -> [[a]]
forall a. [a] -> [[a]]
ssqs' [a]
xs

> ssqs :: [a] -> [[a]]
> ssqs :: forall a. [a] -> [[a]]
ssqs = ([][a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
:) ([[a]] -> [[a]]) -> ([a] -> [[a]]) -> [a] -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> [[a]]
forall a. [a] -> [[a]]
ssqs'