-----------------------------------------------------------------------------
-- |
-- Module      :  Data.SuffixTreeCluster
-- Copyright   :  (c) Bryan O'Sullivan 2007, JP Bernardy 2010
-- License     :  BSD-style
-- Maintainer  :  nobody
-- Stability   :  experimental
-- Portability :  portable
--
-- A suffix tree cluster implementation.
--
-- Since many function names (but not the type name) clash with
-- "Prelude" names, this module is usually imported @qualified@, e.g.
--
-- >  import Data.SuffixTreeCluster (STree)
-- >  import qualified Data.SuffixTreeCluster as T
--
-- This implementation constructs the suffix tree lazily, so subtrees
-- are not created until they are traversed.  
--
-- Estimates are given for performance.  The value /k/ is a constant;
-- /n/ is the length of a query string; and /t/ is the number of
-- elements (nodes and leaves) in a suffix tree.

module SuffixTreeCluster
    (
    -- * Types
      Alphabet
    , Edge
    , Prefix
    , STree(..)

    -- * Construction
    , construct

    -- * Querying
    , elem
    , findEdge
    , findTree
    , findPath

    -- * Other useful functions
    , mkPrefix
    , prefix
    , suffixes
    , select
    , commonStrings

    -- * Debug
    , printTree
    ) where
                   
import Prelude hiding (elem, foldl, foldr)
import qualified Data.Map as M
import Control.Arrow (second)
import qualified Data.Text as SB
import qualified Data.Text.Lazy as LB
import qualified Data.List as L
import Data.Maybe (listToMaybe, mapMaybe, catMaybes)
import Data.Monoid hiding (Sum)
-- import Text.Groom
import Data.Function (on)
import qualified Data.Tree as T

-- | The length of a prefix list.  This type is formulated to do cheap
-- work eagerly (to avoid constructing a pile of deferred thunks),
-- while deferring potentially expensive work (computing the length of
-- a list).
data Length a = Exactly {-# UNPACK #-} !Int
              | Sum {-# UNPACK #-} !Int [a]
              deriving (Int -> Length a -> ShowS
[Length a] -> ShowS
Length a -> String
(Int -> Length a -> ShowS)
-> (Length a -> String) -> ([Length a] -> ShowS) -> Show (Length a)
forall a. Show a => Int -> Length a -> ShowS
forall a. Show a => [Length a] -> ShowS
forall a. Show a => Length a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a. Show a => Int -> Length a -> ShowS
showsPrec :: Int -> Length a -> ShowS
$cshow :: forall a. Show a => Length a -> String
show :: Length a -> String
$cshowList :: forall a. Show a => [Length a] -> ShowS
showList :: [Length a] -> ShowS
Show)

-- | The list of symbols that 'constructWith' can possibly see in its
-- input.
type Alphabet a = [a]

-- | The prefix string associated with an 'Edge'.  Use 'mkPrefix' to
-- create a value of this type, and 'prefix' to deconstruct one.
newtype Prefix a = Prefix ([a], Length a)

instance (Eq a) => Eq (Prefix a) where
    Prefix a
a == :: Prefix a -> Prefix a -> Bool
== Prefix a
b = Prefix a -> [a]
forall a. Prefix a -> [a]
prefix Prefix a
a [a] -> [a] -> Bool
forall a. Eq a => a -> a -> Bool
== Prefix a -> [a]
forall a. Prefix a -> [a]
prefix Prefix a
b

instance (Ord a) => Ord (Prefix a) where
    compare :: Prefix a -> Prefix a -> Ordering
compare Prefix a
a Prefix a
b = [a] -> [a] -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Prefix a -> [a]
forall a. Prefix a -> [a]
prefix Prefix a
a) (Prefix a -> [a]
forall a. Prefix a -> [a]
prefix Prefix a
b)

instance (Show a) => Show (Prefix a) where
    show :: Prefix a -> String
show Prefix a
a = String
"mkPrefix " String -> ShowS
forall a. [a] -> [a] -> [a]
++ [a] -> String
forall a. Show a => a -> String
show (Prefix a -> [a]
forall a. Prefix a -> [a]
prefix Prefix a
a)

-- | An edge in the suffix tree.
type Edge a b = (Prefix a, STree a b)

-- | /O(1)/. Construct a 'Prefix' value.
mkPrefix :: [a] -> Prefix a
mkPrefix :: forall a. [a] -> Prefix a
mkPrefix [a]
xs = ([a], Length a) -> Prefix a
forall a. ([a], Length a) -> Prefix a
Prefix ([a]
xs, Int -> [a] -> Length a
forall a. Int -> [a] -> Length a
Sum Int
0 [a]
xs)

pmap :: (a -> b) -> Prefix a -> Prefix b
pmap :: forall a b. (a -> b) -> Prefix a -> Prefix b
pmap a -> b
f = [b] -> Prefix b
forall a. [a] -> Prefix a
mkPrefix ([b] -> Prefix b) -> (Prefix a -> [b]) -> Prefix a -> Prefix b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> b) -> [a] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map a -> b
f ([a] -> [b]) -> (Prefix a -> [a]) -> Prefix a -> [b]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Prefix a -> [a]
forall a. Prefix a -> [a]
prefix

instance Functor Prefix where
    fmap :: forall a b. (a -> b) -> Prefix a -> Prefix b
fmap = (a -> b) -> Prefix a -> Prefix b
forall a b. (a -> b) -> Prefix a -> Prefix b
pmap

-- | The suffix tree type.  The implementation is exposed to ease the
-- development of custom traversal functions.  Note that @('Prefix' a,
-- 'STree' a b)@ pairs are not stored in any order.
data STree a b = Node b [Edge a b]
               deriving (Int -> STree a b -> ShowS
[STree a b] -> ShowS
STree a b -> String
(Int -> STree a b -> ShowS)
-> (STree a b -> String)
-> ([STree a b] -> ShowS)
-> Show (STree a b)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall a b. (Show b, Show a) => Int -> STree a b -> ShowS
forall a b. (Show b, Show a) => [STree a b] -> ShowS
forall a b. (Show b, Show a) => STree a b -> String
$cshowsPrec :: forall a b. (Show b, Show a) => Int -> STree a b -> ShowS
showsPrec :: Int -> STree a b -> ShowS
$cshow :: forall a b. (Show b, Show a) => STree a b -> String
show :: STree a b -> String
$cshowList :: forall a b. (Show b, Show a) => [STree a b] -> ShowS
showList :: [STree a b] -> ShowS
Show)

smap :: (a -> b) -> STree a c -> STree b c
smap :: forall a b c. (a -> b) -> STree a c -> STree b c
smap a -> b
f (Node c
b [Edge a c]
es) = c -> [Edge b c] -> STree b c
forall a b. b -> [Edge a b] -> STree a b
Node c
b ((Edge a c -> Edge b c) -> [Edge a c] -> [Edge b c]
forall a b. (a -> b) -> [a] -> [b]
map (\(Prefix a
p, STree a c
t) -> ((a -> b) -> Prefix a -> Prefix b
forall a b. (a -> b) -> Prefix a -> Prefix b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> b
f Prefix a
p, (a -> b) -> STree a c -> STree b c
forall a b c. (a -> b) -> STree a c -> STree b c
smap a -> b
f STree a c
t)) [Edge a c]
es)

lmap :: (a -> b) -> STree c a -> STree c b
lmap :: forall a b c. (a -> b) -> STree c a -> STree c b
lmap a -> b
f (Node a
b [Edge c a]
es) = b -> [Edge c b] -> STree c b
forall a b. b -> [Edge a b] -> STree a b
Node (a -> b
f a
b) ((Edge c a -> Edge c b) -> [Edge c a] -> [Edge c b]
forall a b. (a -> b) -> [a] -> [b]
map ((STree c a -> STree c b) -> Edge c a -> Edge c b
forall b c d. (b -> c) -> (d, b) -> (d, c)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second ((a -> b) -> STree c a -> STree c b
forall a b c. (a -> b) -> STree c a -> STree c b
lmap a -> b
f)) [Edge c a]
es)

instance Functor (STree a) where
    fmap :: forall a b. (a -> b) -> STree a a -> STree a b
fmap = (a -> b) -> STree a a -> STree a b
forall a b c. (a -> b) -> STree c a -> STree c b
lmap

-- | /O(n)/. Obtain the list stored in a 'Prefix'.
prefix :: Prefix a -> [a]
prefix :: forall a. Prefix a -> [a]
prefix (Prefix ([a]
ys, Exactly Int
n)) = Int -> [a] -> [a]
forall a. Int -> [a] -> [a]
take Int
n [a]
ys
prefix (Prefix ([a]
ys, Sum Int
n [a]
xs)) = Int -> [a] -> [a]
forall {t} {c}. (Eq t, Num t) => t -> [c] -> [c]
tk Int
n [a]
ys
    where tk :: t -> [c] -> [c]
tk t
0 [c]
ys = (a -> c -> c) -> [a] -> [c] -> [c]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith ((c -> c) -> a -> c -> c
forall a b. a -> b -> a
const c -> c
forall a. a -> a
id) [a]
xs [c]
ys
          tk t
n (c
y:[c]
ys) = c
y c -> [c] -> [c]
forall a. a -> [a] -> [a]
: t -> [c] -> [c]
tk (t
nt -> t -> t
forall a. Num a => a -> a -> a
-t
1) [c]
ys



-- | Increments the length of a prefix.
inc :: Length a -> Length a
inc :: forall a. Length a -> Length a
inc (Exactly Int
n) = Int -> Length a
forall a. Int -> Length a
Exactly (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
inc (Sum Int
n [a]
xs)  = Int -> [a] -> Length a
forall a. Int -> [a] -> Length a
Sum (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) [a]
xs

-- | /O(n)/. Returns all non-empty suffixes of the argument, longest
-- first.  Behaves as follows:
--
-- >suffixes xs == init (tails xs)
suffixes :: [a] -> [[a]]
suffixes :: forall a. [a] -> [[a]]
suffixes (xs :: [a]
xs@(a
_:[a]
xs')) = ([a]
xs) [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: [a] -> [[a]]
forall a. [a] -> [[a]]
suffixes ([a]
xs')
suffixes [a]
_ = []

constr :: (Eq b, Monoid b, Ord a) => [(b,[[a]])] -> STree a b
constr :: forall b a. (Eq b, Monoid b, Ord a) => [(b, [[a]])] -> STree a b
constr [(b, [[a]])]
is = b -> [Edge a b] -> STree a b
forall a b. b -> [Edge a b] -> STree a b
Node b
label ([(b, [[a]])] -> [Edge a b]
forall b a. (Eq b, Monoid b, Ord a) => [(b, [[a]])] -> [Edge a b]
suf [(b, [[a]])]
xs)
    where xs :: [(b, [[a]])]
xs = ((b, [[a]]) -> Bool) -> [(b, [[a]])] -> [(b, [[a]])]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> ((b, [[a]]) -> Bool) -> (b, [[a]]) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[a]] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null ([[a]] -> Bool) -> ((b, [[a]]) -> [[a]]) -> (b, [[a]]) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (b, [[a]]) -> [[a]]
forall a b. (a, b) -> b
snd) ([(b, [[a]])] -> [(b, [[a]])]) -> [(b, [[a]])] -> [(b, [[a]])]
forall a b. (a -> b) -> a -> b
$ ((b, [[a]]) -> (b, [[a]])) -> [(b, [[a]])] -> [(b, [[a]])]
forall a b. (a -> b) -> [a] -> [b]
map (([[a]] -> [[a]]) -> (b, [[a]]) -> (b, [[a]])
forall b c d. (b -> c) -> (d, b) -> (d, c)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second (([a] -> Bool) -> [[a]] -> [[a]]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> ([a] -> Bool) -> [a] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null))) [(b, [[a]])]
is
          label :: b
label = ([b] -> b
forall a. Monoid a => [a] -> a
mconcat ([b] -> b) -> [b] -> b
forall a b. (a -> b) -> a -> b
$ ((b, [[a]]) -> b) -> [(b, [[a]])] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map (b, [[a]]) -> b
forall a b. (a, b) -> a
fst [(b, [[a]])]
is)

construct :: (Eq b, Monoid b, Ord a) => [([a],b)] -> STree a b
construct :: forall b a. (Eq b, Monoid b, Ord a) => [([a], b)] -> STree a b
construct [([a], b)]
is = [(b, [[a]])] -> STree a b
forall b a. (Eq b, Monoid b, Ord a) => [(b, [[a]])] -> STree a b
constr [(b
b,[a] -> [[a]]
forall a. [a] -> [[a]]
L.tails [a]
a) | ([a]
a,b
b) <- [([a], b)]
is]

suf :: (Eq b, Monoid b, Ord a) => [(b,[[a]])] -> [Edge a b]
suf :: forall b a. (Eq b, Monoid b, Ord a) => [(b, [[a]])] -> [Edge a b]
suf [(b, [[a]])]
ss = [(([a], Length a) -> Prefix a
forall a. ([a], Length a) -> Prefix a
Prefix (a
aa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
sa, Length a -> Length a
forall a. Length a -> Length a
inc Length a
cpl), [(b, [[a]])] -> STree a b
forall b a. (Eq b, Monoid b, Ord a) => [(b, [[a]])] -> STree a b
constr [(b, [[a]])]
ssr)
              | (a
a, n :: [(b, [[a]])]
n@((b
_,[a]
sa:[[a]]
_):[(b, [[a]])]
_)) <- [(b, [[a]])] -> [(a, [(b, [[a]])])]
forall b a. (Eq b, Ord a) => [(b, [[a]])] -> [(a, [(b, [[a]])])]
suffixMap [(b, [[a]])]
ss,
              let (Length a
cpl,[(b, [[a]])]
ssr) = [(b, [[a]])] -> (Length a, [(b, [[a]])])
forall b a.
(Monoid b, Eq b, Eq a) =>
[(b, [[a]])] -> (Length a, [(b, [[a]])])
cst [(b, [[a]])]
n]

regroup :: (Eq b) => [(b, a)] -> [(b, [a])]
regroup :: forall b a. Eq b => [(b, a)] -> [(b, [a])]
regroup [(b, a)]
xs = [(b
b,((b, a) -> a) -> [(b, a)] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map (b, a) -> a
forall a b. (a, b) -> b
snd [(b, a)]
as) | as :: [(b, a)]
as@((b
b,a
_):[(b, a)]
_) <- ((b, a) -> (b, a) -> Bool) -> [(b, a)] -> [[(b, a)]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
L.groupBy (b -> b -> Bool
forall a. Eq a => a -> a -> Bool
(==) (b -> b -> Bool) -> ((b, a) -> b) -> (b, a) -> (b, a) -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (b, a) -> b
forall a b. (a, b) -> a
fst) [(b, a)]
xs]

-- 

suffixMap :: (Eq b, Ord a) => [(b,[[a]])] -> [(a, [(b, [[a]])])]
suffixMap :: forall b a. (Eq b, Ord a) => [(b, [[a]])] -> [(a, [(b, [[a]])])]
suffixMap [(b, [[a]])]
sss = ((a, [(b, [a])]) -> (a, [(b, [[a]])]))
-> [(a, [(b, [a])])] -> [(a, [(b, [[a]])])]
forall a b. (a -> b) -> [a] -> [b]
map (([(b, [a])] -> [(b, [[a]])])
-> (a, [(b, [a])]) -> (a, [(b, [[a]])])
forall b c d. (b -> c) -> (d, b) -> (d, c)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second ([(b, [a])] -> [(b, [[a]])]
forall b a. Eq b => [(b, a)] -> [(b, [a])]
regroup ([(b, [a])] -> [(b, [[a]])])
-> ([(b, [a])] -> [(b, [a])]) -> [(b, [a])] -> [(b, [[a]])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(b, [a])] -> [(b, [a])]
forall a. [a] -> [a]
reverse)) ([(a, [(b, [a])])] -> [(a, [(b, [[a]])])])
-> ([(b, [a])] -> [(a, [(b, [a])])])
-> [(b, [a])]
-> [(a, [(b, [[a]])])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                Map a [(b, [a])] -> [(a, [(b, [a])])]
forall k a. Map k a -> [(k, a)]
M.toList (Map a [(b, [a])] -> [(a, [(b, [a])])])
-> ([(b, [a])] -> Map a [(b, [a])])
-> [(b, [a])]
-> [(a, [(b, [a])])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Map a [(b, [a])] -> (b, [a]) -> Map a [(b, [a])])
-> Map a [(b, [a])] -> [(b, [a])] -> Map a [(b, [a])]
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
L.foldl' Map a [(b, [a])] -> (b, [a]) -> Map a [(b, [a])]
forall {k} {a}.
Ord k =>
Map k [(a, [k])] -> (a, [k]) -> Map k [(a, [k])]
step Map a [(b, [a])]
forall k a. Map k a
M.empty ([(b, [a])] -> [(a, [(b, [[a]])])])
-> [(b, [a])] -> [(a, [(b, [[a]])])]
forall a b. (a -> b) -> a -> b
$ [(b
b,[a]
s) | (b
b,[[a]]
ss) <- [(b, [[a]])]
sss, [a]
s <- [[a]]
ss]
    where step :: Map k [(a, [k])] -> (a, [k]) -> Map k [(a, [k])]
step Map k [(a, [k])]
m (a
b,k
x:[k]
xs) = (Maybe [(a, [k])] -> Maybe [(a, [k])])
-> k -> Map k [(a, [k])] -> Map k [(a, [k])]
forall k a.
Ord k =>
(Maybe a -> Maybe a) -> k -> Map k a -> Map k a
M.alter ((a, [k]) -> Maybe [(a, [k])] -> Maybe [(a, [k])]
forall {a}. a -> Maybe [a] -> Maybe [a]
f (a
b,[k]
xs)) k
x Map k [(a, [k])]
m
          step Map k [(a, [k])]
m (a, [k])
_ = Map k [(a, [k])]
m
          f :: a -> Maybe [a] -> Maybe [a]
f a
x Maybe [a]
Nothing = [a] -> Maybe [a]
forall a. a -> Maybe a
Just [a
x]
          f a
x (Just [a]
xs) = [a] -> Maybe [a]
forall a. a -> Maybe a
Just (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
xs)

cst :: (Monoid b, Eq b, Eq a) => [(b, [[a]])] -> (Length a, [(b, [[a]])])

cst :: forall b a.
(Monoid b, Eq b, Eq a) =>
[(b, [[a]])] -> (Length a, [(b, [[a]])])
cst [(b, [[a]])]
bss | [Bool] -> Bool
forall (t :: * -> *). Foldable t => t Bool -> Bool
or [[a] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [a]
s | (b
_,[[a]]
ss) <- [(b, [[a]])]
bss, [a]
s <- [[a]]
ss] = (Int -> Length a
forall a. Int -> Length a
Exactly Int
0, [(b, [[a]])]
bss)
cst ss0 :: [(b, [[a]])]
ss0@((b
_,[[a]
s]):[(b, [[a]])]
ss) | ((b, [[a]]) -> Bool) -> [(b, [[a]])] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (([[a]] -> [[a]] -> Bool
forall a. Eq a => a -> a -> Bool
== [[a]
s]) ([[a]] -> Bool) -> ((b, [[a]]) -> [[a]]) -> (b, [[a]]) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (b, [[a]]) -> [[a]]
forall a b. (a, b) -> b
snd) [(b, [[a]])]
ss = (Int -> [a] -> Length a
forall a. Int -> [a] -> Length a
Sum Int
0 [a]
s, [(b
b, []) | (b
b,[[a]]
_) <- [(b, [[a]])]
ss0])
cst awss :: [(b, [[a]])]
awss@((b
_,(a
a:[a]
w):[[a]]
_):[(b, [[a]])]
ss)
    | [a] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [a
c | (b
_,[[a]]
x) <- [(b, [[a]])]
ss, (a
c:[a]
_) <- [[a]]
x, a
a a -> a -> Bool
forall a. Eq a => a -> a -> Bool
/= a
c] 
        = let cpl' :: Length a
cpl' = Length a -> Length a
forall a. Length a -> Length a
inc Length a
cpl
          in Length a -> (Length a, [(b, [[a]])]) -> (Length a, [(b, [[a]])])
forall a b. a -> b -> b
seq Length a
cpl' (Length a
cpl', [(b, [[a]])]
rss)
    | Bool
otherwise = (Int -> Length a
forall a. Int -> Length a
Exactly Int
0, [(b, [[a]])]
awss)
    where (Length a
cpl, [(b, [[a]])]
rss) = [(b, [[a]])] -> (Length a, [(b, [[a]])])
forall b a.
(Monoid b, Eq b, Eq a) =>
[(b, [[a]])] -> (Length a, [(b, [[a]])])
cst ([(b, [[a]])] -> (Length a, [(b, [[a]])]))
-> [(b, [[a]])] -> (Length a, [(b, [[a]])])
forall a b. (a -> b) -> a -> b
$ ((b, [[a]]) -> (b, [[a]])) -> [(b, [[a]])] -> [(b, [[a]])]
forall a b. (a -> b) -> [a] -> [b]
map (([[a]] -> [[a]]) -> (b, [[a]]) -> (b, [[a]])
forall b c d. (b -> c) -> (d, b) -> (d, c)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second (([a] -> [a]) -> [[a]] -> [[a]]
forall a b. (a -> b) -> [a] -> [b]
map [a] -> [a]
forall a. [a] -> [a]
tail')) [(b, [[a]])]
awss
          tail' :: [a] -> [a]
tail' (a
_:[a]
t) = [a]
t -- so we see the pattern fail here instead of a generic tail.


suffix :: (Eq a) => [a] -> [a] -> Maybe [a]
suffix :: forall a. Eq a => [a] -> [a] -> Maybe [a]
suffix (a
p:[a]
ps) (a
x:[a]
xs) | a
p a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
x = [a] -> [a] -> Maybe [a]
forall a. Eq a => [a] -> [a] -> Maybe [a]
suffix [a]
ps [a]
xs
                     | Bool
otherwise = Maybe [a]
forall a. Maybe a
Nothing
suffix [a]
_ [a]
xs = [a] -> Maybe [a]
forall a. a -> Maybe a
Just [a]
xs

{-# SPECIALISE elem :: [Char] -> STree Char b -> Bool #-}
{-# SPECIALISE elem :: [[Char]] -> STree [Char] b -> Bool #-}
{-# SPECIALISE elem :: [SB.Text] -> STree SB.Text b -> Bool #-}
{-# SPECIALISE elem :: [LB.Text] -> STree LB.Text b -> Bool #-}
{-# SPECIALISE elem :: (Eq a) => [[a]] -> STree [a] b -> Bool #-}

-- | /O(n)/.  Indicates whether the suffix tree contains the given
-- sublist.  Performance is linear in the length /n/ of the
-- sublist.
elem :: (Eq a) => [a] -> STree a b -> Bool
elem :: forall a b. Eq a => [a] -> STree a b -> Bool
elem [] STree a b
_ = Bool
True
elem [a]
xs (Node b
mb [Edge a b]
es) = (Edge a b -> Bool) -> [Edge a b] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any Edge a b -> Bool
forall {b}. (Prefix a, STree a b) -> Bool
pfx [Edge a b]
es
    where pfx :: (Prefix a, STree a b) -> Bool
pfx (Prefix a
e, STree a b
t) = Bool -> ([a] -> Bool) -> Maybe [a] -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False ([a] -> STree a b -> Bool
forall a b. Eq a => [a] -> STree a b -> Bool
`elem` STree a b
t) ([a] -> [a] -> Maybe [a]
forall a. Eq a => [a] -> [a] -> Maybe [a]
suffix (Prefix a -> [a]
forall a. Prefix a -> [a]
prefix Prefix a
e) [a]
xs)

lkup :: (Monoid b, Eq a) => [a] -> STree a b -> b
lkup :: forall b a. (Monoid b, Eq a) => [a] -> STree a b -> b
lkup [] (Node b
b [Edge a b]
es) = b
b
lkup [a]
xs (Node b
_ [Edge a b]
es) = [b] -> b
forall a. Monoid a => [a] -> a
mconcat ([b] -> b) -> [b] -> b
forall a b. (a -> b) -> a -> b
$ (Edge a b -> b) -> [Edge a b] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map Edge a b -> b
forall {b}. Monoid b => (Prefix a, STree a b) -> b
pfx [Edge a b]
es
    where pfx :: (Prefix a, STree a b) -> b
pfx (Prefix a
e, STree a b
t) = b -> ([a] -> b) -> Maybe [a] -> b
forall b a. b -> (a -> b) -> Maybe a -> b
maybe b
forall a. Monoid a => a
mempty ([a] -> STree a b -> b
forall b a. (Monoid b, Eq a) => [a] -> STree a b -> b
`lkup` STree a b
t) ([a] -> [a] -> Maybe [a]
forall a. Eq a => [a] -> [a] -> Maybe [a]
suffix (Prefix a -> [a]
forall a. Prefix a -> [a]
prefix Prefix a
e) [a]
xs)

{-# SPECIALISE findEdge :: [Char] -> STree Char b
                        -> Maybe (Edge Char b, Int) #-}
{-# SPECIALISE findEdge :: [String] -> STree String b
                        -> Maybe (Edge String b, Int) #-}
{-# SPECIALISE findEdge :: [SB.Text] -> STree SB.Text b
                        -> Maybe (Edge SB.Text b, Int) #-}
{-# SPECIALISE findEdge :: [ LB.Text] -> STree LB.Text b
                        -> Maybe (Edge LB.Text b, Int) #-}
{-# SPECIALISE findEdge :: (Eq a) => [[a]] -> STree [a] b
                        -> Maybe (Edge [a] b, Int) #-}

-- | /O(n)/.  Finds the given subsequence in the suffix tree.  On
-- failure, returns 'Nothing'.  On success, returns the 'Edge' in the
-- suffix tree at which the subsequence ends, along with the number of
-- elements to drop from the prefix of the 'Edge' to get the \"real\"
-- remaining prefix.
--
-- Here is an example:
--
-- >> find "ssip" (construct "mississippi")
-- >Just ((mkPrefix "ppi",Leaf),1)
--
-- This indicates that the edge @('mkPrefix' \"ppi\",'Leaf')@ matches,
-- and that we must strip 1 character from the string @\"ppi\"@ to get
-- the remaining prefix string @\"pi\"@.
--
-- Performance is linear in the length /n/ of the query list.
findEdge :: (Eq a) => [a] -> STree a b -> Maybe (Edge a b, Int)
findEdge :: forall a b. Eq a => [a] -> STree a b -> Maybe (Edge a b, Int)
findEdge [a]
xs (Node b
mb [Edge a b]
es) = [(Edge a b, Int)] -> Maybe (Edge a b, Int)
forall a. [a] -> Maybe a
listToMaybe ((Edge a b -> Maybe (Edge a b, Int))
-> [Edge a b] -> [(Edge a b, Int)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Edge a b -> Maybe (Edge a b, Int)
forall {b}.
(Prefix a, STree a b) -> Maybe ((Prefix a, STree a b), Int)
pfx [Edge a b]
es)
    where pfx :: (Prefix a, STree a b) -> Maybe ((Prefix a, STree a b), Int)
pfx e :: (Prefix a, STree a b)
e@(Prefix a
p, STree a b
t) = let p' :: [a]
p' = Prefix a -> [a]
forall a. Prefix a -> [a]
prefix Prefix a
p
                         in [a] -> [a] -> Maybe [a]
forall a. Eq a => [a] -> [a] -> Maybe [a]
suffix [a]
p' [a]
xs Maybe [a]
-> ([a] -> Maybe ((Prefix a, STree a b), Int))
-> Maybe ((Prefix a, STree a b), Int)
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \[a]
suf ->
            case [a]
suf of
              [] -> ((Prefix a, STree a b), Int) -> Maybe ((Prefix a, STree a b), Int)
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return ((Prefix a, STree a b)
e, [a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ((a -> a -> a) -> [a] -> [a] -> [a]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith a -> a -> a
forall a b. a -> b -> a
const [a]
xs [a]
p'))
              [a]
s -> [a] -> STree a b -> Maybe ((Prefix a, STree a b), Int)
forall a b. Eq a => [a] -> STree a b -> Maybe (Edge a b, Int)
findEdge [a]
s STree a b
t

-- | /O(n)/. Finds the subtree rooted at the end of the given query
-- sequence.  On failure, returns 'Nothing'.
--
-- Performance is linear in the length /n/ of the query list.
findTree :: (Eq a) => [a] -> STree a b -> Maybe (STree a b)
findTree :: forall a b. Eq a => [a] -> STree a b -> Maybe (STree a b)
findTree [a]
s STree a b
t = ((Prefix a, STree a b) -> STree a b
forall a b. (a, b) -> b
snd ((Prefix a, STree a b) -> STree a b)
-> (((Prefix a, STree a b), Int) -> (Prefix a, STree a b))
-> ((Prefix a, STree a b), Int)
-> STree a b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Prefix a, STree a b), Int) -> (Prefix a, STree a b)
forall a b. (a, b) -> a
fst) (((Prefix a, STree a b), Int) -> STree a b)
-> Maybe ((Prefix a, STree a b), Int) -> Maybe (STree a b)
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [a] -> STree a b -> Maybe ((Prefix a, STree a b), Int)
forall a b. Eq a => [a] -> STree a b -> Maybe (Edge a b, Int)
findEdge [a]
s STree a b
t

-- | /O(n)/. Returns the path from the 'Edge' in the suffix tree at
-- which the given subsequence ends, up to the root of the tree.  If
-- the subsequence is not found, returns the empty list.
--
-- Performance is linear in the length of the query list.
findPath :: (Eq a) => [a] -> STree a b -> [Edge a b]
findPath :: forall a b. Eq a => [a] -> STree a b -> [Edge a b]
findPath = [(Prefix a, STree a b)]
-> [a] -> STree a b -> [(Prefix a, STree a b)]
forall {a} {b}.
Eq a =>
[(Prefix a, STree a b)]
-> [a] -> STree a b -> [(Prefix a, STree a b)]
go []
    where go :: [(Prefix a, STree a b)]
-> [a] -> STree a b -> [(Prefix a, STree a b)]
go [(Prefix a, STree a b)]
me [a]
xs (Node b
mb [(Prefix a, STree a b)]
es) = [(Prefix a, STree a b)]
-> [(Prefix a, STree a b)] -> [(Prefix a, STree a b)]
pfx [(Prefix a, STree a b)]
me [(Prefix a, STree a b)]
es
              where pfx :: [(Prefix a, STree a b)]
-> [(Prefix a, STree a b)] -> [(Prefix a, STree a b)]
pfx [(Prefix a, STree a b)]
_ [] = []
                    pfx [(Prefix a, STree a b)]
me (e :: (Prefix a, STree a b)
e@(Prefix a
p, STree a b
t):[(Prefix a, STree a b)]
es) =
                        case [a] -> [a] -> Maybe [a]
forall a. Eq a => [a] -> [a] -> Maybe [a]
suffix (Prefix a -> [a]
forall a. Prefix a -> [a]
prefix Prefix a
p) [a]
xs of
                          Maybe [a]
Nothing -> [(Prefix a, STree a b)]
-> [(Prefix a, STree a b)] -> [(Prefix a, STree a b)]
pfx [(Prefix a, STree a b)]
me [(Prefix a, STree a b)]
es
                          Just [] -> (Prefix a, STree a b)
e(Prefix a, STree a b)
-> [(Prefix a, STree a b)] -> [(Prefix a, STree a b)]
forall a. a -> [a] -> [a]
:[(Prefix a, STree a b)]
me
                          Just [a]
s -> [(Prefix a, STree a b)]
-> [a] -> STree a b -> [(Prefix a, STree a b)]
go ((Prefix a, STree a b)
e(Prefix a, STree a b)
-> [(Prefix a, STree a b)] -> [(Prefix a, STree a b)]
forall a. a -> [a] -> [a]
:[(Prefix a, STree a b)]
me) [a]
s STree a b
t


-- select :: SC.STree a b -> Tree Int
selectPairs :: ([a], STree a [a]) -> Tree ([a], [a])
selectPairs ([a]
p0,Node [a]
entries [Edge a [a]]
sub) = ([a], [a]) -> [Tree ([a], [a])] -> Tree ([a], [a])
forall a. a -> [Tree a] -> Tree a
T.Node ([a]
p0,[a]
entries) ([Tree ([a], [a])] -> Tree ([a], [a]))
-> [Tree ([a], [a])] -> Tree ([a], [a])
forall a b. (a -> b) -> a -> b
$
              [([a], STree a [a]) -> Tree ([a], [a])
forall b a. Eq b => ([a], STree a [b]) -> Tree ([a], [b])
select ([a]
p0 [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ Prefix a -> [a]
forall a. Prefix a -> [a]
prefix Prefix a
p,STree a [a]
n) | (Prefix a
p,STree a [a]
n) <- [Edge a [a]]
sub, STree a [a] -> Bool
forall {a} {a}. STree a [a] -> Bool
isRelevantNode STree a [a]
n]

selectLen :: Tree ([a], b) -> Tree ([a], b)
selectLen (T.Node ([a]
shared,b
entries) [Tree ([a], b)]
sub) = ([a], b) -> [Tree ([a], b)] -> Tree ([a], b)
forall a. a -> [Tree a] -> Tree a
T.Node ([a], b)
here ((Tree ([a], b) -> Tree ([a], b))
-> [Tree ([a], b)] -> [Tree ([a], b)]
forall a b. (a -> b) -> [a] -> [b]
map Tree ([a], b) -> Tree ([a], b)
selectLen [Tree ([a], b)]
sub)
   where here :: ([a], b)
here | [a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
shared Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
7 = ([],b
forall a. Monoid a => a
mempty) 
              | Bool
otherwise = ([a]
shared,b
entries)


selectLongest :: Tree ([a], [a]) -> Tree ([a], [a])
selectLongest (T.Node ([a]
shared,[a]
entries) [Tree ([a], [a])]
sub) = ([a], [a]) -> [Tree ([a], [a])] -> Tree ([a], [a])
forall a. a -> [Tree a] -> Tree a
T.Node ([a], [a])
here ((Tree ([a], [a]) -> Tree ([a], [a]))
-> [Tree ([a], [a])] -> [Tree ([a], [a])]
forall a b. (a -> b) -> [a] -> [b]
map Tree ([a], [a]) -> Tree ([a], [a])
selectLongest [Tree ([a], [a])]
sub)
   where here :: ([a], [a])
here | Bool
subsumed = ([],[]) 
              | Bool
otherwise = ([a]
shared,[a]
entries)
         subsumed :: Bool
subsumed = (a -> Bool) -> [a] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (a -> [a] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`L.elem` ((Tree ([a], [a]) -> [a]) -> [Tree ([a], [a])] -> [a]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (([a], [a]) -> [a]
forall a b. (a, b) -> b
snd (([a], [a]) -> [a])
-> (Tree ([a], [a]) -> ([a], [a])) -> Tree ([a], [a]) -> [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Tree ([a], [a]) -> ([a], [a])
forall a. Tree a -> a
T.rootLabel) [Tree ([a], [a])]
sub)) [a]
entries 
    

select :: Eq b => ([a],STree a [b]) -> T.Tree ([a],[b])
select :: forall b a. Eq b => ([a], STree a [b]) -> Tree ([a], [b])
select = Tree ([a], [b]) -> Tree ([a], [b])
forall {a} {a}. Eq a => Tree ([a], [a]) -> Tree ([a], [a])
selectLongest (Tree ([a], [b]) -> Tree ([a], [b]))
-> (([a], STree a [b]) -> Tree ([a], [b]))
-> ([a], STree a [b])
-> Tree ([a], [b])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Tree ([a], [b]) -> Tree ([a], [b])
forall {b} {a}. Monoid b => Tree ([a], b) -> Tree ([a], b)
selectLen (Tree ([a], [b]) -> Tree ([a], [b]))
-> (([a], STree a [b]) -> Tree ([a], [b]))
-> ([a], STree a [b])
-> Tree ([a], [b])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([a], STree a [b]) -> Tree ([a], [b])
forall b a. Eq b => ([a], STree a [b]) -> Tree ([a], [b])
selectPairs

buildMap :: (Eq a, Ord b) => T.Tree ([a],[b]) -> M.Map [b] [[a]]
buildMap :: forall a b. (Eq a, Ord b) => Tree ([a], [b]) -> Map [b] [[a]]
buildMap Tree ([a], [b])
t = (Map [b] [[a]] -> ([a], [b]) -> Map [b] [[a]])
-> Map [b] [[a]] -> [([a], [b])] -> Map [b] [[a]]
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
L.foldl' Map [b] [[a]] -> ([a], [b]) -> Map [b] [[a]]
forall {k} {a}.
(Ord k, Eq a) =>
Map k [[a]] -> ([a], k) -> Map k [[a]]
step Map [b] [[a]]
forall k a. Map k a
M.empty ([([a], [b])] -> Map [b] [[a]]) -> [([a], [b])] -> Map [b] [[a]]
forall a b. (a -> b) -> a -> b
$ (([a], [b]) -> Bool) -> [([a], [b])] -> [([a], [b])]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (([a], [b]) -> Bool) -> ([a], [b]) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [b] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null ([b] -> Bool) -> (([a], [b]) -> [b]) -> ([a], [b]) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([a], [b]) -> [b]
forall a b. (a, b) -> b
snd) ([([a], [b])] -> [([a], [b])]) -> [([a], [b])] -> [([a], [b])]
forall a b. (a -> b) -> a -> b
$ Tree ([a], [b]) -> [([a], [b])]
forall a. Tree a -> [a]
T.flatten Tree ([a], [b])
t
    where step :: Map k [[a]] -> ([a], k) -> Map k [[a]]
step Map k [[a]]
m ([a]
x,k
bs) = (Maybe [[a]] -> Maybe [[a]]) -> k -> Map k [[a]] -> Map k [[a]]
forall k a.
Ord k =>
(Maybe a -> Maybe a) -> k -> Map k a -> Map k a
M.alter ([a] -> Maybe [[a]] -> Maybe [[a]]
forall {a}. Eq a => [a] -> Maybe [[a]] -> Maybe [[a]]
lub [a]
x) k
bs Map k [[a]]
m

commonStrings :: (Eq b, Ord a, Ord b) => [([a],[b])] -> M.Map [b] [[a]]
commonStrings :: forall b a. (Eq b, Ord a, Ord b) => [([a], [b])] -> Map [b] [[a]]
commonStrings [([a], [b])]
xs = Tree ([a], [b]) -> Map [b] [[a]]
forall a b. (Eq a, Ord b) => Tree ([a], [b]) -> Map [b] [[a]]
buildMap (Tree ([a], [b]) -> Map [b] [[a]])
-> Tree ([a], [b]) -> Map [b] [[a]]
forall a b. (a -> b) -> a -> b
$ ([a], STree a [b]) -> Tree ([a], [b])
forall b a. Eq b => ([a], STree a [b]) -> Tree ([a], [b])
select ([],[([a], [b])] -> STree a [b]
forall b a. (Eq b, Monoid b, Ord a) => [([a], b)] -> STree a b
construct [([a], [b])]
xs)

lub :: [a] -> Maybe [[a]] -> Maybe [[a]]
lub [a]
y Maybe [[a]]
Nothing = [[a]] -> Maybe [[a]]
forall a. a -> Maybe a
Just [[a]
y]
lub [a]
y (Just [[a]]
xs)
    | ([a]
y [a] -> [a] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`L.isSuffixOf`) ([a] -> Bool) -> [[a]] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
`any` [[a]]
xs = [[a]] -> Maybe [[a]]
forall a. a -> Maybe a
Just [[a]]
xs
    | Bool
otherwise = [[a]] -> Maybe [[a]]
forall a. a -> Maybe a
Just ([a]
y [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: ([a] -> Bool) -> [[a]] -> [[a]]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> ([a] -> Bool) -> [a] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([a] -> [a] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`L.isSuffixOf` [a]
y)) [[a]]
xs)

printTree :: Show a => T.Tree a -> IO ()
printTree :: forall a. Show a => Tree a -> IO ()
printTree = String -> IO ()
putStrLn (String -> IO ()) -> (Tree a -> String) -> Tree a -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Tree String -> String
T.drawTree (Tree String -> String)
-> (Tree a -> Tree String) -> Tree a -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> String) -> Tree a -> Tree String
forall a b. (a -> b) -> Tree a -> Tree b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> String
forall a. Show a => a -> String
show

isRelevant :: [a] -> Bool
isRelevant [] = Bool
False
isRelevant [a
_] = Bool
False
isRelevant [a]
_ = Bool
True

isRelevantNode :: STree a [a] -> Bool
isRelevantNode (Node [a]
e [Edge a [a]]
_) = [a] -> Bool
forall a. [a] -> Bool
isRelevant [a]
e

emptyPrefix :: Prefix a
emptyPrefix :: forall a. Prefix a
emptyPrefix = [a] -> Prefix a
forall a. [a] -> Prefix a
mkPrefix []