---
--- Yu Hirate, Hayato Yamana: Generalized Sequential Pattern Mining with Item Interval, Journal of Computer Vol. 1, No 3, June 2006
---
--
-- Configuration
--
items := [a, b, c, d, e, f]
ISDB :=
[[[(0, [a]), (86400, [a, b, c]), (259200, [a, c])]]
,[[(0, [a, d]), (259200, [c])]]
,[[(0, [a, e, f]), (172800, [a, b])]]]
N := length ISDB
minSup := ceiling (0.5 * N)
C1 := 0 -- min_interval
C2 := 172800 -- max_interval
C3 := 0 -- min_whole_interval
C4 := 300000 -- max_whole_interval
I t := floor (rtof (t / (60 * 60 * 24)))
--
-- Utils
--
query := list (integer, eq)
sequence := list (time, list eq)
time := matcher
| interval $ $ as (integer, integer) with
| $t -> [(I t, t)]
| $ as something with
| $tgt -> [tgt]
--
-- Algorithm
--
-- calculate ISDB|α
project α ISDB := match α as query with
| (#0, $x) :: $α' -> project' α' (map (\xss -> matchAllDFS xss as set sequence with
| (_ ++ ($t, _ ++ #x :: $cs) :: $ls) :: _
-> (0, cs) :: (map (\t' xs -> (t' - t, xs)) ls))
ISDB)
project' α ISDB := match α as query with
| [] -> ISDB
| ($a, $x) :: $α' -> project' (map (\b y -> (b - a, y)) α')
(map (\xss -> matchAllDFS xss as set sequence with
| (_ ++ (interval #a $t, _ ++ #x :: $cs) :: $ls) :: _
-> (0, cs) :: (map (\t' xs -> (t' - t, xs)) ls))
ISDB)
-- main function
gspm items ISDB I minSup C1 C2 C3 C4 :=
let φ := [] in
let R := [] in
let fs := filter (\α ISDB' -> match ISDB' as multiset sequence with
| loop $i (1, minSup) (![] :: ...) _ -> True
| _ -> False)
(map (\α -> (α, project α ISDB)) (map (\x -> [(0, x)]) items)) in
let iss := map (\α ISDB' -> α) fs in
iss ++ concat (map (\α ISDB' -> projection α ISDB' I minSup C1 C2 C3 C4) fs)
projection α ISDB' I minSup C1 C2 C3 C4 :=
let fs := filter (\a t x -> C1 <= t && t <= C2) (freqItem ISDB' minSup C1 C2 C3 C4) in
let iss' := map (\a t x -> α ++ [(a, x)]) fs in
-- TODO: apply C4
-- TODO: apply C3
iss' ++ concat (map (\α' -> projection α' (project α' ISDB) I minSup C1 C2 C3 C4)
iss')
freqItem ISDB minSup C1 C2 C3 C4 :=
matchAll ISDB as list (list sequence) with
| first (interval $a $t) $x
(loop $i (2, minSup)
(first (interval #a _) #x ...)
!(first (interval #a _) #x _))
-> (a, t, x)
first := \pt px ps =>
{@ ++ (@ ++ (@ ++ ((interval $t _ & ~pt), _ ++ ($x & ~px) :: _) :: _) :: _) :: @,
(!(_ ++ (_ ++ (_ ++ (interval #t _, _ ++ #x :: _) :: _) :: _) :: _),
!(_ ++ (_ ++ (interval #t _, _ ++ #x :: _) :: _) :: _),
!(_ ++ (interval #t _, _ ++ #x :: _) :: _),
~ps)}
--
-- Execute
--
--gspm items ISDB I minSup C1 C2 C3 C4
--
-- Test
--
assertEqual "project (level 1)"
(project [(0, a)] ISDB)
[[[(0, []), (86400, [a, b, c]), (259200, [a, c])], [(0, [b, c]), (172800, [a, c])], [(0, [c])]], [[(0, [d]), (259200, [c])]], [[(0, [e, f]), (172800, [a, b])], [(0, [b])]]]
assertEqual "project (level 2)"
(project [(0, a),(0, b)] ISDB)
[[[(0, [c]), (172800, [a, c])]], [], [[(0, [])]]]
assertEqual "project (level 2)"
(project [(0, a),(2, a)] ISDB)
[[[(0, [c])]], [], [[(0, [b])]]]
assertEqual "freqItem"
(freqItem
[[[(0, []), (86400, [a, b, c]), (259200, [a, c])], [(0, [b, c]), (172800, [a, c])], [(0, [c])]], [[(0, [d]), (259200, [c])]], [[(0, [e, f]), (172800, [a, b])], [(0, [b])]]]
minSup C1 C2 C3 C4)
[(0, 0, b), (3, 259200, c), (2, 172800, a)]
(filter (\a t x -> C1 <= t && t <= C2)
(freqItem
[[[(0, []), (86400, [a, b, c]), (259200, [a, c])], [(0, [b, c]), (172800, [a, c])], [(0, [c])]],
[[(0, [d]), (259200, [c])]],
[[(0, [b])]]]
minSup C1 C2 C3 C4))
--[(0, 0, b)]
gspm items ISDB I minSup C1 C2 C3 C4
[[(0, a)],
[(0, b)],
[(0, c)],
[(0, a), (0, b)],
[(0, a), (2, a)]]