{-# LANGUAGE OverloadedStrings #-} -- | Representations of paths in an FTA, data structures for -- equality constraints over paths, algorithms for saturating these constraints module Data.ECTA.Internal.Paths ( Path(.., EmptyPath, ConsPath) , unPath , path , Pathable(..) , pathHeadUnsafe , pathTailUnsafe , isSubpath , isStrictSubpath , substSubpath , smallestNonempty , largestNonempty , getMaxNonemptyIndex , PathTrie(..) , isEmptyPathTrie , isTerminalPathTrie , toPathTrie , fromPathTrie , pathTrieDescend , PathEClass(PathEClass, ..) , unPathEClass , hasSubsumingMember , completedSubsumptionOrdering , EqConstraints(.., EmptyConstraints) , rawMkEqConstraints , unsafeGetEclasses , hasSubsumingMemberListBased , isContradicting , mkEqConstraints , combineEqConstraints , eqConstraintsDescend , constraintsAreContradictory , constraintsImply , subsumptionOrderedEclasses , unsafeSubsumptionOrderedEclasses ) where import Prelude hiding ( round ) import Data.Function ( on ) import Data.Hashable ( Hashable ) import Data.List ( isSubsequenceOf, nub, sort, sortBy ) import Data.Monoid ( Any(..) ) import Data.Semigroup ( Max(..) ) import qualified Data.Text as Text import Data.Vector ( Vector ) import qualified Data.Vector as Vector import Data.Vector.Instances () import GHC.Exts ( inline ) import GHC.Generics ( Generic ) import Data.Equivalence.Monad ( runEquivM, equate, desc, classes ) import Data.Memoization ( MemoCacheTag(..), memo2 ) import Data.Text.Extended.Pretty import Utility.Fixpoint ------------------------------------------------------- ----------------------------------------------------------------------- --------------------------- Misc / general ---------------------------- ----------------------------------------------------------------------- flipOrdering :: Ordering -> Ordering flipOrdering GT = LT flipOrdering LT = GT flipOrdering EQ = EQ ----------------------------------------------------------------------- -------------------------------- Paths -------------------------------- ----------------------------------------------------------------------- data Path = Path ![Int] deriving (Eq, Ord, Show, Generic) unPath :: Path -> [Int] unPath (Path p) = p instance Hashable Path path :: [Int] -> Path path = Path {-# COMPLETE EmptyPath, ConsPath #-} pattern EmptyPath :: Path pattern EmptyPath = Path [] pattern ConsPath :: Int -> Path -> Path pattern ConsPath p ps <- Path (p : (Path -> ps)) where ConsPath p (Path ps) = Path (p : ps) pathHeadUnsafe :: Path -> Int pathHeadUnsafe (Path ps) = head ps pathTailUnsafe :: Path -> Path pathTailUnsafe (Path ps) = Path (tail ps) instance Pretty Path where pretty (Path ps) = Text.intercalate "." (map (Text.pack . show) ps) isSubpath :: Path -> Path -> Bool isSubpath EmptyPath _ = True isSubpath (ConsPath p1 ps1) (ConsPath p2 ps2) | p1 == p2 = isSubpath ps1 ps2 isSubpath _ _ = False isStrictSubpath :: Path -> Path -> Bool isStrictSubpath EmptyPath EmptyPath = False isStrictSubpath EmptyPath _ = True isStrictSubpath (ConsPath p1 ps1) (ConsPath p2 ps2) | p1 == p2 = isStrictSubpath ps1 ps2 isStrictSubpath _ _ = False -- | Read `substSubpath p1 p2 p3` as `[p1/p2]p3` -- -- `substSubpath replacement toReplace target` takes `toReplace`, a prefix of target, -- and returns a new path in which `toReplace` has been replaced by `replacement`. -- -- Undefined if toReplace is not a prefix of target substSubpath :: Path -> Path -> Path -> Path substSubpath replacement toReplace target = Path $ (unPath replacement) ++ drop (length $ unPath toReplace) (unPath target) -------------------------------------------------------------------------- ---------------------------- Using paths --------------------------------- -------------------------------------------------------------------------- -- | TODO: Should this be redone as a lens-library traversal? -- | TODO: I am unhappy about this Emptyable design; makes one question whether -- this should be a typeclass at all. (Terms/ECTAs differ in that -- there is always an ECTA Node that represents the value at a path) class Pathable t t' | t -> t' where type Emptyable t' getPath :: Path -> t -> Emptyable t' getAllAtPath :: Path -> t -> [t'] modifyAtPath :: (t' -> t') -> Path -> t -> t ----------------------------------------------------------------------- ---------------------------- Path tries ------------------------------- ----------------------------------------------------------------------- --------------------- ------- Generic-ish utility functions --------------------- -- | Precondition: A nonempty cell exists smallestNonempty :: Vector PathTrie -> Int smallestNonempty v = Vector.ifoldr (\i pt oldMin -> case pt of EmptyPathTrie -> oldMin _ -> i) maxBound v -- | Precondition: A nonempty cell exists largestNonempty :: Vector PathTrie -> Int largestNonempty v = Vector.ifoldl (\oldMin i pt -> case pt of EmptyPathTrie -> oldMin _ -> i) minBound v getMaxNonemptyIndex :: PathTrie -> Maybe Int getMaxNonemptyIndex EmptyPathTrie = Nothing getMaxNonemptyIndex TerminalPathTrie = Nothing getMaxNonemptyIndex (PathTrieSingleChild i _) = Just i getMaxNonemptyIndex (PathTrie vec) = Just $ largestNonempty vec --------------------- ------- Path tries --------------------- data PathTrie = EmptyPathTrie | TerminalPathTrie | PathTrieSingleChild {-# UNPACK #-} !Int !PathTrie | PathTrie !(Vector PathTrie) -- Invariant: Must have at least two nonempty nodes deriving ( Eq, Show, Generic ) instance Hashable PathTrie isEmptyPathTrie :: PathTrie -> Bool isEmptyPathTrie EmptyPathTrie = True isEmptyPathTrie _ = False isTerminalPathTrie :: PathTrie -> Bool isTerminalPathTrie TerminalPathTrie = True isTerminalPathTrie _ = False comparePathTrieVectors :: Vector PathTrie -> Vector PathTrie -> Ordering comparePathTrieVectors v1 v2 = foldr (\i res -> let (t1, t2) = (v1 `Vector.unsafeIndex` i, v2 `Vector.unsafeIndex` i) in case (isEmptyPathTrie t1, isEmptyPathTrie t2) of (False, True) -> LT (True, False) -> GT (True, True) -> res (False, False) -> case compare t1 t2 of LT -> LT GT -> GT EQ -> res) valueIfComponentsMatch [0..(min (Vector.length v1) (Vector.length v2) - 1)] where valueIfComponentsMatch = compare (Vector.length v1) (Vector.length v2) instance Ord PathTrie where compare EmptyPathTrie EmptyPathTrie = EQ compare EmptyPathTrie _ = LT compare _ EmptyPathTrie = GT compare TerminalPathTrie TerminalPathTrie = EQ compare TerminalPathTrie _ = LT compare _ TerminalPathTrie = GT compare (PathTrieSingleChild i1 pt1) (PathTrieSingleChild i2 pt2) | i1 < i2 = LT | i1 > i2 = GT | otherwise = compare pt1 pt2 compare (PathTrieSingleChild i1 pt1) (PathTrie v2) = let i2 = smallestNonempty v2 in case compare i1 i2 of LT -> LT GT -> GT EQ -> case compare pt1 (v2 `Vector.unsafeIndex` i2) of LT -> LT GT -> GT EQ -> LT -- v2 must have a second nonempty compare a@(PathTrie _) b@(PathTrieSingleChild _ _) = flipOrdering $ inline compare b a -- TODO: Check whether this inlining is effective compare (PathTrie v1) (PathTrie v2) = comparePathTrieVectors v1 v2 -- | Precondition: No path in the input is a subpath of another toPathTrie :: [Path] -> PathTrie toPathTrie [] = EmptyPathTrie toPathTrie [EmptyPath] = TerminalPathTrie toPathTrie ps = if all (\p -> pathHeadUnsafe p == pathHeadUnsafe (head ps)) ps then PathTrieSingleChild (pathHeadUnsafe $ head ps) (toPathTrie $ map pathTailUnsafe ps) else PathTrie vec where maxIndex = getMax $ foldMap (Max . pathHeadUnsafe) ps -- TODO: Inefficient to use this; many passes. over the list. -- This may not be used in a place where perf matters, though pathsStartingWith :: Int -> [Path] -> [Path] pathsStartingWith i = concatMap (\case EmptyPath -> [] ConsPath j p -> if i == j then [p] else []) vec = Vector.generate (maxIndex + 1) (\i -> toPathTrie $ pathsStartingWith i ps) fromPathTrie :: PathTrie -> [Path] fromPathTrie EmptyPathTrie = [] fromPathTrie TerminalPathTrie = [EmptyPath] fromPathTrie (PathTrieSingleChild i pt) = map (ConsPath i) $ fromPathTrie pt fromPathTrie (PathTrie v) = Vector.ifoldr (\i pt acc -> map (ConsPath i) (fromPathTrie pt) ++ acc) [] v pathTrieDescend :: PathTrie -> Int -> PathTrie pathTrieDescend EmptyPathTrie _ = EmptyPathTrie pathTrieDescend TerminalPathTrie _ = EmptyPathTrie pathTrieDescend (PathTrie v) i = if Vector.length v > i then v `Vector.unsafeIndex` i else EmptyPathTrie pathTrieDescend (PathTrieSingleChild j pt') i | i == j = pt' | otherwise = EmptyPathTrie -------------------------------------------------------------------------- ---------------------- Equality constraints over paths ------------------- -------------------------------------------------------------------------- --------------------------- ---------- Path E-classes --------------------------- data PathEClass = PathEClass' { getPathTrie :: !PathTrie , getOrigPaths :: [Path] -- Intentionally lazy because -- not available when calling `mkPathEClassFromPathTrie` } deriving ( Show, Generic ) instance Eq PathEClass where (==) = (==) `on` getPathTrie instance Ord PathEClass where compare = compare `on` getPathTrie -- | TODO: This pattern (and the caching of the original path list) is a temporary affair -- until we convert all clients of PathEclass to fully be based on tries pattern PathEClass :: [Path] -> PathEClass pattern PathEClass ps <- PathEClass' _ ps where PathEClass ps = PathEClass' (toPathTrie $ nub ps) (sort $ nub ps) unPathEClass :: PathEClass -> [Path] unPathEClass (PathEClass' _ paths) = paths instance Pretty PathEClass where pretty pec = "{" <> (Text.intercalate "=" $ map pretty $ unPathEClass pec) <> "}" instance Hashable PathEClass mkPathEClassFromPathTrie :: PathTrie -> PathEClass mkPathEClassFromPathTrie pt = PathEClass' pt (fromPathTrie pt) pathEClassDescend :: PathEClass -> Int -> PathEClass pathEClassDescend (PathEClass' pt _) i = mkPathEClassFromPathTrie $ pathTrieDescend pt i hasSubsumingMember :: PathEClass -> PathEClass -> Bool hasSubsumingMember pec1 pec2 = go (getPathTrie pec1) (getPathTrie pec2) where go :: PathTrie -> PathTrie -> Bool go EmptyPathTrie _ = False go _ EmptyPathTrie = False go TerminalPathTrie TerminalPathTrie = False go TerminalPathTrie _ = True go _ TerminalPathTrie = False go (PathTrieSingleChild i1 pt1) (PathTrieSingleChild i2 pt2) = if i1 == i2 then go pt1 pt2 else False go (PathTrieSingleChild i1 pt1) (PathTrie v2) = case v2 Vector.!? i1 of Nothing -> False Just pt2 -> go pt1 pt2 go (PathTrie v1) (PathTrieSingleChild i2 pt2) = case v1 Vector.!? i2 of Nothing -> False Just pt1 -> go pt1 pt2 go (PathTrie v1) (PathTrie v2) = any (\i -> go (v1 `Vector.unsafeIndex` i) (v2 `Vector.unsafeIndex` i)) [0..(min (Vector.length v1) (Vector.length v2) - 1)] -- | Extends the subsumption ordering to a total ordering by using the default lexicographic -- comparison for incomparable elements. -- | TODO: Optimization opportunity: Redundant work in the hasSubsumingMember calls completedSubsumptionOrdering :: PathEClass -> PathEClass -> Ordering completedSubsumptionOrdering pec1 pec2 | hasSubsumingMember pec1 pec2 = LT | hasSubsumingMember pec2 pec1 = GT -- This next line is some hacky magic. Basically, it means that for the -- Hoogle+/TermSearch workload, where there is no subsumption, -- constraints will be evaluated in left-to-right order (instead of the default -- right-to-left), which for that particular workload produces better -- constraint-propagation | otherwise = compare pec2 pec1 -------------------------------- ---------- Equality constraints -------------------------------- data EqConstraints = EqConstraints { getEclasses :: [PathEClass] -- ^ Must be sorted } | EqContradiction deriving ( Eq, Ord, Show, Generic ) instance Hashable EqConstraints instance Pretty EqConstraints where pretty ecs = "{" <> (Text.intercalate "," $ map pretty (getEclasses ecs)) <> "}" --------- Destructors and patterns -- | Unsafe. Internal use only ecsGetPaths :: EqConstraints -> [[Path]] ecsGetPaths = map unPathEClass . getEclasses pattern EmptyConstraints :: EqConstraints pattern EmptyConstraints = EqConstraints [] unsafeGetEclasses :: EqConstraints -> [PathEClass] unsafeGetEclasses EqContradiction = error "unsafeGetEclasses: Illegal argument 'EqContradiction'" unsafeGetEclasses ecs = getEclasses ecs rawMkEqConstraints :: [[Path]] -> EqConstraints rawMkEqConstraints = EqConstraints . map PathEClass constraintsAreContradictory :: EqConstraints -> Bool constraintsAreContradictory = (== EqContradiction) --------- Construction hasSubsumingMemberListBased :: [Path] -> [Path] -> Bool hasSubsumingMemberListBased ps1 ps2 = getAny $ mconcat [Any (isStrictSubpath p1 p2) | p1 <- ps1 , p2 <- ps2] -- | The real contradiction condition is a cycle in the subsumption ordering. -- But, after congruence closure, this will reduce into a self-cycle in the subsumption ordering. -- -- TODO; Prove this. isContradicting :: [[Path]] -> Bool isContradicting cs = any (\pec -> hasSubsumingMemberListBased pec pec) cs -- Contains an inefficient implementation of the congruence closure algorithm mkEqConstraints :: [[Path]] -> EqConstraints mkEqConstraints initialConstraints = case completedConstraints of Nothing -> EqContradiction Just cs -> EqConstraints $ sort $ map PathEClass cs where removeTrivial :: (Eq a) => [[a]] -> [[a]] removeTrivial = filter (\x -> length x > 1) . map nub -- Reason for the extra "complete" in this line: -- The first simplification done to the constraints is eclass-completion, -- to remove redundancy and shrink things before the very inefficienc -- addCongruences step (important in tests; less so in realistic input). -- The last simplification must also be completion, to give a valid value. completedConstraints = fixMaybe round $ complete $ removeTrivial initialConstraints round :: [[Path]] -> Maybe [[Path]] round cs = let cs' = addCongruences cs cs'' = complete cs' in if isContradicting cs'' then Nothing else Just cs'' addCongruences :: [[Path]] -> [[Path]] addCongruences cs = cs ++ [map (\z -> substSubpath z x y) left | left <- cs, right <- cs, x <- left, y <- right, isStrictSubpath x y] assertEquivs xs = mapM (\y -> equate (head xs) y) (tail xs) complete :: (Ord a) => [[a]] -> [[a]] complete initialClasses = runEquivM (:[]) (++) $ do mapM_ assertEquivs initialClasses mapM desc =<< classes ---------- Operations combineEqConstraints :: EqConstraints -> EqConstraints -> EqConstraints combineEqConstraints = memo2 (NameTag "combineEqConstraints") go where go EqContradiction _ = EqContradiction go _ EqContradiction = EqContradiction go ec1 ec2 = mkEqConstraints $ ecsGetPaths ec1 ++ ecsGetPaths ec2 {-# NOINLINE combineEqConstraints #-} eqConstraintsDescend :: EqConstraints -> Int -> EqConstraints eqConstraintsDescend EqContradiction _ = EqContradiction eqConstraintsDescend ecs i = EqConstraints $ sort $ map (`pathEClassDescend` i) (getEclasses ecs) -- A faster implementation would be: Merge the eclasses of both, run mkEqConstraints (or at least do eclass completion), -- check result equal to ecs2 constraintsImply :: EqConstraints -> EqConstraints -> Bool constraintsImply EqContradiction _ = True constraintsImply _ EqContradiction = False constraintsImply ecs1 ecs2 = all (\cs -> any (isSubsequenceOf cs) (ecsGetPaths ecs1)) (ecsGetPaths ecs2) subsumptionOrderedEclasses :: EqConstraints -> Maybe [PathEClass] subsumptionOrderedEclasses ecs = case ecs of EqContradiction -> Nothing EqConstraints pecs -> Just $ sortBy completedSubsumptionOrdering pecs unsafeSubsumptionOrderedEclasses :: EqConstraints -> [PathEClass] unsafeSubsumptionOrderedEclasses (EqConstraints pecs) = sortBy completedSubsumptionOrdering pecs unsafeSubsumptionOrderedEclasses EqContradiction = error $ "unsafeSubsumptionOrderedEclasses: unexpected EqContradiction"