{-# LANGUAGE CPP               #-}
{-# LANGUAGE DeriveFunctor     #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs             #-}

-- | Coverage checking step of the
-- [Lower Your Guards paper](https://dl.acm.org/doi/abs/10.1145/3408989).
--
-- Coverage check guard trees (like @'PmMatch' 'Pre'@) to get a
-- 'CheckResult', containing
--
--   1. The set of uncovered values, 'cr_uncov'
--   2. And an annotated tree variant (like @'PmMatch' 'Post'@) that captures
--      redundancy and inaccessibility information as 'RedSets' annotations
--
-- Basically the UA function from Section 5.1, which is an optimised
-- interleaving of U and A from Section 3.2 (Figure 5).
-- The Normalised Refinement Types 'Nablas' are maintained in
-- "GHC.HsToCore.Pmc.Solver".
module GHC.HsToCore.Pmc.Check
  ( CheckAction(..)
  , checkMatchGroup
  , checkGRHSs
  , checkPatBind
  , checkEmptyCase
  ) where

#include "HsVersions.h"

import GHC.Prelude

import GHC.Builtin.Names ( hasKey, considerAccessibleIdKey, trueDataConKey )
import GHC.HsToCore.Monad ( DsM )
import GHC.HsToCore.Pmc.Types
import GHC.HsToCore.Pmc.Utils
import GHC.HsToCore.Pmc.Solver
import GHC.Driver.Session
import GHC.Utils.Outputable
import GHC.Tc.Utils.TcType (evVarPred)
import GHC.Data.OrdList

import qualified Data.Semigroup as Semi
import Data.List.NonEmpty ( NonEmpty(..) )
import qualified Data.List.NonEmpty as NE
import Data.Coerce

-- | Coverage checking action. Can be composed 'leftToRight' or 'topToBottom'.
newtype CheckAction a = CA
  { unCA :: Nablas -> DsM (CheckResult a)
    -- ^ Run the action on a set of incoming 'Nablas', producing a
    -- 'CheckResult' in 'DsM'.
  }
  deriving (Functor)

-- | Composes 'CheckAction's top-to-bottom:
-- If a value falls through the resulting action, then it must fall through the
-- first action and then through the second action.
-- If a value matches the resulting action, then it either matches the
-- first action or matches the second action.
-- Basically the semantics of the LYG branching construct.
topToBottom :: (top -> bot -> ret)
            -> CheckAction top
            -> CheckAction bot
            -> CheckAction ret
-- The lower action is only run on what the upper action left uncovered, and
-- the uncovered set of the composition is that of the lower action.
topToBottom combine upper lower = CA $ \incoming -> do
  upper_res <- unCA upper incoming
  lower_res <- unCA lower (cr_uncov upper_res)
  pure CheckResult
    { cr_ret    = combine (cr_ret upper_res) (cr_ret lower_res)
    , cr_uncov  = cr_uncov lower_res
    , cr_approx = cr_approx upper_res Semi.<> cr_approx lower_res
    }

-- | Composes 'CheckAction's left-to-right:
-- If a value falls through the resulting action, then it either falls through
-- the first action or through the second action.
-- If a value matches the resulting action, then it must match the first action
-- and then match the second action.
-- Basically the semantics of the LYG guard construct.
--
-- The second action is run on the covered set ('rs_cov') of the first one.
-- The combined uncovered set is passed through 'throttle', using the
-- 'maxPmCheckModels' setting from the 'DynFlags' as the limit.
leftToRight :: (RedSets -> right -> ret)
            -> CheckAction RedSets
            -> CheckAction right
            -> CheckAction ret
leftToRight combine lhs rhs = CA $ \incoming -> do
  lhs_res <- unCA lhs incoming
  rhs_res <- unCA rhs (rs_cov (cr_ret lhs_res))
  dflags  <- getDynFlags
  let fallen_through = cr_uncov lhs_res Semi.<> cr_uncov rhs_res
      -- See Note [Countering exponential blowup]
      (precision, uncov) =
        throttle (maxPmCheckModels dflags) incoming fallen_through
  pure CheckResult
    { cr_ret    = combine (cr_ret lhs_res) (cr_ret rhs_res)
    , cr_uncov  = uncov
    , cr_approx = precision Semi.<> cr_approx lhs_res Semi.<> cr_approx rhs_res
    }

-- | @throttle limit old new@ returns @old@ (tagged 'Approximate') if the
-- number of 'Nabla's in @new@ exceeds both the given @limit@ and the number of
-- 'Nabla's in @old@. Otherwise it returns @new@ (tagged 'Precise').
-- See Note [Countering exponential blowup].
throttle :: Int -> Nablas -> Nablas -> (Precision, Nablas)
-- @new@ is kept as long as its size stays within the larger of @limit@ and
-- the size of @old@.
throttle limit old@(MkNablas old_ds) new@(MkNablas new_ds)
  --- | pprTrace "PmCheck:throttle" (ppr (length old_ds) <+> ppr (length new_ds) <+> ppr limit) False = undefined
  | n_new <= budget = (Precise,     new)
  | otherwise       = (Approximate, old)
  where
    n_new  = length new_ds
    budget = max limit (length old_ds)

-- | Checks a non-empty sequence of guard trees with the given action,
-- composing the individual checks 'topToBottom'.
checkSequence :: (grdtree -> CheckAction anntree)
              -> NonEmpty grdtree
              -> CheckAction (NonEmpty anntree)
-- The implementation is pretty similar to
-- @traverse1 :: Apply f => (a -> f b) -> NonEmpty a -> f (NonEmpty b)@
checkSequence act (t :| ts) = case NE.nonEmpty ts of
  Nothing   -> fmap (:| []) (act t)
  Just rest -> topToBottom (NE.<|) (act t) (checkSequence act rest)

-- | 'RedSets' in which every component is 'mempty'.
emptyRedSets :: RedSets
-- Semigroup instance would be misleading!
emptyRedSets = RedSets { rs_cov = mempty, rs_div = mempty, rs_bangs = mempty }

-- | Checks a single guard against the incoming 'Nablas'.
--
-- The covered and diverging sets and the recorded bangs go into the returned
-- 'RedSets'; the values that fall through the guard go into 'cr_uncov'.
-- Every case returns a 'Precise' result.
checkGrd :: PmGrd -> CheckAction RedSets
checkGrd grd = CA $ \inc -> case grd of
  -- let x = e: Refine with x ~ e
  PmLet x e -> do
    matched <- addPhiCtNablas inc (PhiCoreCt x e)
    tracePm "check:Let" (ppr x <+> char '=' <+> ppr e)
    precise (emptyRedSets { rs_cov = matched }) mempty

  -- Bang x _: Diverge on x ~ ⊥, refine with x ≁ ⊥
  PmBang x mb_info -> do
    diverged <- addPhiCtNablas inc (PhiBotCt x)
    matched  <- addPhiCtNablas inc (PhiNotBotCt x)
    -- See Note [Dead bang patterns]
    -- mb_info = Just info <==> PmBang originates from bang pattern in source
    let bangs = case mb_info of
          Just info -> unitOL (diverged, info)
          Nothing   -> NilOL
    tracePm "check:Bang" (ppr x <+> ppr diverged)
    precise
      (RedSets { rs_cov = matched, rs_div = diverged, rs_bangs = bangs })
      mempty

  -- See point (3) of Note [considerAccessible]
  PmCon x (PmAltConLike con) _ _ _
    | x `hasKey` considerAccessibleIdKey
    , con `hasKey` trueDataConKey
    -> precise (emptyRedSets { rs_cov = initNablas }) mempty

  -- Con: Fall through on x ≁ K and refine with x ~ K ys and type info
  PmCon x con tvs dicts args -> do
    -- The bangs force each refined set before the trace below mentions it.
    !diverged <- if isPmAltConMatchStrict con
      then addPhiCtNablas inc (PhiBotCt x)
      else pure mempty
    !matched <- addPhiCtNablas inc
                  (PhiConCt x con tvs (map evVarPred dicts) args)
    !uncov   <- addPhiCtNablas inc (PhiNotConCt x con)
    tracePm "check:Con" $ vcat
      [ ppr grd
      , ppr inc
      , hang (text "div") 2 (ppr diverged)
      , hang (text "matched") 2 (ppr matched)
      , hang (text "uncov") 2 (ppr uncov)
      ]
    precise (emptyRedSets { rs_cov = matched, rs_div = diverged }) uncov
  where
    -- Wrap up annotations and an uncovered set as a 'Precise' result.
    precise :: RedSets -> Nablas -> DsM (CheckResult RedSets)
    precise red uncov = pure CheckResult
      { cr_ret    = red
      , cr_uncov  = uncov
      , cr_approx = Precise
      }

-- | Checks a list of guards by composing the individual 'checkGrd' actions
-- 'leftToRight'. The empty list covers all incoming values and lets nothing
-- fall through.
checkGrds :: [PmGrd] -> CheckAction RedSets
checkGrds = foldr (leftToRight merge . checkGrd) done
  where
    -- No guards left: everything that came in is covered.
    done = CA $ \inc -> pure CheckResult
      { cr_ret    = emptyRedSets { rs_cov = inc }
      , cr_uncov  = mempty
      , cr_approx = Precise
      }

    -- This operation would /not/ form a Semigroup!
    -- The covered set is that of the remaining guards alone, whereas the
    -- diverging sets and bangs of both sides are accumulated.
    merge ri_g ri_grds = RedSets
      { rs_cov   = rs_cov ri_grds
      , rs_div   = rs_div ri_g   Semi.<> rs_div ri_grds
      , rs_bangs = rs_bangs ri_g Semi.<> rs_bangs ri_grds
      }

-- | Checks all matches of a match group in sequence with 'checkMatch'.
checkMatchGroup :: PmMatchGroup Pre -> CheckAction (PmMatchGroup Post)
checkMatchGroup (PmMatchGroup matches) =
  fmap PmMatchGroup (checkSequence checkMatch matches)

-- | Checks the pattern guards of a match and then, 'leftToRight', its GRHSs.
checkMatch :: PmMatch Pre -> CheckAction (PmMatch Post)
checkMatch (PmMatch { pm_pats = GrdVec grds, pm_grhss = grhss }) =
  leftToRight PmMatch pats_action grhss_action
  where
    pats_action  = checkGrds grds
    grhss_action = checkGRHSs grhss

-- | Checks the guards in 'pgs_lcls' and then, 'leftToRight', the sequence of
-- GRHSs in 'pgs_grhss'.
checkGRHSs :: PmGRHSs Pre -> CheckAction (PmGRHSs Post)
checkGRHSs (PmGRHSs { pgs_lcls = GrdVec lcls, pgs_grhss = grhss }) =
  leftToRight PmGRHSs lcls_action grhss_action
  where
    lcls_action  = checkGrds lcls
    grhss_action = checkSequence checkGRHS grhss

-- | Checks the guards of a single GRHS; the 'pg_rhs' is carried over as is.
checkGRHS :: PmGRHS Pre -> CheckAction (PmGRHS Post)
checkGRHS (PmGRHS { pg_grds = GrdVec grds, pg_rhs = rhs_info }) =
  fmap (`PmGRHS` rhs_info) (checkGrds grds)

-- | Checks an empty case: the uncovered set is the incoming set refined with
-- @x ≁ ⊥@ for the match variable @x@.
checkEmptyCase :: PmEmptyCase -> CheckAction PmEmptyCase
-- See Note [Checking EmptyCase]
checkEmptyCase pe@(PmEmptyCase { pe_var = var }) = CA $ \inc ->
  mk_result <$> addPhiCtNablas inc (PhiNotBotCt var)
  where
    mk_result unc = CheckResult
      { cr_ret    = pe
      , cr_uncov  = unc
      , cr_approx = mempty
      }

-- | Checks a pattern binding. This is 'checkGRHS' up to 'coerce'.
checkPatBind :: PmPatBind Pre -> CheckAction (PmPatBind Post)
checkPatBind = coerce checkGRHS

{- Note [Checking EmptyCase]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-XEmptyCase is useful for matching on empty data types like 'Void'. For example,
the following is a complete match:

    f :: Void -> ()
    f x = case x of {}

Really, -XEmptyCase is the only way to write a program that at the same time is
safe (@f _ = error "boom"@ is not because of ⊥), doesn't trigger a warning
(@f !_ = error "inaccessible"@ has inaccessible RHS) and doesn't turn an
exception into divergence (@f x = f x@).

Semantically, unlike every other case expression, -XEmptyCase is strict in its
match var x, which rules out ⊥ as an inhabitant. So we add x ≁ ⊥ to the
initial Nabla and check if there are any values left to match on.

Note [Dead bang patterns]
~~~~~~~~~~~~~~~~~~~~~~~~~
Consider

  f :: Bool -> Int
  f True = 1
  f !x   = 2

Whenever we fall through to the second equation, we will already have evaluated
the argument. Thus, the bang pattern serves no purpose and should be warned
about. We call this kind of bang patterns "dead". Dead bangs are the ones
that under no circumstances can force a thunk that wasn't already forced.
Dead bangs are a form of redundant bangs; see below.

We can detect dead bang patterns by checking whether @x ~ ⊥@ is satisfiable
where the PmBang appears in 'checkGrd'. If not, then clearly the bang is
dead. So for a source bang, we add the refined Nabla and the source info to
the 'rs_bangs' field of the 'RedSets'. When collecting stuff to warn, we test
that Nabla for inhabitants. If it's empty, we'll warn that it's redundant.

Note that we don't want to warn for a dead bang that appears on a redundant
clause. That is because in that case, we recommend to delete the clause wholly,
including its leading pattern match.

Dead bang patterns are redundant. But there are bang patterns which are
redundant that aren't dead, for example

  f !() = 0

the bang still forces the match variable, before we attempt to match on (). But
it is redundant with the forcing done by the () match.
We currently don't detect redundant bangs that aren't dead.

Note [Countering exponential blowup]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Precise pattern match exhaustiveness checking is necessarily exponential in
the size of some input programs. We implement a counter-measure in the form of
the -fmax-pmcheck-models flag, limiting the number of Nablas we check against
each pattern by a constant.

How do we do that? Consider

  f True True = ()
  f True True = ()

And imagine we set our limit to 1 for the sake of the example. The first clause
will be checked against the initial Nabla, {}. Doing so will produce an
Uncovered set of size 2, containing the models {x≁True} and {x~True,y≁True}.
Also we find the first clause to cover the model {x~True,y~True}.

But the Uncovered set we get out of the match is too huge! We somehow have to
ensure that we don't make things worse than they already are, so we continue
checking with a singleton Uncovered set of the initial Nabla {}. Why is this
sound (wrt. the notion in GADTs Meet Their Match)? Well, it basically amounts
to forgetting that we matched against the first clause. The values represented
by {} are a superset of those represented by its two refinements {x≁True} and
{x~True,y≁True}.

This forgetfulness becomes very apparent in the example above: By continuing
with {} we don't detect the second clause as redundant, as it again covers the
same non-empty subset of {}. So we don't flag everything as redundant anymore,
but still will never flag something as redundant that isn't.

For exhaustivity, the converse applies: We will report @f@ as non-exhaustive
and report @f _ _@ as missing, which is a superset of the actual missing
matches. But soundness means we will never fail to report a missing match.

This mechanism is implemented in 'throttle'.

Guards are an extreme example in this regard, with #11195 being a particularly
dreadful example: Since their RHS are often pretty much unique, we split on a
variable (the one representing the RHS) that doesn't occur anywhere else in the
program, so we don't actually get useful information out of that split!

Note [considerAccessible]
~~~~~~~~~~~~~~~~~~~~~~~~~
Consider (T18610)

  f :: Bool -> Int
  f x = case (x, x) of
    (True,  True)  -> 1
    (False, False) -> 2
    (True,  False) -> 3 -- Warning: Redundant

The third case is detected as redundant. But it may be the intent of the
programmer to keep the dead code, in order for it not to bitrot or to support
debugging scenarios. But there is no way to communicate that to the
pattern-match checker! The only way is to deactivate pattern-match checking
whole-sale, which is quite annoying. Hence, we define in "GHC.Exts":

  considerAccessible = True

'considerAccessible' is treated specially by the pattern-match checker in that a
guard with it as the scrutinee expression will keep its parent clause alive:

  g :: Bool -> Int
  g x = case (x, x) of
    (True,  True)  -> 1
    (False, False) -> 2
    (True,  False) | GHC.Exts.considerAccessible -> 3 -- No warning

The key bits of the implementation are:

  1. Its definition is recognised as known-key (see "GHC.Builtin.Names").
  2. After "GHC.HsToCore.Pmc.Desugar", the guard will end up as a 'PmCon', where
     the match var is the known-key 'considerAccessible' and the constructor
     against which it matches is 'True'.
  3. We recognise the 'PmCon' in 'GHC.HsToCore.Pmc.Check.checkGrd' and inflate
     the incoming set of values for all guards downstream to the unconstrained
     'initNablas' set, i.e. /all/ values.
     (The set of values that falls through that particular guard is empty, as
     matching 'considerAccessible' against 'True' can't fail.)

Note that 'considerAccessible' breaks the invariant that incoming sets of values
reaching syntactic children are subsets of that of the syntactic ancestor:
A whole match, like that of the third clause of the example, might have no
incoming value, but its single RHS has incoming values because of (3).

That means the 'is_covered' flag computed in 'GHC.HsToCore.Pmc.cirbsMatch'
is irrelevant and should not be used to flag all children as redundant (which is
what we used to do).

We achieve great benefits with a very simple implementation.
There are caveats, though:

  (A) Putting potentially failing guards /after/ the
      'considerAccessible' guard might lead to weird check results, e.g.,

        h :: Bool -> Int
        h x = case (x, x) of
          (True,  True)  -> 1
          (False, False) -> 2
          (True,  False) | GHC.Exts.considerAccessible, False <- x -> 3
          -- Warning: Not matched: (_, _)

      That /is/ fixable, although we would pay with a much more complicated
      implementation.
  (B) If the programmer puts a 'considerAccessible' marker on an accessible
      clause, the checker doesn't warn about it. E.g.,

        f :: Bool -> Int
        f True | considerAccessible = 0
        f False = 1

      will not emit any warning whatsoever. We could implement code that warns
      here, but it wouldn't be as simple as it is now.
-}