{- (c) The University of Glasgow 2006 (c) The AQUA Project, Glasgow University, 1994-1998 This module contains inlining logic used by the simplifier. -} module GHC.Core.Opt.Simplify.Inline ( -- * Cheap and cheerful inlining checks. couldBeSmallEnoughToInline, smallEnoughToInline, activeUnfolding, -- * The smart inlining decisions are made by callSiteInline callSiteInline, CallCtxt(..), ) where import GHC.Prelude import GHC.Driver.Flags import GHC.Core.Opt.Simplify.Env import GHC.Core import GHC.Core.Unfold import GHC.Core.FVs( exprFreeIds ) import GHC.Types.Id import GHC.Types.Var.Env( InScopeSet, lookupInScope ) import GHC.Types.Var.Set import GHC.Types.Basic ( Arity, RecFlag(..), isActive ) import GHC.Utils.Logger import GHC.Utils.Misc import GHC.Utils.Outputable import GHC.Types.Name import Data.List (isPrefixOf) {- ************************************************************************ * * \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding} * * ************************************************************************ We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that we ``couldn't possibly use'' on the other side. Can be overridden w/ flaggery. Just the same as smallEnoughToInline, except that it has no actual arguments. -} couldBeSmallEnoughToInline :: UnfoldingOpts -> Int -> CoreExpr -> Bool couldBeSmallEnoughToInline :: UnfoldingOpts -> Int -> CoreExpr -> Bool couldBeSmallEnoughToInline UnfoldingOpts opts Int threshold CoreExpr rhs = case UnfoldingOpts -> Int -> [Id] -> CoreExpr -> ExprSize sizeExpr UnfoldingOpts opts Int threshold [] CoreExpr body of ExprSize TooBig -> Bool False ExprSize _ -> Bool True where ([Id] _, CoreExpr body) = CoreExpr -> ([Id], CoreExpr) forall b. Expr b -> ([b], Expr b) collectBinders CoreExpr rhs ---------------- smallEnoughToInline :: UnfoldingOpts -> Unfolding -> Bool smallEnoughToInline :: UnfoldingOpts -> Unfolding -> Bool smallEnoughToInline UnfoldingOpts opts (CoreUnfolding {uf_guidance :: Unfolding -> UnfoldingGuidance uf_guidance = UnfoldingGuidance guidance}) = case UnfoldingGuidance guidance of UnfIfGoodArgs {ug_size :: UnfoldingGuidance -> Int ug_size = Int size} -> Int size Int -> Int -> Bool forall a. Ord a => a -> a -> Bool <= UnfoldingOpts -> Int unfoldingUseThreshold UnfoldingOpts opts UnfWhen {} -> Bool True UnfoldingGuidance UnfNever -> Bool False smallEnoughToInline UnfoldingOpts _ Unfolding _ = Bool False {- ************************************************************************ * * \subsection{callSiteInline} * * ************************************************************************ This is the key function. It decides whether to inline a variable at a call site callSiteInline is used at call sites, so it is a bit more generous. It's a very important function that embodies lots of heuristics. A non-WHNF can be inlined if it doesn't occur inside a lambda, and occurs exactly once or occurs once in each branch of a case and is small If the thing is in WHNF, there's no danger of duplicating work, so we can inline if it occurs once, or is small NOTE: we don't want to inline top-level functions that always diverge. It just makes the code bigger. Tt turns out that the convenient way to prevent them inlining is to give them a NOINLINE pragma, which we do in StrictAnal.addStrictnessInfoToTopId -} callSiteInline :: SimplEnv -> Logger -> Id -- The Id -> Bool -- True if there are no arguments at all (incl type args) -> [ArgSummary] -- One for each value arg; True if it is interesting -> CallCtxt -- True <=> continuation is interesting -> Maybe CoreExpr -- Unfolding, if any callSiteInline :: SimplEnv -> Logger -> Id -> Bool -> [ArgSummary] -> CallCtxt -> Maybe CoreExpr callSiteInline SimplEnv env Logger logger Id id Bool lone_variable [ArgSummary] arg_infos CallCtxt cont_info = case IdUnfoldingFun idUnfolding Id id of -- idUnfolding checks for loop-breakers, returning NoUnfolding -- Things with an INLINE pragma may have an unfolding *and* -- be a loop breaker (maybe the knot is not yet untied) CoreUnfolding { uf_tmpl :: Unfolding -> CoreExpr uf_tmpl = CoreExpr unf_template , uf_cache :: Unfolding -> UnfoldingCache uf_cache = UnfoldingCache unf_cache , uf_guidance :: Unfolding -> UnfoldingGuidance uf_guidance = UnfoldingGuidance guidance } | Bool active_unf -> SimplEnv -> Logger -> Id -> Bool -> [ArgSummary] -> CallCtxt -> CoreExpr -> UnfoldingCache -> UnfoldingGuidance -> Maybe CoreExpr tryUnfolding SimplEnv env Logger logger Id id Bool lone_variable [ArgSummary] arg_infos CallCtxt cont_info CoreExpr unf_template UnfoldingCache unf_cache UnfoldingGuidance guidance | Bool otherwise -> Logger -> UnfoldingOpts -> Id -> String -> SDoc -> Maybe CoreExpr -> Maybe CoreExpr forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a traceInline Logger logger UnfoldingOpts uf_opts Id id String "Inactive unfolding:" (Id -> SDoc forall a. Outputable a => a -> SDoc ppr Id id) Maybe CoreExpr forall a. Maybe a Nothing Unfolding NoUnfolding -> Maybe CoreExpr forall a. Maybe a Nothing Unfolding BootUnfolding -> Maybe CoreExpr forall a. Maybe a Nothing OtherCon {} -> Maybe CoreExpr forall a. Maybe a Nothing DFunUnfolding {} -> Maybe CoreExpr forall a. Maybe a Nothing -- Never unfold a DFun where uf_opts :: UnfoldingOpts uf_opts = SimplEnv -> UnfoldingOpts seUnfoldingOpts SimplEnv env active_unf :: Bool active_unf = SimplMode -> Id -> Bool activeUnfolding (SimplEnv -> SimplMode seMode SimplEnv env) Id id activeUnfolding :: SimplMode -> Id -> Bool activeUnfolding :: SimplMode -> Id -> Bool activeUnfolding SimplMode mode Id id | Unfolding -> Bool isCompulsoryUnfolding (IdUnfoldingFun realIdUnfolding Id id) = Bool True -- Even sm_inline can't override compulsory unfoldings | Bool otherwise = CompilerPhase -> Activation -> Bool isActive (SimplMode -> CompilerPhase sm_phase SimplMode mode) (Id -> Activation idInlineActivation Id id) Bool -> Bool -> Bool && SimplMode -> Bool sm_inline SimplMode mode -- `or` isStableUnfolding (realIdUnfolding id) -- Inline things when -- (a) they are active -- (b) sm_inline says so, except that for stable unfoldings -- (ie pragmas) we inline anyway -- | Report the inlining of an identifier's RHS to the user, if requested. traceInline :: Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a traceInline :: forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a traceInline Logger logger UnfoldingOpts opts Id inline_id String str SDoc doc a result -- We take care to ensure that doc is used in only one branch, ensuring that -- the simplifier can push its allocation into the branch. See Note [INLINE -- conditional tracing utilities]. | Bool enable = Logger -> String -> SDoc -> a -> a forall a. Logger -> String -> SDoc -> a -> a logTraceMsg Logger logger String str SDoc doc a result | Bool otherwise = a result where enable :: Bool enable | Logger -> DumpFlag -> Bool logHasDumpFlag Logger logger DumpFlag Opt_D_dump_verbose_inlinings = Bool True | Just String prefix <- UnfoldingOpts -> Maybe String unfoldingReportPrefix UnfoldingOpts opts = String prefix String -> String -> Bool forall a. Eq a => [a] -> [a] -> Bool `isPrefixOf` OccName -> String occNameString (Id -> OccName forall a. NamedThing a => a -> OccName getOccName Id inline_id) | Bool otherwise = Bool False {-# INLINE traceInline #-} -- see Note [INLINE conditional tracing utilities] {- Note [Avoid inlining into deeply nested cases] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also called "exponential inlining". Consider a function f like this: (#18730) f arg1 arg2 = case ... ... -> g arg1 ... -> g arg2 This function is small. So should be safe to inline. However sometimes this doesn't quite work out like that. Consider this code: f1 arg1 arg2 ... = ... case _foo of alt1 -> ... f2 arg1 ... alt2 -> ... f2 arg2 ... f2 arg1 arg2 ... = ... case _foo of alt1 -> ... f3 arg1 ... alt2 -> ... f3 arg2 ... f3 arg1 arg2 ... = ... ... repeats up to n times. And then f1 is applied to some arguments: foo = ... f1 <interestingArgs> ... Initially f2..fn are not interesting to inline so we don't. However we see that f1 is applied to interesting args. So it's an obvious choice to inline those: foo = ... case _foo of alt1 -> ... f2 <interestingArg> ... alt2 -> ... f2 <interestingArg> ... As a result we go and inline f2 both mentions of f2 in turn are now applied to interesting arguments and f2 is small: foo = ... case _foo of alt1 -> ... case _foo of alt1 -> ... f3 <interestingArg> ... alt2 -> ... f3 <interestingArg> ... alt2 -> ... case _foo of alt1 -> ... f3 <interestingArg> ... alt2 -> ... f3 <interestingArg> ... The same thing happens for each binding up to f_n, duplicating the amount of inlining done in each step. Until at some point we are either done or run out of simplifier ticks/RAM. This pattern happened #18730. To combat this we introduce one more heuristic when weighing inlining decision. We keep track of a "case-depth". Which increases each time we look inside a case expression with more than one alternative. We then apply a penalty to inlinings based on the case-depth at which they would be inlined. Bounding the number of inlinings in such a scenario. The heuristic can be tuned in two ways: * We can ignore the first n levels of case nestings for inlining decisions using -funfolding-case-threshold. * The penalty grows linear with the depth. It's computed as size*(depth-threshold)/scaling. Scaling can be set with -funfolding-case-scaling. Reflections and wrinkles * See also Note [Do not add unfoldings to join points at birth] in GHC.Core.Opt.Simplify.Iteration * The total case depth is really the wrong thing; it will inhibit inlining of a local function, just because there is some giant case nest further out. What we want is the /difference/ in case-depth between the binding site and the call site. That could be done quite easily by adding the case-depth to the Unfolding of the function. * What matters more than /depth/ is total /width/; that is how many alternatives are in the tree. We could perhaps multiply depth by width at each case expression. * There might be a case nest with many alternatives, but the function is called in only a handful of them. So maybe we should ignore case-depth, and instead penalise funtions that are called many times -- after all, inlining them bloats code. But in the scenario above, we are simplifying an inlined fuction, without doing a global occurrence analysis each time. So if we based the penalty on multiple occurences, we should /also/ add a penalty when simplifying an already-simplified expression. We do track this (seInlineDepth) but currently we barely use it. An advantage of using occurrences+inline depth is that it'll work when no case expressions are involved. See #15488. * Test T18730 did not involve join points. But join points are very prone to the same kind of thing. For exampe in #13253, and several related tickets, we got an exponential blowup in code size from a program that looks like this. let j1a x = case f y of { True -> p; False -> q } j1b x = case f y of { True -> q; False -> p } j2a x = case f (y+1) of { True -> j1a x; False -> j1b x} j2b x = case f (y+1) of { True -> j1b x; False -> j1a x} ... in case f (y+10) of { True -> j10a 7; False -> j10b 8 } The first danger is this: in Simplifier iteration 1 postInlineUnconditionally inlines the last functions, j10a and j10b (they are both small). Now we have two calls to j9a and two to j9b. In the next Simplifer iteration, postInlineUnconditionally inlines all four of these calls, leaving four calls to j8a and j8b. Etc. Happily, this probably /won't/ happen because the Simplifier works top down, so it'll inline j1a/j1b into j2a/j2b, which will make the latter bigger; so the process will stop. But we still need to stop the inline cascade described at the head of this Note. Some guidance on setting these defaults: * A low threshold (<= 2) is needed to prevent exponential cases from spiraling out of control. We picked 2 for no particular reason. * Scaling the penalty by any more than 30 means the reproducer from T18730 won't compile even with reasonably small values of n. Instead it will run out of runs/ticks. This means to positively affect the reproducer a scaling <= 30 is required. * A scaling of >= 15 still causes a few very large regressions on some nofib benchmarks. (+80% for gc/fulsom, +90% for real/ben-raytrace, +20% for spectral/fibheaps) * A scaling of >= 25 showed no regressions on nofib. However it showed a number of (small) regression for compiler perf benchmarks. The end result is that we are settling for a scaling of 30, with a threshold of 2. This gives us minimal compiler perf regressions. No nofib runtime regressions and will still avoid this pattern sometimes. This is a "safe" default, where we err on the side of compiler blowup instead of risking runtime regressions. For cases where the default falls short the flag can be changed to allow more/less inlining as needed on a per-module basis. -} tryUnfolding :: SimplEnv -> Logger -> Id -> Bool -> [ArgSummary] -> CallCtxt -> CoreExpr -> UnfoldingCache -> UnfoldingGuidance -> Maybe CoreExpr tryUnfolding :: SimplEnv -> Logger -> Id -> Bool -> [ArgSummary] -> CallCtxt -> CoreExpr -> UnfoldingCache -> UnfoldingGuidance -> Maybe CoreExpr tryUnfolding SimplEnv env Logger logger Id id Bool lone_variable [ArgSummary] arg_infos CallCtxt cont_info CoreExpr unf_template UnfoldingCache unf_cache UnfoldingGuidance guidance = case UnfoldingGuidance guidance of UnfoldingGuidance UnfNever -> Logger -> UnfoldingOpts -> Id -> String -> SDoc -> Maybe CoreExpr -> Maybe CoreExpr forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a traceInline Logger logger UnfoldingOpts opts Id id String str (String -> SDoc forall doc. IsLine doc => String -> doc text String "UnfNever") Maybe CoreExpr forall a. Maybe a Nothing UnfWhen { ug_arity :: UnfoldingGuidance -> Int ug_arity = Int uf_arity, ug_unsat_ok :: UnfoldingGuidance -> Bool ug_unsat_ok = Bool unsat_ok, ug_boring_ok :: UnfoldingGuidance -> Bool ug_boring_ok = Bool boring_ok } | Bool enough_args Bool -> Bool -> Bool && (Bool boring_ok Bool -> Bool -> Bool || Bool some_benefit Bool -> Bool -> Bool || UnfoldingOpts -> Bool unfoldingVeryAggressive UnfoldingOpts opts) -- See Note [INLINE for small functions] (3) -> Logger -> UnfoldingOpts -> Id -> String -> SDoc -> Maybe CoreExpr -> Maybe CoreExpr forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a traceInline Logger logger UnfoldingOpts opts Id id String str (Bool -> SDoc -> Bool -> SDoc mk_doc Bool some_benefit SDoc forall doc. IsOutput doc => doc empty Bool True) (CoreExpr -> Maybe CoreExpr forall a. a -> Maybe a Just CoreExpr unf_template) | Bool otherwise -> Logger -> UnfoldingOpts -> Id -> String -> SDoc -> Maybe CoreExpr -> Maybe CoreExpr forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a traceInline Logger logger UnfoldingOpts opts Id id String str (Bool -> SDoc -> Bool -> SDoc mk_doc Bool some_benefit SDoc forall doc. IsOutput doc => doc empty Bool False) Maybe CoreExpr forall a. Maybe a Nothing where some_benefit :: Bool some_benefit = Int -> Bool -> Bool calc_some_benefit Int uf_arity Bool True enough_args :: Bool enough_args = (Int n_val_args Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >= Int uf_arity) Bool -> Bool -> Bool || (Bool unsat_ok Bool -> Bool -> Bool && Int n_val_args Int -> Int -> Bool forall a. Ord a => a -> a -> Bool > Int 0) UnfIfGoodArgs { ug_args :: UnfoldingGuidance -> [Int] ug_args = [Int] arg_discounts, ug_res :: UnfoldingGuidance -> Int ug_res = Int res_discount, ug_size :: UnfoldingGuidance -> Int ug_size = Int size } | Id -> Bool isJoinId Id id, Bool small_enough -> Maybe CoreExpr inline_join_point | UnfoldingOpts -> Bool unfoldingVeryAggressive UnfoldingOpts opts -> Maybe CoreExpr yes | Bool is_wf, Bool some_benefit, Bool small_enough -> Maybe CoreExpr yes | Bool otherwise -> Maybe CoreExpr no where yes :: Maybe CoreExpr yes = Logger -> UnfoldingOpts -> Id -> String -> SDoc -> Maybe CoreExpr -> Maybe CoreExpr forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a traceInline Logger logger UnfoldingOpts opts Id id String str (Bool -> SDoc -> Bool -> SDoc mk_doc Bool some_benefit SDoc extra_doc Bool True) (CoreExpr -> Maybe CoreExpr forall a. a -> Maybe a Just CoreExpr unf_template) no :: Maybe CoreExpr no = Logger -> UnfoldingOpts -> Id -> String -> SDoc -> Maybe CoreExpr -> Maybe CoreExpr forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a traceInline Logger logger UnfoldingOpts opts Id id String str (Bool -> SDoc -> Bool -> SDoc mk_doc Bool some_benefit SDoc extra_doc Bool False) Maybe CoreExpr forall a. Maybe a Nothing some_benefit :: Bool some_benefit = Int -> Bool -> Bool calc_some_benefit ([Int] -> Int forall a. [a] -> Int forall (t :: * -> *) a. Foldable t => t a -> Int length [Int] arg_discounts) Bool False -- depth_penalty: see Note [Avoid inlining into deeply nested cases] depth_threshold :: Int depth_threshold = UnfoldingOpts -> Int unfoldingCaseThreshold UnfoldingOpts opts depth_scaling :: Int depth_scaling = UnfoldingOpts -> Int unfoldingCaseScaling UnfoldingOpts opts depth_penalty :: Int depth_penalty | Int case_depth Int -> Int -> Bool forall a. Ord a => a -> a -> Bool <= Int depth_threshold = Int 0 | Bool otherwise = (Int size Int -> Int -> Int forall a. Num a => a -> a -> a * (Int case_depth Int -> Int -> Int forall a. Num a => a -> a -> a - Int depth_threshold)) Int -> Int -> Int forall a. Integral a => a -> a -> a `div` Int depth_scaling adjusted_size :: Int adjusted_size = Int size Int -> Int -> Int forall a. Num a => a -> a -> a + Int depth_penalty Int -> Int -> Int forall a. Num a => a -> a -> a - Int discount small_enough :: Bool small_enough = Int adjusted_size Int -> Int -> Bool forall a. Ord a => a -> a -> Bool <= UnfoldingOpts -> Int unfoldingUseThreshold UnfoldingOpts opts discount :: Int discount = [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int computeDiscount [Int] arg_discounts Int res_discount [ArgSummary] arg_infos CallCtxt cont_info extra_doc :: SDoc extra_doc = [SDoc] -> SDoc forall doc. IsDoc doc => [doc] -> doc vcat [ Bool -> SDoc -> SDoc forall doc. IsOutput doc => Bool -> doc -> doc ppWhen (Id -> Bool isJoinId Id id) (SDoc -> SDoc) -> SDoc -> SDoc forall a b. (a -> b) -> a -> b $ String -> SDoc forall doc. IsLine doc => String -> doc text String "join" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> [SDoc] -> SDoc forall doc. IsLine doc => [doc] -> doc fsep [ (Id, Bool, Maybe Bool, Bool) -> SDoc forall a. Outputable a => a -> SDoc ppr (Id v, Unfolding -> Bool hasCoreUnfolding (IdUnfoldingFun idUnfolding Id v) , (Id -> Bool) -> Maybe Id -> Maybe Bool forall a b. (a -> b) -> Maybe a -> Maybe b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap (Unfolding -> Bool isEvaldUnfolding (Unfolding -> Bool) -> IdUnfoldingFun -> Id -> Bool forall b c a. (b -> c) -> (a -> b) -> a -> c . IdUnfoldingFun idUnfolding) (InScopeSet -> Id -> Maybe Id lookupInScope InScopeSet in_scope Id v) , InScopeSet -> Id -> Bool is_more_evald InScopeSet in_scope Id v) | Id v <- VarSet -> [Id] vselems (CoreExpr -> VarSet exprFreeIds CoreExpr unf_template) ] , String -> SDoc forall doc. IsLine doc => String -> doc text String "depth based penalty =" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> Int -> SDoc forall doc. IsLine doc => Int -> doc int Int depth_penalty , String -> SDoc forall doc. IsLine doc => String -> doc text String "adjusted size =" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> Int -> SDoc forall doc. IsLine doc => Int -> doc int Int adjusted_size ] inline_join_point :: Maybe CoreExpr inline_join_point -- See Note [Inlining join points] | [Bool] -> Bool forall (t :: * -> *). Foldable t => t Bool -> Bool or ((Int -> ArgSummary -> Bool) -> [Int] -> [ArgSummary] -> [Bool] forall a b c. (a -> b -> c) -> [a] -> [b] -> [c] zipWith Int -> ArgSummary -> Bool forall {a}. (Ord a, Num a) => a -> ArgSummary -> Bool scrut_arg [Int] arg_discounts [ArgSummary] arg_infos) = Maybe CoreExpr yes | (Id -> Bool) -> VarSet -> Bool anyVarSet (InScopeSet -> Id -> Bool is_more_evald InScopeSet in_scope) (VarSet -> Bool) -> VarSet -> Bool forall a b. (a -> b) -> a -> b $ CoreExpr -> VarSet exprFreeIds CoreExpr unf_template = Maybe CoreExpr yes | Bool otherwise = Maybe CoreExpr no -- scrut_arg is True if the function body has a discount and the arg is a value scrut_arg :: a -> ArgSummary -> Bool scrut_arg a disc ArgSummary ValueArg = a disc a -> a -> Bool forall a. Ord a => a -> a -> Bool > a 0 scrut_arg a _ ArgSummary _ = Bool False where opts :: UnfoldingOpts opts = SimplEnv -> UnfoldingOpts seUnfoldingOpts SimplEnv env case_depth :: Int case_depth = SimplEnv -> Int seCaseDepth SimplEnv env inline_depth :: Int inline_depth = SimplEnv -> Int seInlineDepth SimplEnv env in_scope :: InScopeSet in_scope = SimplEnv -> InScopeSet seInScope SimplEnv env -- Unpack the UnfoldingCache lazily because it may not be needed, and all -- its fields are strict; so evaluating unf_cache at all forces all the -- isWorkFree etc computations to take place. That risks wasting effort for -- Ids that are never going to inline anyway. -- See Note [UnfoldingCache] in GHC.Core UnfoldingCache{ uf_is_work_free :: UnfoldingCache -> Bool uf_is_work_free = Bool is_wf, uf_expandable :: UnfoldingCache -> Bool uf_expandable = Bool is_exp } = UnfoldingCache unf_cache mk_doc :: Bool -> SDoc -> Bool -> SDoc mk_doc Bool some_benefit SDoc extra_doc Bool yes_or_no = [SDoc] -> SDoc forall doc. IsDoc doc => [doc] -> doc vcat [ String -> SDoc forall doc. IsLine doc => String -> doc text String "arg infos" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> [ArgSummary] -> SDoc forall a. Outputable a => a -> SDoc ppr [ArgSummary] arg_infos , String -> SDoc forall doc. IsLine doc => String -> doc text String "interesting continuation" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> CallCtxt -> SDoc forall a. Outputable a => a -> SDoc ppr CallCtxt cont_info , String -> SDoc forall doc. IsLine doc => String -> doc text String "some_benefit" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> Bool -> SDoc forall a. Outputable a => a -> SDoc ppr Bool some_benefit , String -> SDoc forall doc. IsLine doc => String -> doc text String "is exp:" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> Bool -> SDoc forall a. Outputable a => a -> SDoc ppr Bool is_exp , String -> SDoc forall doc. IsLine doc => String -> doc text String "is work-free:" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> Bool -> SDoc forall a. Outputable a => a -> SDoc ppr Bool is_wf , String -> SDoc forall doc. IsLine doc => String -> doc text String "guidance" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> UnfoldingGuidance -> SDoc forall a. Outputable a => a -> SDoc ppr UnfoldingGuidance guidance , String -> SDoc forall doc. IsLine doc => String -> doc text String "case depth =" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> Int -> SDoc forall doc. IsLine doc => Int -> doc int Int case_depth , String -> SDoc forall doc. IsLine doc => String -> doc text String "inline depth =" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> Int -> SDoc forall doc. IsLine doc => Int -> doc int Int inline_depth , SDoc extra_doc , String -> SDoc forall doc. IsLine doc => String -> doc text String "ANSWER =" SDoc -> SDoc -> SDoc forall doc. IsLine doc => doc -> doc -> doc <+> if Bool yes_or_no then String -> SDoc forall doc. IsLine doc => String -> doc text String "YES" else String -> SDoc forall doc. IsLine doc => String -> doc text String "NO"] ctx :: SDocContext ctx = LogFlags -> SDocContext log_default_dump_context (Logger -> LogFlags logFlags Logger logger) str :: String str = String "Considering inlining: " String -> String -> String forall a. [a] -> [a] -> [a] ++ SDocContext -> SDoc -> String showSDocOneLine SDocContext ctx (Id -> SDoc forall a. Outputable a => a -> SDoc ppr Id id) n_val_args :: Int n_val_args = [ArgSummary] -> Int forall a. [a] -> Int forall (t :: * -> *) a. Foldable t => t a -> Int length [ArgSummary] arg_infos -- some_benefit is used when the RHS is small enough -- and the call has enough (or too many) value -- arguments (ie n_val_args >= arity). But there must -- be *something* interesting about some argument, or the -- result context, to make it worth inlining calc_some_benefit :: Arity -> Bool -> Bool -- The Arity is the number of args -- expected by the unfolding calc_some_benefit :: Int -> Bool -> Bool calc_some_benefit Int uf_arity Bool is_inline | Bool -> Bool not Bool saturated = Bool interesting_args -- Under-saturated -- Note [Unsaturated applications] | Bool otherwise = Bool interesting_args -- Saturated or over-saturated Bool -> Bool -> Bool || Bool interesting_call where saturated :: Bool saturated = Int n_val_args Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >= Int uf_arity over_saturated :: Bool over_saturated = Int n_val_args Int -> Int -> Bool forall a. Ord a => a -> a -> Bool > Int uf_arity interesting_args :: Bool interesting_args = (ArgSummary -> Bool) -> [ArgSummary] -> Bool forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool any ArgSummary -> Bool nonTriv [ArgSummary] arg_infos -- NB: (any nonTriv arg_infos) looks at the -- over-saturated args too which is "wrong"; -- but if over-saturated we inline anyway. interesting_call :: Bool interesting_call | Bool over_saturated = Bool True | Bool otherwise = case CallCtxt cont_info of CallCtxt CaseCtxt -> Bool -> Bool not (Bool lone_variable Bool -> Bool -> Bool && Bool is_exp) -- Note [Lone variables] CallCtxt ValAppCtxt -> Bool True -- Note [Cast then apply] CallCtxt RuleArgCtxt -> Int uf_arity Int -> Int -> Bool forall a. Ord a => a -> a -> Bool > Int 0 -- See Note [RHS of lets] CallCtxt DiscArgCtxt -> Int uf_arity Int -> Int -> Bool forall a. Ord a => a -> a -> Bool > Int 0 -- Note [Inlining in ArgCtxt] RhsCtxt RecFlag NonRecursive | Bool is_inline -> Int uf_arity Int -> Int -> Bool forall a. Ord a => a -> a -> Bool > Int 0 -- See Note [RHS of lets] CallCtxt _other -> Bool False -- See Note [Nested functions] vselems :: VarSet -> [Var] vselems :: VarSet -> [Id] vselems VarSet s = (Id -> [Id] -> [Id]) -> [Id] -> VarSet -> [Id] forall a. (Id -> a -> a) -> a -> VarSet -> a nonDetStrictFoldVarSet (\Id v [Id] vs -> Id v Id -> [Id] -> [Id] forall a. a -> [a] -> [a] : [Id] vs) [] VarSet s is_more_evald :: InScopeSet -> Id -> Bool -- See Note [Inlining join points] is_more_evald :: InScopeSet -> Id -> Bool is_more_evald InScopeSet in_scope Id v | Just Id v1 <- InScopeSet -> Id -> Maybe Id lookupInScope InScopeSet in_scope Id v , IdUnfoldingFun idUnfolding Id v1 Unfolding -> Unfolding -> Bool `isBetterUnfoldingThan` IdUnfoldingFun idUnfolding Id v = Bool True | Bool otherwise = Bool False {- Note [RHS of lets] ~~~~~~~~~~~~~~~~~~~~~ When the call is the argument of a function with a RULE, or the RHS of a let, we are a little bit keener to inline (in tryUnfolding). For example f y = (y,y,y) g y = let x = f y in ...(case x of (a,b,c) -> ...) ... We'd inline 'f' if the call was in a case context, and it kind-of-is, only we can't see it. Also x = f v could be expensive whereas x = case v of (a,b) -> a is patently cheap and may allow more eta expansion. So, in `interesting_call` in `tryUnfolding`, we treat the RHS of a /non-recursive/ let as not-totally-boring. A /recursive/ let isn't going be inlined so there is much less point. Hence the (only reason for the) RecFlag in RhsCtxt We inline only if `f` has an `UnfWhen` guidance. I found that being more eager led to fruitless inlining. See Note [Seq is boring] wrinkle (SB1) in GHC.Core.Opt.Simplify.Utils. Note [Inlining join points] ~~~~~~~~~~~~~~~~~~~~~~~~~~~ In general we /do not/ want to inline join points /even if they are small/. See Note [Duplicating join points] in GHC.Core.Opt.Simplify.Iteration. But, assuming it is small, there are various times when we /do/ want to inline a (non-recursive) join point. Namely, if either of these hold: (1) A /scrutinised/ argument (non-zero discount) has a /ValueArg/ info. Inlining will give some benefit. (2) A free variable of the RHS is * Is /not/ evaluated at the join point defn site * Is evaluated at the join point call site. This is the is_more_evald predicate. (1) is fairly obvious but (2) is less so. Here is the code for `integerGT` without (2): integerGt = \ (x :: Integer) (y :: Integer) -> join fail _ = case x of { IS x1 -> case y of { IS y1 -> case <# x1 y1 of _DEFAULT -> case ==# x1 y1 of DEFAULT -> True; 1# -> False 1# -> False IP ds1 -> False IN ds1 -> True IP x1 -> case y of { _DEFAULT -> True; IP y1 -> case bigNatCompare x1 y1 of _DEFAULT -> False; GT -> True IN x1 -> case y of { _DEFAULT -> False; IN y1 -> case bigNatCompare y1 x1 of _DEFAULT -> False; GT -> True in case x of { _DEFAULT -> jump fail GHC.Prim.(##); IS x1 -> case y of { _DEFAULT -> jump fail GHC.Prim.(##); IS y1 -> tagToEnum# @Bool (># x1 y1) If we inline `fail` we get /much/ better code. The only clue is that `x` and `y` (a) are not evaluated at the definition site, and (b) are evaluated at the call site. This predicate is `isBetterUnfoldingThan`. You might think that the variable should also be /scrutinised/ in the join-point RHS, but here are two reasons for not taking that into account. First, we see code somewhat like this in imaginary/wheel-sieve1: let x = <small thunk> in join $j = (x,y) in case z of A -> case x of P -> $j Q -> blah B -> (x,x) C -> True Here `x` can't be duplicated into the branches becuase it is used in both the join point and the A branch. But if we inline $j we get let x = <small thunk> in case z of A -> case x of x' P -> (x', y) Q -> blah B -> x C -> True and now we /can/ duplicate x into the branches, at which point: * it is used strictly in the A branch (evaluated, but no thunk) * it is used lazily in the B branch (still a thunk) * it is not used at all in the C branch (no thunk) Second, spectral/treejoin gets a big win from SpecConstr due to evaluated-ness. Something like this: join $j x = ...(foo fv)... in case fv of I# x -> ... jump $j True ... If we inline $j, SpecConstr sees a call (foo (I# x)) and specialises. Note [Unsaturated applications] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When a call is not saturated, we *still* inline if one of the arguments has interesting structure. That's sometimes very important. A good example is the Ord instance for Bool in Base: Rec { $fOrdBool =GHC.Classes.D:Ord @ Bool ... $cmin_ajX $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool } But the defn of GHC.Classes.$dmmin is: $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a {- Arity: 3, HasNoCafRefs, Strictness: SLL, Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a -> case @ a GHC.Classes.<= @ a $dOrd x y of wild { GHC.Types.False -> y GHC.Types.True -> x }) -} We *really* want to inline $dmmin, even though it has arity 3, in order to unravel the recursion. Note [Things to watch] ~~~~~~~~~~~~~~~~~~~~~~ * { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... } Assume x is exported, so not inlined unconditionally. Then we want x to inline unconditionally; no reason for it not to, and doing so avoids an indirection. * { x = I# 3; ....f x.... } Make sure that x does not inline unconditionally! Lest we get extra allocation. Note [Nested functions] ~~~~~~~~~~~~~~~~~~~~~~~ At one time we treated a call of a non-top-level function as "interesting" (regardless of how boring the context) in the hope that inlining it would eliminate the binding, and its allocation. Specifically, in the default case of interesting_call we had _other -> not is_top && uf_arity > 0 But actually postInlineUnconditionally does some of this and overall it makes virtually no difference to nofib. So I simplified away this special case Note [Cast then apply] ~~~~~~~~~~~~~~~~~~~~~~ Consider myIndex = __inline_me ( (/\a. <blah>) |> co ) co :: (forall a. a -> a) ~ (forall a. T a) ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ... We need to inline myIndex to unravel this; but the actual call (myIndex a) has no value arguments. The ValAppCtxt gives it enough incentive to inline. Note [Inlining in ArgCtxt] ~~~~~~~~~~~~~~~~~~~~~~~~~~ The condition (arity > 0) here is very important, because otherwise we end up inlining top-level stuff into useless places; eg x = I# 3# f = \y. g x This can make a very big difference: it adds 16% to nofib 'integer' allocs, and 20% to 'power'. At one stage I replaced this condition by 'True' (leading to the above slow-down). The motivation was test eyeball/inline1.hs; but that seems to work ok now. NOTE: arguably, we should inline in ArgCtxt only if the result of the call is at least CONLIKE. At least for the cases where we use ArgCtxt for the RHS of a 'let', we only profit from the inlining if we get a CONLIKE thing (modulo lets). Note [Lone variables] ~~~~~~~~~~~~~~~~~~~~~ See also Note [Interaction of exprIsWorkFree and lone variables] which appears below The "lone-variable" case is important. I spent ages messing about with unsatisfactory variants, but this is nice. The idea is that if a variable appears all alone as an arg of lazy fn, or rhs BoringCtxt as scrutinee of a case CaseCtxt as arg of a fn ArgCtxt AND it is bound to a cheap expression then we should not inline it (unless there is some other reason, e.g. it is the sole occurrence). That is what is happening at the use of 'lone_variable' in 'interesting_call'. Why? At least in the case-scrutinee situation, turning let x = (a,b) in case x of y -> ... into let x = (a,b) in case (a,b) of y -> ... and thence to let x = (a,b) in let y = (a,b) in ... is bad if the binding for x will remain. Another example: I discovered that strings were getting inlined straight back into applications of 'error' because the latter is strict. s = "foo" f = \x -> ...(error s)... Fundamentally such contexts should not encourage inlining because, provided the RHS is "expandable" (see Note [exprIsExpandable] in GHC.Core.Utils) the context can ``see'' the unfolding of the variable (e.g. case or a RULE) so there's no gain. However, watch out: * Consider this: foo = \n. [n]) {-# INLINE foo #-} bar = foo 20 {-# INLINE bar #-} baz = \n. case bar of { (m:_) -> m + n } Here we really want to inline 'bar' so that we can inline 'foo' and the whole thing unravels as it should obviously do. This is important: in the NDP project, 'bar' generates a closure data structure rather than a list. So the non-inlining of lone_variables should only apply if the unfolding is regarded as expandable; because that is when exprIsConApp_maybe looks through the unfolding. Hence the "&& is_exp" in the CaseCtxt branch of interesting_call * Even a type application or coercion isn't a lone variable. Consider case $fMonadST @ RealWorld of { :DMonad a b c -> c } We had better inline that sucker! The case won't see through it. For now, I'm treating treating a variable applied to types in a *lazy* context "lone". The motivating example was f = /\a. \x. BIG g = /\a. \y. h (f a) There's no advantage in inlining f here, and perhaps a significant disadvantage. Hence some_val_args in the Stop case Note [Interaction of exprIsWorkFree and lone variables] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The lone-variable test says "don't inline if a case expression scrutinises a lone variable whose unfolding is cheap". It's very important that, under these circumstances, exprIsConApp_maybe can spot a constructor application. So, for example, we don't consider let x = e in (x,x) to be cheap, and that's good because exprIsConApp_maybe doesn't think that expression is a constructor application. In the 'not (lone_variable && is_wf)' test, I used to test is_value rather than is_wf, which was utterly wrong, because the above expression responds True to exprIsHNF, which is what sets is_value. This kind of thing can occur if you have {-# INLINE foo #-} foo = let x = e in (x,x) which Roman did. -} computeDiscount :: [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int computeDiscount :: [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int computeDiscount [Int] arg_discounts Int res_discount [ArgSummary] arg_infos CallCtxt cont_info = Int 10 -- Discount of 10 because the result replaces the call -- so we count 10 for the function itself Int -> Int -> Int forall a. Num a => a -> a -> a + Int 10 Int -> Int -> Int forall a. Num a => a -> a -> a * [Int] -> Int forall a. [a] -> Int forall (t :: * -> *) a. Foldable t => t a -> Int length [Int] actual_arg_discounts -- Discount of 10 for each arg supplied, -- because the result replaces the call Int -> Int -> Int forall a. Num a => a -> a -> a + Int total_arg_discount Int -> Int -> Int forall a. Num a => a -> a -> a + Int res_discount' where actual_arg_discounts :: [Int] actual_arg_discounts = (Int -> ArgSummary -> Int) -> [Int] -> [ArgSummary] -> [Int] forall a b c. (a -> b -> c) -> [a] -> [b] -> [c] zipWith Int -> ArgSummary -> Int forall {a}. Num a => a -> ArgSummary -> a mk_arg_discount [Int] arg_discounts [ArgSummary] arg_infos total_arg_discount :: Int total_arg_discount = [Int] -> Int forall a. Num a => [a] -> a forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a sum [Int] actual_arg_discounts mk_arg_discount :: a -> ArgSummary -> a mk_arg_discount a _ ArgSummary TrivArg = a 0 mk_arg_discount a _ ArgSummary NonTrivArg = a 10 mk_arg_discount a discount ArgSummary ValueArg = a discount res_discount' :: Int res_discount' | Ordering LT <- [Int] arg_discounts [Int] -> [ArgSummary] -> Ordering forall a b. [a] -> [b] -> Ordering `compareLength` [ArgSummary] arg_infos = Int res_discount -- Over-saturated | Bool otherwise = case CallCtxt cont_info of CallCtxt BoringCtxt -> Int 0 CallCtxt CaseCtxt -> Int res_discount -- Presumably a constructor CallCtxt ValAppCtxt -> Int res_discount -- Presumably a function CallCtxt _ -> Int 40 Int -> Int -> Int forall a. Ord a => a -> a -> a `min` Int res_discount -- ToDo: this 40 `min` res_discount doesn't seem right -- for DiscArgCtxt it shouldn't matter because the function will -- get the arg discount for any non-triv arg -- for RuleArgCtxt we do want to be keener to inline; but not only -- constructor results -- for RhsCtxt I suppose that exposing a data con is good in general -- And 40 seems very arbitrary -- -- res_discount can be very large when a function returns -- constructors; but we only want to invoke that large discount -- when there's a case continuation. -- Otherwise we, rather arbitrarily, threshold it. Yuk. -- But we want to avoid inlining large functions that return -- constructors into contexts that are simply "interesting"