{-
(c) The University of Glasgow 2006
(c) The AQUA Project, Glasgow University, 1994-1998

This module contains inlining logic used by the simplifier.
-}



module GHC.Core.Opt.Simplify.Inline (
        -- * Cheap and cheerful inlining checks.
        couldBeSmallEnoughToInline,
        smallEnoughToInline, activeUnfolding,

        -- * The smart inlining decisions are made by callSiteInline
        callSiteInline, CallCtxt(..),
    ) where

import GHC.Prelude

import GHC.Driver.Flags

import GHC.Core.Opt.Simplify.Env

import GHC.Core
import GHC.Core.Unfold
import GHC.Core.FVs( exprFreeIds )

import GHC.Types.Id
import GHC.Types.Var.Env( InScopeSet, lookupInScope )
import GHC.Types.Var.Set
import GHC.Types.Basic  ( Arity, RecFlag(..), isActive )
import GHC.Utils.Logger
import GHC.Utils.Misc
import GHC.Utils.Outputable
import GHC.Types.Name

import Data.List (isPrefixOf)

{-
************************************************************************
*                                                                      *
\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
*                                                                      *
************************************************************************

We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
we ``couldn't possibly use'' on the other side.  Can be overridden w/
flaggery.  Just the same as smallEnoughToInline, except that it has no
actual arguments.
-}

couldBeSmallEnoughToInline :: UnfoldingOpts -> Int -> CoreExpr -> Bool
couldBeSmallEnoughToInline :: UnfoldingOpts -> Int -> CoreExpr -> Bool
couldBeSmallEnoughToInline UnfoldingOpts
opts Int
threshold CoreExpr
rhs
  = case UnfoldingOpts -> Int -> [Id] -> CoreExpr -> ExprSize
sizeExpr UnfoldingOpts
opts Int
threshold [] CoreExpr
body of
       ExprSize
TooBig -> Bool
False
       ExprSize
_      -> Bool
True
  where
    ([Id]
_, CoreExpr
body) = CoreExpr -> ([Id], CoreExpr)
forall b. Expr b -> ([b], Expr b)
collectBinders CoreExpr
rhs

----------------
smallEnoughToInline :: UnfoldingOpts -> Unfolding -> Bool
smallEnoughToInline :: UnfoldingOpts -> Unfolding -> Bool
smallEnoughToInline UnfoldingOpts
opts (CoreUnfolding {uf_guidance :: Unfolding -> UnfoldingGuidance
uf_guidance = UnfoldingGuidance
guidance})
  = case UnfoldingGuidance
guidance of
       UnfIfGoodArgs {ug_size :: UnfoldingGuidance -> Int
ug_size = Int
size} -> Int
size Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= UnfoldingOpts -> Int
unfoldingUseThreshold UnfoldingOpts
opts
       UnfWhen {} -> Bool
True
       UnfoldingGuidance
UnfNever   -> Bool
False
smallEnoughToInline UnfoldingOpts
_ Unfolding
_
  = Bool
False

{-
************************************************************************
*                                                                      *
\subsection{callSiteInline}
*                                                                      *
************************************************************************

This is the key function.  It decides whether to inline a variable at a call site

callSiteInline is used at call sites, so it is a bit more generous.
It's a very important function that embodies lots of heuristics.
A non-WHNF can be inlined if it doesn't occur inside a lambda,
and occurs exactly once or
    occurs once in each branch of a case and is small

If the thing is in WHNF, there's no danger of duplicating work,
so we can inline if it occurs once, or is small

NOTE: we don't want to inline top-level functions that always diverge.
It just makes the code bigger.  Tt turns out that the convenient way to prevent
them inlining is to give them a NOINLINE pragma, which we do in
StrictAnal.addStrictnessInfoToTopId
-}

callSiteInline :: SimplEnv
               -> Logger
               -> Id                    -- The Id
               -> Bool                  -- True if there are no arguments at all (incl type args)
               -> [ArgSummary]          -- One for each value arg; True if it is interesting
               -> CallCtxt              -- True <=> continuation is interesting
               -> Maybe CoreExpr        -- Unfolding, if any
callSiteInline :: SimplEnv
-> Logger
-> Id
-> Bool
-> [ArgSummary]
-> CallCtxt
-> Maybe CoreExpr
callSiteInline SimplEnv
env Logger
logger Id
id Bool
lone_variable [ArgSummary]
arg_infos CallCtxt
cont_info
  = case IdUnfoldingFun
idUnfolding Id
id of
      -- idUnfolding checks for loop-breakers, returning NoUnfolding
      -- Things with an INLINE pragma may have an unfolding *and*
      -- be a loop breaker  (maybe the knot is not yet untied)
        CoreUnfolding { uf_tmpl :: Unfolding -> CoreExpr
uf_tmpl = CoreExpr
unf_template
                      , uf_cache :: Unfolding -> UnfoldingCache
uf_cache = UnfoldingCache
unf_cache
                      , uf_guidance :: Unfolding -> UnfoldingGuidance
uf_guidance = UnfoldingGuidance
guidance }
          | Bool
active_unf -> SimplEnv
-> Logger
-> Id
-> Bool
-> [ArgSummary]
-> CallCtxt
-> CoreExpr
-> UnfoldingCache
-> UnfoldingGuidance
-> Maybe CoreExpr
tryUnfolding SimplEnv
env Logger
logger Id
id Bool
lone_variable
                                    [ArgSummary]
arg_infos CallCtxt
cont_info CoreExpr
unf_template
                                    UnfoldingCache
unf_cache UnfoldingGuidance
guidance
          | Bool
otherwise -> Logger
-> UnfoldingOpts
-> Id
-> String
-> SDoc
-> Maybe CoreExpr
-> Maybe CoreExpr
forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
traceInline Logger
logger UnfoldingOpts
uf_opts Id
id String
"Inactive unfolding:" (Id -> SDoc
forall a. Outputable a => a -> SDoc
ppr Id
id) Maybe CoreExpr
forall a. Maybe a
Nothing
        Unfolding
NoUnfolding      -> Maybe CoreExpr
forall a. Maybe a
Nothing
        Unfolding
BootUnfolding    -> Maybe CoreExpr
forall a. Maybe a
Nothing
        OtherCon {}      -> Maybe CoreExpr
forall a. Maybe a
Nothing
        DFunUnfolding {} -> Maybe CoreExpr
forall a. Maybe a
Nothing     -- Never unfold a DFun
  where
    uf_opts :: UnfoldingOpts
uf_opts    = SimplEnv -> UnfoldingOpts
seUnfoldingOpts SimplEnv
env
    active_unf :: Bool
active_unf = SimplMode -> Id -> Bool
activeUnfolding (SimplEnv -> SimplMode
seMode SimplEnv
env) Id
id

activeUnfolding :: SimplMode -> Id -> Bool
activeUnfolding :: SimplMode -> Id -> Bool
activeUnfolding SimplMode
mode Id
id
  | Unfolding -> Bool
isCompulsoryUnfolding (IdUnfoldingFun
realIdUnfolding Id
id)
  = Bool
True   -- Even sm_inline can't override compulsory unfoldings
  | Bool
otherwise
  = CompilerPhase -> Activation -> Bool
isActive (SimplMode -> CompilerPhase
sm_phase SimplMode
mode) (Id -> Activation
idInlineActivation Id
id)
  Bool -> Bool -> Bool
&& SimplMode -> Bool
sm_inline SimplMode
mode
      -- `or` isStableUnfolding (realIdUnfolding id)
      -- Inline things when
      --  (a) they are active
      --  (b) sm_inline says so, except that for stable unfoldings
      --                         (ie pragmas) we inline anyway

-- | Report the inlining of an identifier's RHS to the user, if requested.
traceInline :: Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
traceInline :: forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
traceInline Logger
logger UnfoldingOpts
opts Id
inline_id String
str SDoc
doc a
result
  -- We take care to ensure that doc is used in only one branch, ensuring that
  -- the simplifier can push its allocation into the branch. See Note [INLINE
  -- conditional tracing utilities].
  | Bool
enable    = Logger -> String -> SDoc -> a -> a
forall a. Logger -> String -> SDoc -> a -> a
logTraceMsg Logger
logger String
str SDoc
doc a
result
  | Bool
otherwise = a
result
  where
    enable :: Bool
enable
      | Logger -> DumpFlag -> Bool
logHasDumpFlag Logger
logger DumpFlag
Opt_D_dump_verbose_inlinings
      = Bool
True
      | Just String
prefix <- UnfoldingOpts -> Maybe String
unfoldingReportPrefix UnfoldingOpts
opts
      = String
prefix String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` OccName -> String
occNameString (Id -> OccName
forall a. NamedThing a => a -> OccName
getOccName Id
inline_id)
      | Bool
otherwise
      = Bool
False
{-# INLINE traceInline #-} -- see Note [INLINE conditional tracing utilities]

{- Note [Avoid inlining into deeply nested cases]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Also called "exponential inlining".

Consider a function f like this: (#18730)

  f arg1 arg2 =
    case ...
      ... -> g arg1
      ... -> g arg2

This function is small. So should be safe to inline.
However sometimes this doesn't quite work out like that.
Consider this code:

    f1 arg1 arg2 ... = ...
        case _foo of
          alt1 -> ... f2 arg1 ...
          alt2 -> ... f2 arg2 ...

    f2 arg1 arg2 ... = ...
        case _foo of
          alt1 -> ... f3 arg1 ...
          alt2 -> ... f3 arg2 ...

    f3 arg1 arg2 ... = ...

    ... repeats up to n times. And then f1 is
    applied to some arguments:

    foo = ... f1 <interestingArgs> ...

Initially f2..fn are not interesting to inline so we don't.  However we see
that f1 is applied to interesting args.  So it's an obvious choice to inline
those:

    foo = ...
          case _foo of
            alt1 -> ... f2 <interestingArg> ...
            alt2 -> ... f2 <interestingArg> ...

As a result we go and inline f2 both mentions of f2 in turn are now applied to
interesting arguments and f2 is small:

    foo = ...
          case _foo of
            alt1 -> ... case _foo of
                alt1 -> ... f3 <interestingArg> ...
                alt2 -> ... f3 <interestingArg> ...

            alt2 -> ... case _foo of
                alt1 -> ... f3 <interestingArg> ...
                alt2 -> ... f3 <interestingArg> ...

The same thing happens for each binding up to f_n, duplicating the amount of inlining
done in each step. Until at some point we are either done or run out of simplifier
ticks/RAM. This pattern happened #18730.

To combat this we introduce one more heuristic when weighing inlining decision.
We keep track of a "case-depth". Which increases each time we look inside a case
expression with more than one alternative.

We then apply a penalty to inlinings based on the case-depth at which they would
be inlined. Bounding the number of inlinings in such a scenario.

The heuristic can be tuned in two ways:

* We can ignore the first n levels of case nestings for inlining decisions using
  -funfolding-case-threshold.

* The penalty grows linear with the depth. It's computed as
     size*(depth-threshold)/scaling.
  Scaling can be set with -funfolding-case-scaling.

Reflections and wrinkles

* See also Note [Do not add unfoldings to join points at birth] in
  GHC.Core.Opt.Simplify.Iteration

* The total case depth is really the wrong thing; it will inhibit inlining of a
  local function, just because there is some giant case nest further out.  What we
  want is the /difference/ in case-depth between the binding site and the call site.
  That could be done quite easily by adding the case-depth to the Unfolding of the
  function.

* What matters more than /depth/ is total /width/; that is how many alternatives
  are in the tree.  We could perhaps multiply depth by width at each case expression.

* There might be a case nest with many alternatives, but the function is called in
  only a handful of them.  So maybe we should ignore case-depth, and instead penalise
  funtions that are called many times -- after all, inlining them bloats code.

  But in the scenario above, we are simplifying an inlined fuction, without doing a
  global occurrence analysis each time.  So if we based the penalty on multiple
  occurences, we should /also/ add a penalty when simplifying an already-simplified
  expression.  We do track this (seInlineDepth) but currently we barely use it.

  An advantage of using occurrences+inline depth is that it'll work when no
  case expressions are involved.  See #15488.

* Test T18730 did not involve join points.  But join points are very prone to
  the same kind of thing.  For exampe in #13253, and several related tickets,
  we got an exponential blowup in code size from a program that looks like
  this.

  let j1a x = case f y     of { True -> p;   False -> q }
      j1b x = case f y     of { True -> q;   False -> p }
      j2a x = case f (y+1) of { True -> j1a x; False -> j1b x}
      j2b x = case f (y+1) of { True -> j1b x; False -> j1a x}
      ...
  in case f (y+10) of { True -> j10a 7; False -> j10b 8 }

  The first danger is this: in Simplifier iteration 1 postInlineUnconditionally
  inlines the last functions, j10a and j10b (they are both small).  Now we have
  two calls to j9a and two to j9b.  In the next Simplifer iteration,
  postInlineUnconditionally inlines all four of these calls, leaving four calls
  to j8a and j8b. Etc.

  Happily, this probably /won't/ happen because the Simplifier works top down, so it'll
  inline j1a/j1b into j2a/j2b, which will make the latter bigger; so the process
  will stop.  But we still need to stop the inline cascade described at the head
  of this Note.

Some guidance on setting these defaults:

* A low threshold (<= 2) is needed to prevent exponential cases from spiraling out of
  control. We picked 2 for no particular reason.

* Scaling the penalty by any more than 30 means the reproducer from
  T18730 won't compile even with reasonably small values of n. Instead
  it will run out of runs/ticks. This means to positively affect the reproducer
  a scaling <= 30 is required.

* A scaling of >= 15 still causes a few very large regressions on some nofib benchmarks.
  (+80% for gc/fulsom, +90% for real/ben-raytrace, +20% for spectral/fibheaps)

* A scaling of >= 25 showed no regressions on nofib. However it showed a number of
  (small) regression for compiler perf benchmarks.

The end result is that we are settling for a scaling of 30, with a threshold of 2.
This gives us minimal compiler perf regressions. No nofib runtime regressions and
will still avoid this pattern sometimes. This is a "safe" default, where we err on
the side of compiler blowup instead of risking runtime regressions.

For cases where the default falls short the flag can be changed to allow
more/less inlining as needed on a per-module basis.

-}

tryUnfolding :: SimplEnv -> Logger -> Id -> Bool -> [ArgSummary] -> CallCtxt
             -> CoreExpr -> UnfoldingCache -> UnfoldingGuidance
             -> Maybe CoreExpr
tryUnfolding :: SimplEnv
-> Logger
-> Id
-> Bool
-> [ArgSummary]
-> CallCtxt
-> CoreExpr
-> UnfoldingCache
-> UnfoldingGuidance
-> Maybe CoreExpr
tryUnfolding SimplEnv
env Logger
logger Id
id Bool
lone_variable [ArgSummary]
arg_infos
             CallCtxt
cont_info CoreExpr
unf_template UnfoldingCache
unf_cache UnfoldingGuidance
guidance
 = case UnfoldingGuidance
guidance of
     UnfoldingGuidance
UnfNever -> Logger
-> UnfoldingOpts
-> Id
-> String
-> SDoc
-> Maybe CoreExpr
-> Maybe CoreExpr
forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
traceInline Logger
logger UnfoldingOpts
opts Id
id String
str (String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"UnfNever") Maybe CoreExpr
forall a. Maybe a
Nothing

     UnfWhen { ug_arity :: UnfoldingGuidance -> Int
ug_arity = Int
uf_arity, ug_unsat_ok :: UnfoldingGuidance -> Bool
ug_unsat_ok = Bool
unsat_ok, ug_boring_ok :: UnfoldingGuidance -> Bool
ug_boring_ok = Bool
boring_ok }
        | Bool
enough_args Bool -> Bool -> Bool
&& (Bool
boring_ok Bool -> Bool -> Bool
|| Bool
some_benefit Bool -> Bool -> Bool
|| UnfoldingOpts -> Bool
unfoldingVeryAggressive UnfoldingOpts
opts)
                -- See Note [INLINE for small functions] (3)
        -> Logger
-> UnfoldingOpts
-> Id
-> String
-> SDoc
-> Maybe CoreExpr
-> Maybe CoreExpr
forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
traceInline Logger
logger UnfoldingOpts
opts Id
id String
str (Bool -> SDoc -> Bool -> SDoc
mk_doc Bool
some_benefit SDoc
forall doc. IsOutput doc => doc
empty Bool
True) (CoreExpr -> Maybe CoreExpr
forall a. a -> Maybe a
Just CoreExpr
unf_template)
        | Bool
otherwise
        -> Logger
-> UnfoldingOpts
-> Id
-> String
-> SDoc
-> Maybe CoreExpr
-> Maybe CoreExpr
forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
traceInline Logger
logger UnfoldingOpts
opts Id
id String
str (Bool -> SDoc -> Bool -> SDoc
mk_doc Bool
some_benefit SDoc
forall doc. IsOutput doc => doc
empty Bool
False) Maybe CoreExpr
forall a. Maybe a
Nothing
        where
          some_benefit :: Bool
some_benefit = Int -> Bool -> Bool
calc_some_benefit Int
uf_arity Bool
True
          enough_args :: Bool
enough_args  = (Int
n_val_args Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
uf_arity) Bool -> Bool -> Bool
|| (Bool
unsat_ok Bool -> Bool -> Bool
&& Int
n_val_args Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0)

     UnfIfGoodArgs { ug_args :: UnfoldingGuidance -> [Int]
ug_args = [Int]
arg_discounts, ug_res :: UnfoldingGuidance -> Int
ug_res = Int
res_discount, ug_size :: UnfoldingGuidance -> Int
ug_size = Int
size }
        | Id -> Bool
isJoinId Id
id, Bool
small_enough         -> Maybe CoreExpr
inline_join_point
        | UnfoldingOpts -> Bool
unfoldingVeryAggressive UnfoldingOpts
opts      -> Maybe CoreExpr
yes
        | Bool
is_wf, Bool
some_benefit, Bool
small_enough -> Maybe CoreExpr
yes
        | Bool
otherwise                         -> Maybe CoreExpr
no
        where
          yes :: Maybe CoreExpr
yes = Logger
-> UnfoldingOpts
-> Id
-> String
-> SDoc
-> Maybe CoreExpr
-> Maybe CoreExpr
forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
traceInline Logger
logger UnfoldingOpts
opts Id
id String
str (Bool -> SDoc -> Bool -> SDoc
mk_doc Bool
some_benefit SDoc
extra_doc Bool
True)  (CoreExpr -> Maybe CoreExpr
forall a. a -> Maybe a
Just CoreExpr
unf_template)
          no :: Maybe CoreExpr
no  = Logger
-> UnfoldingOpts
-> Id
-> String
-> SDoc
-> Maybe CoreExpr
-> Maybe CoreExpr
forall a. Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
traceInline Logger
logger UnfoldingOpts
opts Id
id String
str (Bool -> SDoc -> Bool -> SDoc
mk_doc Bool
some_benefit SDoc
extra_doc Bool
False) Maybe CoreExpr
forall a. Maybe a
Nothing

          some_benefit :: Bool
some_benefit = Int -> Bool -> Bool
calc_some_benefit ([Int] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
arg_discounts) Bool
False

          -- depth_penalty: see Note [Avoid inlining into deeply nested cases]
          depth_threshold :: Int
depth_threshold = UnfoldingOpts -> Int
unfoldingCaseThreshold UnfoldingOpts
opts
          depth_scaling :: Int
depth_scaling   = UnfoldingOpts -> Int
unfoldingCaseScaling UnfoldingOpts
opts
          depth_penalty :: Int
depth_penalty | Int
case_depth Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
depth_threshold = Int
0
                        | Bool
otherwise = (Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Int
case_depth Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
depth_threshold)) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
depth_scaling

          adjusted_size :: Int
adjusted_size = Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
depth_penalty Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
discount
          small_enough :: Bool
small_enough = Int
adjusted_size Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= UnfoldingOpts -> Int
unfoldingUseThreshold UnfoldingOpts
opts
          discount :: Int
discount = [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
computeDiscount [Int]
arg_discounts Int
res_discount [ArgSummary]
arg_infos CallCtxt
cont_info

          extra_doc :: SDoc
extra_doc = [SDoc] -> SDoc
forall doc. IsDoc doc => [doc] -> doc
vcat [ Bool -> SDoc -> SDoc
forall doc. IsOutput doc => Bool -> doc -> doc
ppWhen (Id -> Bool
isJoinId Id
id) (SDoc -> SDoc) -> SDoc -> SDoc
forall a b. (a -> b) -> a -> b
$
                             String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"join" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> [SDoc] -> SDoc
forall doc. IsLine doc => [doc] -> doc
fsep [ (Id, Bool, Maybe Bool, Bool) -> SDoc
forall a. Outputable a => a -> SDoc
ppr (Id
v, Unfolding -> Bool
hasCoreUnfolding (IdUnfoldingFun
idUnfolding Id
v)
                                                        , (Id -> Bool) -> Maybe Id -> Maybe Bool
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Unfolding -> Bool
isEvaldUnfolding (Unfolding -> Bool) -> IdUnfoldingFun -> Id -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IdUnfoldingFun
idUnfolding) (InScopeSet -> Id -> Maybe Id
lookupInScope InScopeSet
in_scope Id
v)
                                                        , InScopeSet -> Id -> Bool
is_more_evald InScopeSet
in_scope Id
v)
                                                  | Id
v <- VarSet -> [Id]
vselems (CoreExpr -> VarSet
exprFreeIds CoreExpr
unf_template) ]
                           , String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"depth based penalty =" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> Int -> SDoc
forall doc. IsLine doc => Int -> doc
int Int
depth_penalty
                           , String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"adjusted size =" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> Int -> SDoc
forall doc. IsLine doc => Int -> doc
int Int
adjusted_size ]

          inline_join_point :: Maybe CoreExpr
inline_join_point  -- See Note [Inlining join points]
            | [Bool] -> Bool
forall (t :: * -> *). Foldable t => t Bool -> Bool
or ((Int -> ArgSummary -> Bool) -> [Int] -> [ArgSummary] -> [Bool]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> ArgSummary -> Bool
forall {a}. (Ord a, Num a) => a -> ArgSummary -> Bool
scrut_arg [Int]
arg_discounts [ArgSummary]
arg_infos) = Maybe CoreExpr
yes
            | (Id -> Bool) -> VarSet -> Bool
anyVarSet (InScopeSet -> Id -> Bool
is_more_evald InScopeSet
in_scope) (VarSet -> Bool) -> VarSet -> Bool
forall a b. (a -> b) -> a -> b
$
              CoreExpr -> VarSet
exprFreeIds CoreExpr
unf_template                       = Maybe CoreExpr
yes
            | Bool
otherwise                                      = Maybe CoreExpr
no
          -- scrut_arg is True if the function body has a discount and the arg is a value
          scrut_arg :: a -> ArgSummary -> Bool
scrut_arg a
disc ArgSummary
ValueArg = a
disc a -> a -> Bool
forall a. Ord a => a -> a -> Bool
> a
0
          scrut_arg a
_    ArgSummary
_        = Bool
False

  where
    opts :: UnfoldingOpts
opts         = SimplEnv -> UnfoldingOpts
seUnfoldingOpts SimplEnv
env
    case_depth :: Int
case_depth   = SimplEnv -> Int
seCaseDepth SimplEnv
env
    inline_depth :: Int
inline_depth = SimplEnv -> Int
seInlineDepth SimplEnv
env
    in_scope :: InScopeSet
in_scope     = SimplEnv -> InScopeSet
seInScope SimplEnv
env

    -- Unpack the UnfoldingCache lazily because it may not be needed, and all
    -- its fields are strict; so evaluating unf_cache at all forces all the
    -- isWorkFree etc computations to take place.  That risks wasting effort for
    -- Ids that are never going to inline anyway.
    -- See Note [UnfoldingCache] in GHC.Core
    UnfoldingCache{ uf_is_work_free :: UnfoldingCache -> Bool
uf_is_work_free = Bool
is_wf, uf_expandable :: UnfoldingCache -> Bool
uf_expandable = Bool
is_exp } = UnfoldingCache
unf_cache

    mk_doc :: Bool -> SDoc -> Bool -> SDoc
mk_doc Bool
some_benefit SDoc
extra_doc Bool
yes_or_no
      = [SDoc] -> SDoc
forall doc. IsDoc doc => [doc] -> doc
vcat [ String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"arg infos" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> [ArgSummary] -> SDoc
forall a. Outputable a => a -> SDoc
ppr [ArgSummary]
arg_infos
             , String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"interesting continuation" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> CallCtxt -> SDoc
forall a. Outputable a => a -> SDoc
ppr CallCtxt
cont_info
             , String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"some_benefit" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> Bool -> SDoc
forall a. Outputable a => a -> SDoc
ppr Bool
some_benefit
             , String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"is exp:" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> Bool -> SDoc
forall a. Outputable a => a -> SDoc
ppr Bool
is_exp
             , String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"is work-free:" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> Bool -> SDoc
forall a. Outputable a => a -> SDoc
ppr Bool
is_wf
             , String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"guidance" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> UnfoldingGuidance -> SDoc
forall a. Outputable a => a -> SDoc
ppr UnfoldingGuidance
guidance
             , String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"case depth =" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> Int -> SDoc
forall doc. IsLine doc => Int -> doc
int Int
case_depth
             , String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"inline depth =" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> Int -> SDoc
forall doc. IsLine doc => Int -> doc
int Int
inline_depth
             , SDoc
extra_doc
             , String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"ANSWER =" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> if Bool
yes_or_no then String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"YES" else String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"NO"]

    ctx :: SDocContext
ctx = LogFlags -> SDocContext
log_default_dump_context (Logger -> LogFlags
logFlags Logger
logger)
    str :: String
str = String
"Considering inlining: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ SDocContext -> SDoc -> String
showSDocOneLine SDocContext
ctx (Id -> SDoc
forall a. Outputable a => a -> SDoc
ppr Id
id)
    n_val_args :: Int
n_val_args = [ArgSummary] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ArgSummary]
arg_infos

           -- some_benefit is used when the RHS is small enough
           -- and the call has enough (or too many) value
           -- arguments (ie n_val_args >= arity). But there must
           -- be *something* interesting about some argument, or the
           -- result context, to make it worth inlining
    calc_some_benefit :: Arity -> Bool -> Bool   -- The Arity is the number of args
                                         -- expected by the unfolding
    calc_some_benefit :: Int -> Bool -> Bool
calc_some_benefit Int
uf_arity Bool
is_inline
       | Bool -> Bool
not Bool
saturated = Bool
interesting_args       -- Under-saturated
                                        -- Note [Unsaturated applications]
       | Bool
otherwise = Bool
interesting_args   -- Saturated or over-saturated
                  Bool -> Bool -> Bool
|| Bool
interesting_call
      where
        saturated :: Bool
saturated      = Int
n_val_args Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
uf_arity
        over_saturated :: Bool
over_saturated = Int
n_val_args Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
uf_arity
        interesting_args :: Bool
interesting_args = (ArgSummary -> Bool) -> [ArgSummary] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any ArgSummary -> Bool
nonTriv [ArgSummary]
arg_infos
                -- NB: (any nonTriv arg_infos) looks at the
                -- over-saturated args too which is "wrong";
                -- but if over-saturated we inline anyway.

        interesting_call :: Bool
interesting_call
          | Bool
over_saturated
          = Bool
True
          | Bool
otherwise
          = case CallCtxt
cont_info of
              CallCtxt
CaseCtxt   -> Bool -> Bool
not (Bool
lone_variable Bool -> Bool -> Bool
&& Bool
is_exp)  -- Note [Lone variables]
              CallCtxt
ValAppCtxt -> Bool
True                           -- Note [Cast then apply]
              CallCtxt
RuleArgCtxt -> Int
uf_arity Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0  -- See Note [RHS of lets]
              CallCtxt
DiscArgCtxt -> Int
uf_arity Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0  -- Note [Inlining in ArgCtxt]
              RhsCtxt RecFlag
NonRecursive | Bool
is_inline
                          -> Int
uf_arity Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0  -- See Note [RHS of lets]
              CallCtxt
_other      -> Bool
False         -- See Note [Nested functions]


vselems :: VarSet -> [Var]
vselems :: VarSet -> [Id]
vselems VarSet
s = (Id -> [Id] -> [Id]) -> [Id] -> VarSet -> [Id]
forall a. (Id -> a -> a) -> a -> VarSet -> a
nonDetStrictFoldVarSet (\Id
v [Id]
vs -> Id
v Id -> [Id] -> [Id]
forall a. a -> [a] -> [a]
: [Id]
vs) [] VarSet
s

is_more_evald :: InScopeSet -> Id -> Bool
-- See Note [Inlining join points]
is_more_evald :: InScopeSet -> Id -> Bool
is_more_evald InScopeSet
in_scope Id
v
  | Just Id
v1 <- InScopeSet -> Id -> Maybe Id
lookupInScope InScopeSet
in_scope Id
v
  , IdUnfoldingFun
idUnfolding Id
v1 Unfolding -> Unfolding -> Bool
`isBetterUnfoldingThan` IdUnfoldingFun
idUnfolding Id
v
  = Bool
True
  | Bool
otherwise
  = Bool
False

{- Note [RHS of lets]
~~~~~~~~~~~~~~~~~~~~~
When the call is the argument of a function with a RULE, or the RHS of a let,
we are a little bit keener to inline (in tryUnfolding).  For example
     f y = (y,y,y)
     g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
We'd inline 'f' if the call was in a case context, and it kind-of-is,
only we can't see it.  Also
     x = f v
could be expensive whereas
     x = case v of (a,b) -> a
is patently cheap and may allow more eta expansion.

So, in `interesting_call` in `tryUnfolding`, we treat the RHS of a
/non-recursive/ let as not-totally-boring.  A /recursive/ let isn't
going be inlined so there is much less point.  Hence the (only reason
for the) RecFlag in RhsCtxt

We inline only if `f` has an `UnfWhen` guidance.  I found that being more eager
led to fruitless inlining.  See Note [Seq is boring] wrinkle (SB1) in
GHC.Core.Opt.Simplify.Utils.

Note [Inlining join points]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
In general we /do not/ want to inline join points /even if they are small/.
See Note [Duplicating join points] in GHC.Core.Opt.Simplify.Iteration.

But, assuming it is small, there are various times when we /do/ want to
inline a (non-recursive) join point.  Namely, if either of these hold:

(1) A /scrutinised/ argument (non-zero discount) has a /ValueArg/ info.
    Inlining will give some benefit.

(2) A free variable of the RHS is
    * Is /not/ evaluated at the join point defn site
    * Is evaluated at the join point call site.
    This is the is_more_evald predicate.

(1) is fairly obvious but (2) is less so. Here is the code for `integerGT`
without (2):

  integerGt = \ (x :: Integer) (y :: Integer) ->
     join fail _ = case x of {
       IS x1 -> case y of {
           IS y1 -> case <# x1 y1  of
                      _DEFAULT -> case ==# x1 y1 of
                                    DEFAULT -> True;
                                    1#      -> False
                      1# -> False
           IP ds1 -> False
           IN ds1 -> True

       IP x1 -> case y of {
                 _DEFAULT -> True;
                 IP y1    -> case bigNatCompare x1 y1 of
                               _DEFAULT -> False;
                               GT -> True
       IN x1 -> case y of {
                  _DEFAULT -> False;
                  IN y1    -> case bigNatCompare y1 x1 of
                                _DEFAULT -> False;
                                GT -> True
     in case x of {
       _DEFAULT -> jump fail GHC.Prim.(##);
       IS x1    -> case y of {
                     _DEFAULT -> jump fail GHC.Prim.(##);
                     IS y1 -> tagToEnum# @Bool (># x1 y1)

If we inline `fail` we get /much/ better code.  The only clue is that
`x` and `y` (a) are not evaluated at the definition site, and (b) are
evaluated at the call site.  This predicate is `isBetterUnfoldingThan`.

You might think that the variable should also be /scrutinised/ in the
join-point RHS, but here are two reasons for not taking that into
account.

First, we see code somewhat like this in imaginary/wheel-sieve1:
    let x = <small thunk> in
    join $j = (x,y) in
    case z of
      A -> case x of
             P -> $j
             Q -> blah
      B -> (x,x)
      C -> True
Here `x` can't be duplicated into the branches becuase it is used
in both the join point and the A branch.  But if we inline $j we get
    let x = <small thunk> in
    case z of
      A -> case x of x'
             P -> (x', y)
             Q -> blah
      B -> x
      C -> True
and now we /can/ duplicate x into the branches, at which point:
  * it is used strictly in the A branch (evaluated, but no thunk)
  * it is used lazily in the B branch (still a thunk)
  * it is not used at all in the C branch (no thunk)

Second, spectral/treejoin gets a big win from SpecConstr due
to evaluated-ness. Something like this:
    join $j x = ...(foo fv)...
    in case fv of I# x ->
       ...  jump $j True ...
If we inline $j, SpecConstr sees a call (foo (I# x)) and specialises.

Note [Unsaturated applications]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When a call is not saturated, we *still* inline if one of the
arguments has interesting structure.  That's sometimes very important.
A good example is the Ord instance for Bool in Base:

 Rec {
    $fOrdBool =GHC.Classes.D:Ord
                 @ Bool
                 ...
                 $cmin_ajX

    $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
    $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
  }

But the defn of GHC.Classes.$dmmin is:

  $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
    {- Arity: 3, HasNoCafRefs, Strictness: SLL,
       Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
                   case @ a GHC.Classes.<= @ a $dOrd x y of wild {
                     GHC.Types.False -> y GHC.Types.True -> x }) -}

We *really* want to inline $dmmin, even though it has arity 3, in
order to unravel the recursion.


Note [Things to watch]
~~~~~~~~~~~~~~~~~~~~~~
*   { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
    Assume x is exported, so not inlined unconditionally.
    Then we want x to inline unconditionally; no reason for it
    not to, and doing so avoids an indirection.

*   { x = I# 3; ....f x.... }
    Make sure that x does not inline unconditionally!
    Lest we get extra allocation.

Note [Nested functions]
~~~~~~~~~~~~~~~~~~~~~~~
At one time we treated a call of a non-top-level function as
"interesting" (regardless of how boring the context) in the hope
that inlining it would eliminate the binding, and its allocation.
Specifically, in the default case of interesting_call we had
   _other -> not is_top && uf_arity > 0

But actually postInlineUnconditionally does some of this and overall
it makes virtually no difference to nofib.  So I simplified away this
special case

Note [Cast then apply]
~~~~~~~~~~~~~~~~~~~~~~
Consider
   myIndex = __inline_me ( (/\a. <blah>) |> co )
   co :: (forall a. a -> a) ~ (forall a. T a)
     ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...

We need to inline myIndex to unravel this; but the actual call (myIndex a) has
no value arguments.  The ValAppCtxt gives it enough incentive to inline.

Note [Inlining in ArgCtxt]
~~~~~~~~~~~~~~~~~~~~~~~~~~
The condition (arity > 0) here is very important, because otherwise
we end up inlining top-level stuff into useless places; eg
   x = I# 3#
   f = \y.  g x
This can make a very big difference: it adds 16% to nofib 'integer' allocs,
and 20% to 'power'.

At one stage I replaced this condition by 'True' (leading to the above
slow-down).  The motivation was test eyeball/inline1.hs; but that seems
to work ok now.

NOTE: arguably, we should inline in ArgCtxt only if the result of the
call is at least CONLIKE.  At least for the cases where we use ArgCtxt
for the RHS of a 'let', we only profit from the inlining if we get a
CONLIKE thing (modulo lets).

Note [Lone variables]
~~~~~~~~~~~~~~~~~~~~~
See also Note [Interaction of exprIsWorkFree and lone variables]
which appears below

The "lone-variable" case is important.  I spent ages messing about
with unsatisfactory variants, but this is nice.  The idea is that if a
variable appears all alone

        as an arg of lazy fn, or rhs    BoringCtxt
        as scrutinee of a case          CaseCtxt
        as arg of a fn                  ArgCtxt
AND
        it is bound to a cheap expression

then we should not inline it (unless there is some other reason,
e.g. it is the sole occurrence).  That is what is happening at
the use of 'lone_variable' in 'interesting_call'.

Why?  At least in the case-scrutinee situation, turning
        let x = (a,b) in case x of y -> ...
into
        let x = (a,b) in case (a,b) of y -> ...
and thence to
        let x = (a,b) in let y = (a,b) in ...
is bad if the binding for x will remain.

Another example: I discovered that strings
were getting inlined straight back into applications of 'error'
because the latter is strict.
        s = "foo"
        f = \x -> ...(error s)...

Fundamentally such contexts should not encourage inlining because, provided
the RHS is "expandable" (see Note [exprIsExpandable] in GHC.Core.Utils) the
context can ``see'' the unfolding of the variable (e.g. case or a
RULE) so there's no gain.

However, watch out:

 * Consider this:
        foo = \n. [n])  {-# INLINE foo #-}
        bar = foo 20    {-# INLINE bar #-}
        baz = \n. case bar of { (m:_) -> m + n }
   Here we really want to inline 'bar' so that we can inline 'foo'
   and the whole thing unravels as it should obviously do.  This is
   important: in the NDP project, 'bar' generates a closure data
   structure rather than a list.

   So the non-inlining of lone_variables should only apply if the
   unfolding is regarded as expandable; because that is when
   exprIsConApp_maybe looks through the unfolding.  Hence the "&&
   is_exp" in the CaseCtxt branch of interesting_call

 * Even a type application or coercion isn't a lone variable.
   Consider
        case $fMonadST @ RealWorld of { :DMonad a b c -> c }
   We had better inline that sucker!  The case won't see through it.

   For now, I'm treating treating a variable applied to types
   in a *lazy* context "lone". The motivating example was
        f = /\a. \x. BIG
        g = /\a. \y.  h (f a)
   There's no advantage in inlining f here, and perhaps
   a significant disadvantage.  Hence some_val_args in the Stop case

Note [Interaction of exprIsWorkFree and lone variables]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The lone-variable test says "don't inline if a case expression
scrutinises a lone variable whose unfolding is cheap".  It's very
important that, under these circumstances, exprIsConApp_maybe
can spot a constructor application. So, for example, we don't
consider
        let x = e in (x,x)
to be cheap, and that's good because exprIsConApp_maybe doesn't
think that expression is a constructor application.

In the 'not (lone_variable && is_wf)' test, I used to test is_value
rather than is_wf, which was utterly wrong, because the above
expression responds True to exprIsHNF, which is what sets is_value.

This kind of thing can occur if you have

        {-# INLINE foo #-}
        foo = let x = e in (x,x)

which Roman did.


-}

computeDiscount :: [Int] -> Int -> [ArgSummary] -> CallCtxt
                -> Int
computeDiscount :: [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
computeDiscount [Int]
arg_discounts Int
res_discount [ArgSummary]
arg_infos CallCtxt
cont_info

  = Int
10          -- Discount of 10 because the result replaces the call
                -- so we count 10 for the function itself

    Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
10 Int -> Int -> Int
forall a. Num a => a -> a -> a
* [Int] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
actual_arg_discounts
               -- Discount of 10 for each arg supplied,
               -- because the result replaces the call

    Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
total_arg_discount Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
res_discount'
  where
    actual_arg_discounts :: [Int]
actual_arg_discounts = (Int -> ArgSummary -> Int) -> [Int] -> [ArgSummary] -> [Int]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> ArgSummary -> Int
forall {a}. Num a => a -> ArgSummary -> a
mk_arg_discount [Int]
arg_discounts [ArgSummary]
arg_infos
    total_arg_discount :: Int
total_arg_discount   = [Int] -> Int
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
actual_arg_discounts

    mk_arg_discount :: a -> ArgSummary -> a
mk_arg_discount a
_        ArgSummary
TrivArg    = a
0
    mk_arg_discount a
_        ArgSummary
NonTrivArg = a
10
    mk_arg_discount a
discount ArgSummary
ValueArg   = a
discount

    res_discount' :: Int
res_discount'
      | Ordering
LT <- [Int]
arg_discounts [Int] -> [ArgSummary] -> Ordering
forall a b. [a] -> [b] -> Ordering
`compareLength` [ArgSummary]
arg_infos
      = Int
res_discount   -- Over-saturated
      | Bool
otherwise
      = case CallCtxt
cont_info of
           CallCtxt
BoringCtxt  -> Int
0
           CallCtxt
CaseCtxt    -> Int
res_discount  -- Presumably a constructor
           CallCtxt
ValAppCtxt  -> Int
res_discount  -- Presumably a function
           CallCtxt
_           -> Int
40 Int -> Int -> Int
forall a. Ord a => a -> a -> a
`min` Int
res_discount
                -- ToDo: this 40 `min` res_discount doesn't seem right
                --   for DiscArgCtxt it shouldn't matter because the function will
                --       get the arg discount for any non-triv arg
                --   for RuleArgCtxt we do want to be keener to inline; but not only
                --       constructor results
                --   for RhsCtxt I suppose that exposing a data con is good in general
                --   And 40 seems very arbitrary
                --
                -- res_discount can be very large when a function returns
                -- constructors; but we only want to invoke that large discount
                -- when there's a case continuation.
                -- Otherwise we, rather arbitrarily, threshold it.  Yuk.
                -- But we want to avoid inlining large functions that return
                -- constructors into contexts that are simply "interesting"