{-# LANGUAGE TupleSections #-}
module GHC.Core.LateCC.TopLevelBinds where

import GHC.Prelude

import GHC.Core.LateCC.Types
import GHC.Core.LateCC.Utils

import GHC.Core
import GHC.Core.Opt.Monad
import GHC.Driver.DynFlags
import GHC.Types.Id
import GHC.Types.Name
import GHC.Unit.Module.ModGuts

import Data.Maybe

{- Note [Collecting late cost centres]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Usually cost centres defined by a module are collected
during tidy by collectCostCentres. However with `-fprof-late`
we insert cost centres after inlining. So we keep a list of
all the cost centres we inserted and combine that with the list
of cost centres found during tidy.

To avoid overhead when using -fprof-inline there is a flag to stop
us from collecting them here when we run this pass before tidy.

Note [Adding late cost centres to top level bindings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The basic idea is very simple. For a top level binder
`f = rhs` we compile it as if the user had written
`f = {-# SCC f #-} rhs`.

If we do this after unfoldings for `f` have been created this
doesn't impact core-level optimizations at all. If we do it
before the cost centre will be included in the unfolding and
might inhibit optimizations at the call site. For this reason
we provide flags for both approaches as they have different
tradeoffs.

To reduce overhead we ignore workfree bindings because they don't contribute
meaningfully to a performance profile. This reduces code size massively as it
allows us to allocate definitions like `val = Just 32` at compile time instead
of turning them into a CAF of the form `val = <scc val> let x = Just 32 in x` which
would be the alternative.

We make an exception for rhss with function types. This allows us to get
cost centres on eta-reduced definitions like `f = g`. By putting a tick onto
`f`s rhs we end up with

    f = \eta1 eta2 ... etan ->
        <scc f> g eta1 ... etan

Which can make it easier to understand call graphs of an application.

We also don't add a cost centre for any binder that is a constructor
worker or wrapper. These will never meaningfully enrich the resulting
profile so we improve efficiency by omitting those.

-}

-- | Add late cost centres directly to the 'ModGuts'. This is used inside the
-- core pipeline with the -fprof-late-inline flag. It should not be used after
-- tidy, since it does not manually track inserted cost centers. See
-- Note [Collecting late cost centres].
topLevelBindsCCMG :: ModGuts -> CoreM ModGuts
topLevelBindsCCMG :: ModGuts -> CoreM ModGuts
topLevelBindsCCMG ModGuts
guts = do
    dflags <- CoreM DynFlags
forall (m :: * -> *). HasDynFlags m => m DynFlags
getDynFlags
    let
      env =
        LateCCEnv
          { lateCCEnv_module :: Module
lateCCEnv_module = ModGuts -> Module
mg_module ModGuts
guts

            -- We don't use this for topLevelBindsCC, so Nothing is okay
          , lateCCEnv_file :: Maybe FastString
lateCCEnv_file = Maybe FastString
forall a. Maybe a
Nothing

          , lateCCEnv_countEntries :: Bool
lateCCEnv_countEntries= GeneralFlag -> DynFlags -> Bool
gopt GeneralFlag
Opt_ProfCountEntries DynFlags
dflags
          , lateCCEnv_collectCCs :: Bool
lateCCEnv_collectCCs = Bool
False
          }
      guts' =
        ModGuts
guts
          { mg_binds =
              fst
                ( doLateCostCenters
                    env
                    (initLateCCState ())
                    (topLevelBindsCC (const True))
                    (mg_binds guts)
                )
          }
    return guts'

-- | Insert cost centres on top-level bindings in the module, depending on
-- whether or not they satisfy the given predicate.
topLevelBindsCC :: (CoreExpr -> Bool) -> CoreBind -> LateCCM s CoreBind
topLevelBindsCC :: forall s. (CoreExpr -> Bool) -> CoreBind -> LateCCM s CoreBind
topLevelBindsCC CoreExpr -> Bool
pred CoreBind
core_bind =
    case CoreBind
core_bind of
      NonRec CoreBndr
b CoreExpr
rhs ->
        CoreBndr -> CoreExpr -> CoreBind
forall b. b -> Expr b -> Bind b
NonRec CoreBndr
b (CoreExpr -> CoreBind)
-> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
-> LateCCM s CoreBind
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> CoreBndr
-> CoreExpr -> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
forall s. CoreBndr -> CoreExpr -> LateCCM s CoreExpr
doBndr CoreBndr
b CoreExpr
rhs
      Rec [(CoreBndr, CoreExpr)]
bs ->
        [(CoreBndr, CoreExpr)] -> CoreBind
forall b. [(b, Expr b)] -> Bind b
Rec ([(CoreBndr, CoreExpr)] -> CoreBind)
-> ReaderT LateCCEnv (State (LateCCState s)) [(CoreBndr, CoreExpr)]
-> LateCCM s CoreBind
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((CoreBndr, CoreExpr)
 -> ReaderT LateCCEnv (State (LateCCState s)) (CoreBndr, CoreExpr))
-> [(CoreBndr, CoreExpr)]
-> ReaderT LateCCEnv (State (LateCCState s)) [(CoreBndr, CoreExpr)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (CoreBndr, CoreExpr)
-> ReaderT LateCCEnv (State (LateCCState s)) (CoreBndr, CoreExpr)
forall s. (CoreBndr, CoreExpr) -> LateCCM s (CoreBndr, CoreExpr)
doPair [(CoreBndr, CoreExpr)]
bs
  where
    doPair :: ((Id, CoreExpr) -> LateCCM s (Id, CoreExpr))
    doPair :: forall s. (CoreBndr, CoreExpr) -> LateCCM s (CoreBndr, CoreExpr)
doPair (CoreBndr
b,CoreExpr
rhs) = (CoreBndr
b,) (CoreExpr -> (CoreBndr, CoreExpr))
-> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
-> ReaderT LateCCEnv (State (LateCCState s)) (CoreBndr, CoreExpr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> CoreBndr
-> CoreExpr -> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
forall s. CoreBndr -> CoreExpr -> LateCCM s CoreExpr
doBndr CoreBndr
b CoreExpr
rhs

    doBndr :: Id -> CoreExpr -> LateCCM s CoreExpr
    doBndr :: forall s. CoreBndr -> CoreExpr -> LateCCM s CoreExpr
doBndr CoreBndr
bndr CoreExpr
rhs
      -- Not a constructor worker.
      -- Cost centres on constructor workers are pretty much useless so we don't emit them
      -- if we are looking at the rhs of a constructor binding.
      | Maybe DataCon -> Bool
forall a. Maybe a -> Bool
isNothing (CoreBndr -> Maybe DataCon
isDataConId_maybe CoreBndr
bndr)
      , CoreExpr -> Bool
pred CoreExpr
rhs
      = CoreBndr -> CoreExpr -> LateCCM s CoreExpr
forall s. CoreBndr -> CoreExpr -> LateCCM s CoreExpr
addCC CoreBndr
bndr CoreExpr
rhs
      | Bool
otherwise = CoreExpr -> LateCCM s CoreExpr
forall a. a -> ReaderT LateCCEnv (State (LateCCState s)) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure CoreExpr
rhs

    -- We want to put the cost centre below the lambda as we only care about
    -- executions of the RHS. Note that the lambdas might be hidden under ticks
    -- or casts. So look through these as well.
    addCC :: Id -> CoreExpr -> LateCCM s CoreExpr
    addCC :: forall s. CoreBndr -> CoreExpr -> LateCCM s CoreExpr
addCC CoreBndr
bndr (Cast CoreExpr
rhs CoercionR
co) = (CoreExpr -> CoercionR -> CoreExpr)
-> ReaderT
     LateCCEnv
     (State (LateCCState s))
     (CoreExpr -> CoercionR -> CoreExpr)
forall a. a -> ReaderT LateCCEnv (State (LateCCState s)) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure CoreExpr -> CoercionR -> CoreExpr
forall b. Expr b -> CoercionR -> Expr b
Cast ReaderT
  LateCCEnv
  (State (LateCCState s))
  (CoreExpr -> CoercionR -> CoreExpr)
-> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
-> ReaderT
     LateCCEnv (State (LateCCState s)) (CoercionR -> CoreExpr)
forall a b.
ReaderT LateCCEnv (State (LateCCState s)) (a -> b)
-> ReaderT LateCCEnv (State (LateCCState s)) a
-> ReaderT LateCCEnv (State (LateCCState s)) b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> CoreBndr
-> CoreExpr -> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
forall s. CoreBndr -> CoreExpr -> LateCCM s CoreExpr
addCC CoreBndr
bndr CoreExpr
rhs ReaderT LateCCEnv (State (LateCCState s)) (CoercionR -> CoreExpr)
-> ReaderT LateCCEnv (State (LateCCState s)) CoercionR
-> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
forall a b.
ReaderT LateCCEnv (State (LateCCState s)) (a -> b)
-> ReaderT LateCCEnv (State (LateCCState s)) a
-> ReaderT LateCCEnv (State (LateCCState s)) b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> CoercionR -> ReaderT LateCCEnv (State (LateCCState s)) CoercionR
forall a. a -> ReaderT LateCCEnv (State (LateCCState s)) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure CoercionR
co
    addCC CoreBndr
bndr (Tick CoreTickish
t CoreExpr
rhs) = (CoreTickish -> CoreExpr -> CoreExpr
forall b. CoreTickish -> Expr b -> Expr b
Tick CoreTickish
t) (CoreExpr -> CoreExpr)
-> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
-> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> CoreBndr
-> CoreExpr -> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
forall s. CoreBndr -> CoreExpr -> LateCCM s CoreExpr
addCC CoreBndr
bndr CoreExpr
rhs
    addCC CoreBndr
bndr (Lam CoreBndr
b CoreExpr
rhs) = CoreBndr -> CoreExpr -> CoreExpr
forall b. b -> Expr b -> Expr b
Lam CoreBndr
b (CoreExpr -> CoreExpr)
-> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
-> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> CoreBndr
-> CoreExpr -> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
forall s. CoreBndr -> CoreExpr -> LateCCM s CoreExpr
addCC CoreBndr
bndr CoreExpr
rhs
    addCC CoreBndr
bndr CoreExpr
rhs = do
      let name :: Name
name = CoreBndr -> Name
idName CoreBndr
bndr
          cc_loc :: SrcSpan
cc_loc = Name -> SrcSpan
nameSrcSpan Name
name
          cc_name :: FastString
cc_name = Name -> FastString
forall a. NamedThing a => a -> FastString
getOccFS Name
name
      FastString
-> SrcSpan
-> CoreExpr
-> ReaderT LateCCEnv (State (LateCCState s)) CoreExpr
forall s. FastString -> SrcSpan -> CoreExpr -> LateCCM s CoreExpr
insertCC FastString
cc_name SrcSpan
cc_loc CoreExpr
rhs