{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeFamilies #-}

-----------------------------------------------------------------------------
--
-- Stg to C-- code generation: expressions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module GHC.StgToCmm.Expr ( cgExpr, cgLit ) where

import GHC.Prelude hiding ((<*>))

import {-# SOURCE #-} GHC.StgToCmm.Bind ( cgBind )

import GHC.StgToCmm.Monad
import GHC.StgToCmm.Heap
import GHC.StgToCmm.Env
import GHC.StgToCmm.DataCon
import GHC.StgToCmm.Prof (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
import GHC.StgToCmm.Layout
import GHC.StgToCmm.Lit
import GHC.StgToCmm.Prim
import GHC.StgToCmm.Hpc
import GHC.StgToCmm.TagCheck
import GHC.StgToCmm.Ticky
import GHC.StgToCmm.Utils
import GHC.StgToCmm.Closure

import GHC.Stg.Syntax

import GHC.Cmm.Graph
import GHC.Cmm.BlockId
import GHC.Cmm hiding ( succ )
import GHC.Cmm.Info
import GHC.Cmm.Utils ( cmmTagMask, mkWordCLit, mAX_PTR_TAG )
import GHC.Core
import GHC.Core.DataCon
import GHC.Types.ForeignCall
import GHC.Types.Id
import GHC.Builtin.PrimOps
import GHC.Core.TyCon
import GHC.Core.Type        ( isUnliftedType )
import GHC.Types.RepType    ( isZeroBitTy, countConRepArgs, mightBeFunTy )
import GHC.Types.CostCentre ( CostCentreStack, currentCCS )
import GHC.Types.Tickish
import GHC.Data.Maybe
import GHC.Utils.Misc
import GHC.Data.FastString
import GHC.Utils.Outputable
import GHC.Utils.Panic

import Control.Monad ( unless, void )
import Control.Arrow ( first )
import Data.List     ( partition )
import GHC.Stg.EnforceEpt.TagSig (isTaggedSig)
import GHC.Platform.Profile (profileIsProfiling)

------------------------------------------------------------------------
--              cgExpr: the main function
------------------------------------------------------------------------

-- Generate code for an STG expression, dispatching on its constructor.
-- The dataToTag primops are handled here rather than in cgOpApp; every
-- other primop/foreign call goes through cgOpApp.
cgExpr  :: CgStgExpr -> FCode ReturnKind

cgExpr (StgApp fun args)     = cgIdApp fun args

-- dataToTagSmall# :: a_levpoly -> Int#
-- See Note [DataToTag overview] in GHC.Tc.Instance.Class,
-- particularly wrinkles H3 and DTW4
cgExpr (StgOpApp (StgPrimOp DataToTagSmallOp) [StgVarArg a] _res_ty) = do
  platform <- getPlatform
  emitComment (mkFastString "dataToTagSmall#")

  -- Evaluate the argument, receiving the result in a fresh temporary
  a_eval_reg <- newTemp (bWord platform)
  _ <- withSequel (AssignTo [a_eval_reg] False) (cgIdApp a [])
  let a_eval_expr = CmmReg (CmmLocal a_eval_reg)
      tag1 = cmmConstrTag1 platform a_eval_expr

  -- subtract 1 because we need to return a zero-indexed tag
  emitReturn [cmmSubWord platform tag1 (CmmLit $ mkWordCLit platform 1)]

-- dataToTagLarge# :: a_levpoly -> Int#
-- See Note [DataToTag overview] in GHC.Tc.Instance.Class,
-- particularly wrinkles H3 and DTW4
cgExpr (StgOpApp (StgPrimOp DataToTagLargeOp) [StgVarArg a] _res_ty) = do
  platform <- getPlatform
  emitComment (mkFastString "dataToTagLarge#")

  -- Evaluate the argument, receiving the result in a fresh temporary
  a_eval_reg <- newTemp (bWord platform)
  _ <- withSequel (AssignTo [a_eval_reg] False) (cgIdApp a [])
  let a_eval_expr = CmmReg (CmmLocal a_eval_reg)

  tag1_reg <- assignTemp $ cmmConstrTag1 platform a_eval_expr
  result_reg <- newTemp (bWord platform)
  let tag1_expr = CmmReg $ CmmLocal tag1_reg
      -- The pointer tag equals the tag mask: in that case we consult the
      -- info table instead of trusting the pointer tag (see the branch below)
      is_too_big_tag = cmmEqWord platform tag1_expr (cmmTagMask platform)

  -- Return the constructor index from the pointer tag
  -- (Used if pointer tag is small enough to be unambiguous)
  return_ptr_tag <- getCode $ do
    emitAssign (CmmLocal result_reg)
      $ cmmSubWord platform tag1_expr (CmmLit $ mkWordCLit platform 1)

  -- Return the constructor index recorded in the info table
  return_info_tag <- getCode $ do
    profile     <- getProfile
    align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
    emitAssign (CmmLocal result_reg)
      $ getConstrTag profile align_check (cmmUntag platform a_eval_expr)

  emit =<< mkCmmIfThenElse' is_too_big_tag return_info_tag return_ptr_tag (Just False)
  emitReturn [CmmReg $ CmmLocal result_reg]


cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con mn args _) = cgConApp con mn args
cgExpr (StgTick t e)         = cgTick t >> cgExpr e
cgExpr (StgLit lit)          = do cmm_expr <- cgLit lit
                                  emitReturn [cmm_expr]

cgExpr (StgLet _ binds expr) = do { cgBind binds;     cgExpr expr }
cgExpr (StgLetNoEscape _ binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
       -- join_id labels the code that follows the body; the let-no-escape
       -- right-hand sides are handed this label by cgLneBinds
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr bndr alt_type alts) =
  cgCase expr bndr alt_type alts

------------------------------------------------------------------------
--              Let no escape
------------------------------------------------------------------------

{- Generating code for a let-no-escape binding, aka join point is very
very similar to what we do for a case expression.  The duality is
between
        let-no-escape x = b
        in e
and
        case e of ... -> b

That is, the RHS of 'x' (ie 'b') will execute *later*, just like
the alternative of the case; it needs to be compiled in an environment
in which all volatile bindings are forgotten, and the free vars are
bound only to stable things like stack locations.  The 'e' part will
execute *next*, just like the scrutinee of a case. -}

-------------------------
-- Generate code for the binding group of a let-no-escape.
-- The BlockId is the join point passed on to cgLetNoEscapeRhs for each
-- right-hand side.
cgLneBinds :: BlockId -> CgStgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do  { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
        ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
        ; fcode
        ; addBindC info }

cgLneBinds join_id (StgRec pairs)
  = do  { local_cc <- saveCurrentCostCentre
        ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
        ; let (infos, fcodes) = unzip r
          -- Unlike the non-recursive case, all binders are added to the
          -- environment *before* any right-hand side code is run
        ; addBindsC infos
        ; sequence_ fcodes
        }

-------------------------
-- Prepare one let-no-escape right-hand side.  Returns the binder's
-- CgIdInfo together with a deferred action which, when run, emits the
-- RHS code out of line at the binder's own block, followed by a branch
-- to the join point.
cgLetNoEscapeRhs
    :: BlockId          -- join point for successor of let-no-escape
    -> Maybe LocalReg   -- Saved cost centre
    -> Id
    -> CgStgRhs
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
       -- Panics (via expectJust) if the info is not a let-no-escape
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
       -- NB: (<*>) here is not the Prelude one, which this module hides
     ; let code = do { (_, body) <- getCodeScoped rhs_code
                     ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
     ; return (info, code)
     }

-- Dispatch on the form of a let-no-escape right-hand side; both forms
-- end up in cgLetNoEscapeClosure.
cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- Saved cost centre
    -> Id
    -> CgStgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure _ cc _upd args body _typ)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con mn _ts args _typ)
  = cgLetNoEscapeClosure bndr local_cc cc []
      (StgConApp con mn args (pprPanic "cgLetNoEscapeRhsBody" $
                           text "StgRhsCon doesn't have type args"))
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor. It's easy; just behave as if it
        -- was an StgRhsClosure with a ConApp inside!
        -- The last StgConApp field is a panic thunk: cgExpr ignores that
        -- field for StgConApp, so it is never forced on this path.

-------------------------
-- Build the CgIdInfo for a let-no-escape binder and the (deferred) code
-- for its body.  The body code restores the saved cost centre, binds the
-- arguments to registers and then compiles the body under a
-- no-escape heap check.
cgLetNoEscapeClosure
        :: Id                   -- binder
        -> Maybe LocalReg       -- Slot for saved current cost centre
        -> CostCentreStack      -- XXX: *** NOT USED *** why not?
        -> [NonVoid Id]         -- Args (as in \ args -> body)
        -> CgStgExpr            -- Body (as in above)
        -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do platform <- getPlatform
       let code = forkLneBody $ withNewTickyCounterLNE bndr args $ do
                { restoreCurrentCostCentre platform cc_slot
                ; arg_regs <- bindArgsToRegs args
                ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
       return ( lneIdInfo platform bndr args, code )


------------------------------------------------------------------------
--              Case expressions
------------------------------------------------------------------------

{- Note [Compiling case expressions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is quite interesting to decide whether to put a heap-check at the
start of each alternative.  Of course we certainly have to do so if
the case forces an evaluation, or if there is a primitive op which can
trigger GC.

NB: things are not settled here: see #8326.

A more interesting situation is this (a Plan-B situation)

        !P!;
        ...P...
        case x# of
          0#      -> !Q!; ...Q...
          default -> !R!; ...R...

where !x! indicates a possible heap-check point. The heap checks
in the alternatives *can* be omitted, in which case the topmost
heapcheck will take their worst case into account.

In favour of omitting !Q!, !R!:

 - *May* save a heap overflow test,
   if ...P... allocates anything.

 - We can use relative addressing from a single Hp to
   get at all the closures so allocated.

 - No need to save volatile vars etc across heap checks
   in !Q!, !R!

Against omitting !Q!, !R!

  - May put a heap-check into the inner loop.  Suppose
        the main loop is P -> R -> P -> R...
        Q is the loop exit, and only it does allocation.
    This only hurts us if P does no allocation.  If P allocates,
    then there is a heap check in the inner loop anyway.

  - May do more allocation than reqd.  This sometimes bites us
    badly.  For example, nfib (ha!) allocates about 30\% more space if the
    worst-casing is done, because many many calls to nfib are leaf calls
    which don't need to allocate anything.

    We can un-allocate, but that costs an instruction

Neither problem hurts us if there is only one alternative.

Suppose the inner loop is P->R->P->R etc.  Then here is
how many heap checks we get in the *inner loop* under various
conditions

  Alloc   Heap check in branches (!Q!, !R!)?
  P Q R      yes     no (absorb to !P!)
--------------------------------------
  n n n      0          0
  n y n      0          1
  n . y      1          1
  y . y      2          1
  y . n      1          1

Best choices: absorb heap checks from Q and R into !P! iff
  a) P itself does some allocation
or
  b) P does allocation, or there is exactly one alternative

We adopt (b) because that is more likely to put the heap check at the
entry to a function, when not many things are live.  After a bunch of
single-branch cases, we may have lots of things live

Hence: Two basic plans for

        case e of r { alts }

------ Plan A: the general case ---------

        ...save current cost centre...

        ...code for e,
           with sequel (SetLocals r)

        ...restore current cost centre...
        ...code for alts...
        ...alts do their own heap checks

   When using GcInAlts the return point for heap checks and evaluating
   the scrutinee is shared. This does mean we might execute the actual
   branching code twice but it's rare enough to not matter.
   The huge advantage of this pattern is that we do not require multiple
   info tables for returning from gc as they can be shared between all
   cases. Reducing code size nicely.

------ Plan B: special case when ---------
  (i)  e does not allocate or call GC
  (ii) either upstream code performs allocation
       or there is just one alternative

  Then heap allocation in the (single) case branch
  is absorbed by the upstream check.
  Very common example: primops on unboxed values

        ...code for e,
           with sequel (SetLocals r)...

        ...code for alts...
        ...no heap check...

   There is a variant B.2 which we use if:

  (i)   e is already evaluated+tagged
  (ii)  We have multiple alternatives
  (iii) and there is no upstream allocation.

  Here we also place one heap check before the `case` which
  branches on `e`. Hopefully to be absorbed by an already existing
  heap check further up. However the big difference in this case is that
  there is no code for e. So we are not guaranteed that the heap
  checks of the alts will be combined with a heap check further up.

  Very common example: Casing on strict fields.

        ...heap check...
        ...assign bindings...

        ...code for alts...
        ...no heap check...

  -- Reasoning for Plan B.2:
   Since the scrutinee is already evaluated there is no evaluation
   call which would force an info table that we can use as a shared
   return point.
   This means currently if we were to do GcInAlts like in Plan A then
   we would end up with one info table per alternative.

   To avoid this we unconditionally do gc outside of the alts with all
   the pros and cons described in Note [Compiling case expressions].
   Rewriting the logic to generate a shared return point before the case
   expression while keeping the heap checks in the alternatives would be
   possible. But it's unclear to me that this would actually be an improvement.

   This means if we have code along these lines:

      g x y = case x of
         True -> Left $ (y + 1,y,y-1)
         False -> Right $! y - (2 :: Int)

   We get these potential heap check placements:

   f = ...
      !max(L,R)!; -- Might be absorbed upstream.
      case x of
         True  -> !L!; ...L...
         False -> !R!; ...R...

   And we place a heap check at !max(L,R)!

   The downsides of using !max(L,R)! are:

   * If f is recursive, and the hot loop wouldn't allocate, but the exit branch does then we do
   a redundant heap check.
   * We use one more instruction to de-allocate the unused heap in the branch using less heap. (Negligible)
   * A small risk of running gc slightly more often than needed especially if one branch allocates a lot.

   The upsides are:
   * May save a heap overflow test if there is an upstream check already.
   * If the heap check is absorbed upstream we can also eliminate its info table.
   * We generate at most one heap check (versus one per alt otherwise).
   * No need to save volatile vars etc across heap checks in !L!, !R!
   * We can use relative addressing from a single Hp to get at all the closures so allocated. (seems negligible)
   * It fits neatly in the logic we already have for handling A/B

   For containers:Data/Sequence/Internal/Sorting.o the difference is
   about 10% in terms of code size compared to using Plan A for this case.
   The main downside is we might put heap checks into loops, even if we
   could avoid it (See Note [Compiling case expressions]).

   Potential improvement: Investigate if heap checks in alts would be an
   improvement if we generate and use a shared return point that is placed
   in the common path for all alts.

-}



-------------------------------------
-- Where the heap check for a case expression's alternatives is placed;
-- see Note [Compiling case expressions].
data GcPlan
  = GcInAlts            -- Put a GC check at the start of the case alternatives,
        [LocalReg]      -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check

-------------------------------------
-- Generate code for a case expression, given (in order) the scrutinee,
-- the case binder, the alternative type and the alternatives.
cgCase :: CgStgExpr -> Id -> AltType -> [CgStgAlt] -> FCode ReturnKind

{-
Note [Scrutinising VoidRep]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Suppose we have this STG code:
   f = \[s : State# RealWorld] ->
       case s of _ -> blah
This is very odd.  Why are we scrutinising a state token?  But it
can arise with bizarre NOINLINE pragmas (#9964)
    crash :: IO ()
    crash = IO (\s -> let {-# NOINLINE s' #-}
                          s' = s
                      in (# s', () #))

Now the trouble is that 's' has VoidRep, and we do not bind void
arguments in the environment; they don't live anywhere.  See the
calls to nonVoidIds in various places.  So we must not look up
's' in the environment.  Instead, just evaluate the RHS!  Simple.

Note [Dead-binder optimisation]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider:

   case x of (y, z<dead>) -> rhs

where `z` is unused in `rhs`.  When we return from the eval of `x`,
GHC.StgToCmm.DataCon.bindConArgs will generate some loads, assuming the
value of `x` is returned in R1:
   y := R1[1]
   z := R1[2]

If `z` is never used, the load `z := R1[2]` is a waste of a memory operation.
CmmSink (which sinks loads to their usage sites, if any) will eliminate the dead
load; but
  1. CmmSink only runs with -O
  2. It would save CmmSink work if we simply did not generate the load in the
  first place.

Hence STG uses dead-binder information, in `bindConArgs` to drop dead loads.
That's why we preserve occurrence-info on binders in GHC.Core.Tidy (see
GHC.Core.Tidy.tidyIdBndr).

So it's important that deadness is accurate.  But StgCse can invalidate it
(#14895 #24233).  Here is an example:

  map_either :: (a -> b) -> Either String a -> Either String b
  map_either = \f e -> case e of b<dead> {
    Right x -> Right (f x)
    Left  x -> Left x
  }

  The case-binder "b" is dead (not used in the rhss of the alternatives).
  StgCse notices that `Left x` doesn't need to be allocated as we can reuse `b`,
  and we get:

  map_either :: (a -> b) -> Either String a -> Either String b
  map_either = \f e -> case e of b { -- b no longer dead!
    Right x -> Right (f x)
    Left  x -> b
  }

For now StgCse simply zaps occurrence information on case binders. A more
accurate update would complexify the implementation and doesn't seem worth it.

-}

-- Scrutinee is a variable of zero-bit type with a single DEFAULT
-- alternative: do not look the variable up, just compile the RHS.
cgCase (StgApp v []) _ (PrimAlt _) alts
  | isZeroBitTy (idType v)  -- See Note [Scrutinising VoidRep]
  , [GenStgAlt{alt_con=DEFAULT, alt_bndrs=_, alt_rhs=rhs}] <- alts
  = cgExpr rhs

{- Note [Dodgy unsafeCoerce 1]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider
    case (x :: HValue) |> co of (y :: MutVar# Int)
        DEFAULT -> ...
We want to generate an assignment
     y := x
We want to allow this assignment to be generated in the case when the
types are compatible, because this allows some slightly-dodgy but
occasionally-useful casts to be used, such as in GHC.Runtime.Heap.Inspect
where we cast an HValue to a MutVar# so we can print out the contents
of the MutVar#.  If instead we generate code that enters the HValue,
then we'll get a runtime panic, because the HValue really is a
MutVar#.  The types are compatible though, so we can just generate an
assignment.
-}
-- Scrutinee is a variable of unlifted type with primitive alternatives:
-- no evaluation is needed, so copy the variable into the case binder's
-- register and compile the alternatives without a heap check in them.
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnliftedType (idType v)  -- Note [Dodgy unsafeCoerce 1]
  = -- assignment suffices for unlifted types
    do { platform <- getPlatform
       ; unless (reps_compatible platform) $
           pprPanic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
                    (pp_bndr v $$ pp_bndr bndr)
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg platform (NonVoid bndr)))
                    (idInfoToAmode v_info)
       -- Add bndr to the environment
       ; _ <- bindArgToReg (NonVoid bndr)
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    -- The assignment is only valid if scrutinee and binder have
    -- compatible primitive representations
    reps_compatible platform = primRepCompatible platform (idPrimRepU v) (idPrimRepU bndr)

    -- Render "id :: type (rep)" for the panic message
    pp_bndr id = ppr id <+> dcolon <+> ppr (idType id) <+> parens (ppr (idPrimRepU id))

{- Note [Dodgy unsafeCoerce 2, #3132]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In all other cases of a lifted Id being cast to an unlifted type, the
Id should be bound to bottom, otherwise this is an unsafe use of
unsafeCoerce.  We can generate code to enter the Id and assume that
it will never return.  Hence, we emit the usual enter/return code, and
because bottom must be untagged, it will be entered.  The Sequel is a
type-correct assignment, albeit bogus.  The (dead) continuation loops;
it would be better to invoke some kind of panic function here.
-}
-- Remaining variable scrutinees with primitive alternatives; see
-- Note [Dodgy unsafeCoerce 2, #3132].  Emit the usual evaluation of the
-- scrutinee, followed by a continuation that is not expected to be reached.
cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = do { platform <- getPlatform
       ; mb_cc <- maybeSaveCostCentre True
       ; _ <- withSequel
                  (AssignTo [idToReg platform (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre platform mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newBlockId
       ; emitLabel l
       ; emit (mkBranch l)  -- an infinite loop
       ; return AssignedDirectly
       }

{-
Note [Eliminate trivial Solo# continuations]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If we have code like this:

    case scrut of bndr {
      alt -> Solo# bndr
    }

The RHS of the only branch does nothing except wrap the case-binder
returned by 'scrut' in a unary unboxed tuple.  But unboxed tuples
don't exist at run-time, i.e. the branch is a no-op!  So we can
generate code as if we just had 'scrut' instead of a case-expression.

This situation can easily arise for IO or ST code, where the last
operation a function performs is commonly 'pure $! someExpr'.
See also #24264 and !11778.  More concretely, as of December 2023,
when building a stage2 "perf+no_profiled_libs" ghc:

 * The special case is reached 398 times.
 * Of these, 158 have scrutinees that call a function or enter a
   potential thunk, and would need to push a useless stack frame if
   not for this optimisation.

We might consider rewriting such case expressions in GHC.Stg.CSE as a
slight extension of Note [All alternatives are the binder].  But the
RuntimeReps of 'bndr' and 'Solo# bndr' are not exactly the same, and
per Note [Typing the STG language] in GHC.Stg.Lint, we do expect Stg
code to remain RuntimeRep-correct.  So we just detect the situation in
StgToCmm instead.

Crucially, the return conventions for 'ty' and '(# ty #)' are compatible:
The returned value is passed in the same register(s) or stack slot in
both conventions, and the set of allowed return values for 'ty'
is a subset of the allowed return values for '(# ty #)':

 * For a lifted type 'ty', the return convention for 'ty' promises to
   return an evaluated-properly-tagged heap pointer, while a return
   type '(# ty #)' only promises to return a heap pointer to an object
   that can be evaluated later if need be.

 * If 'ty' is unlifted, the allowed return
   values for 'ty' and '(# ty #)' are identical.
-}

-- Single alternative whose RHS merely wraps the case binder in a unary
-- unboxed tuple: compile the scrutinee as if the case were not there.
cgCase scrut bndr _alt_type [GenStgAlt { alt_rhs = rhs}]
  -- see Note [Eliminate trivial Solo# continuations]
  | StgConApp dc _ [StgVarArg v] _ <- rhs
  , isUnboxedTupleDataCon dc
  , v == bndr
  = cgExpr scrut

-- The general case: decide where the heap check goes (do_gc / gc_plan),
-- compile the scrutinee so that it assigns its results to alt_regs, then
-- bind the returned binders and compile the alternatives.
cgCase scrut bndr alt_type alts
  = -- the general case
    do { platform <- getPlatform
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg platform) ret_bndrs

       ; simple_scrut <- isSimpleScrut scrut alt_type
         -- do_gc: True means the alternatives do their own heap check;
         -- the guards are tried in order, first match wins
       ; let do_gc  | is_cmp_op scrut  = False  -- See Note [GC for conditionals]
                    | not simple_scrut = True
                    | isSingleton alts = False
                    | up_hp_usg > 0    = False
                    | otherwise        = True
               -- cf Note [Compiling case expressions]
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -}
       ; ret_kind <- withSequel sequel (cgExpr scrut)
       ; restoreCurrentCostCentre platform mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }
  where
    -- True iff the scrutinee is an application of a comparison primop
    is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op
    is_cmp_op _                             = False


{- Note [GC for conditionals]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For comparison operators (`is_cmp_op`) it seems that we have always done
NoGcInAlts.  It's odd, and it's flagrantly inconsistent with the rules described
in Note [Compiling case expressions].  However, that's the way it has been for ages
(there was some long-gone history involving tagToEnum#; see #13397, #8317, #8326).

Note [scrut sequel]
~~~~~~~~~~~~~~~~~~~
The job of the scrutinee is to assign its value(s) to alt_regs.
Additionally, if we plan to do a heap-check in the alternatives (see
Note [Compiling case expressions]), then we *must* retreat Hp to
recover any unused heap before passing control to the sequel.  If we
don't do this, then any unused heap will become slop because the heap
check will reset the heap usage. Slop in the heap breaks LDV profiling
(+RTS -hb) which needs to do a linear sweep through the nursery.


Note [Inlining out-of-line primops and heap checks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If shouldInlinePrimOp returns True when called from GHC.StgToCmm.Expr for the
purpose of heap check placement, we *must* inline the primop later in
GHC.StgToCmm.Prim. If we don't, things will go wrong.
-}

-----------------
-- Save the current cost centre unless the scrutinee is simple, in which
-- case there is nothing to save and the result is Nothing.
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut
  | simple_scrut = return Nothing
  | otherwise    = saveCurrentCostCentre


-----------------
isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
--     when it does, you'll deeply mess up allocation
isSimpleScrut (StgOpApp op args _) _         = isSimpleOp op args
isSimpleScrut (StgLit _)           _         = return True       -- case 1# of { 0# -> ..; ... }
isSimpleScrut (StgApp _ [])    (PrimAlt _)   = return True       -- case x# of { 0# -> ..; ... }
isSimpleScrut (StgApp f [])   _
  | Just sig <- idTagSig_maybe f
  , isTaggedSig sig  -- case !x of { ... }
  = if mightBeFunTy (idType f)
      -- See Note [Evaluating functions with profiling] in rts/Apply.cmm
      -- A possibly-function-typed scrutinee is only simple when we are
      -- not profiling
      then not . profileIsProfiling <$> getProfile
      else pure True
isSimpleScrut _                    _         = return False

isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
-- True iff the op cannot block or allocate
-- A foreign call is simple exactly when it is not a "safe" call.
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
-- dataToTagSmall#/dataToTagLarge# evaluate an argument;
-- see Note [DataToTag overview] in GHC.Tc.Instance.Class
isSimpleOp (StgPrimOp DataToTagSmallOp) _ = return False
isSimpleOp (StgPrimOp DataToTagLargeOp) _ = return False
isSimpleOp (StgPrimOp op) stg_args        = do
    arg_exprs <- getNonVoidArgAmodes stg_args
    cfg       <- getStgToCmmConfig
    -- See Note [Inlining out-of-line primops and heap checks]
    return $! shouldInlinePrimOp cfg op arg_exprs
isSimpleOp (StgPrimCallOp _) _            = return False

-----------------
chooseReturnBndrs :: Id -> AltType -> [CgStgAlt] -> [NonVoid Id]
-- These are the binders of a case that are assigned by the evaluation of the
-- scrutinee.
-- They're non-void, see Note [Post-unarisation invariants] in GHC.Stg.Unarise.
chooseReturnBndrs bndr (PrimAlt _) _alts
  = assertNonVoidIds [bndr]

chooseReturnBndrs _bndr (MultiValAlt n) [alt]
  = assertPpr (ids `lengthIs` n) (ppr n $$ ppr ids $$ ppr _bndr) $
    assertNonVoidIds ids     -- 'bndr' is not assigned!
    where ids = alt_bndrs alt

chooseReturnBndrs bndr (AlgAlt _) _alts
  = assertNonVoidIds [bndr]  -- Only 'bndr' is assigned

chooseReturnBndrs bndr PolyAlt _alts
  = assertNonVoidIds [bndr]  -- Only 'bndr' is assigned

chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
                             -- MultiValAlt has only one alternative

-------------------------------------
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [CgStgAlt]
       -> FCode ReturnKind
-- At this point the result of the case is in the binders
cgAlts gc_plan _bndr PolyAlt [alt]
  = maybeAltHeapCheck gc_plan (cgExpr $ alt_rhs alt)

cgAlts gc_plan _bndr (MultiValAlt _) [alt]
  = maybeAltHeapCheck gc_plan (cgExpr $ alt_rhs alt)
        -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do  { platform <- getPlatform

        ; tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let bndr_reg = CmmLocal (idToReg platform bndr)
              (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

              tagged_cmms' = [(lit,code)
                             | (LitAlt lit, code) <- tagged_cmms]
        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
        ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { platform <- getPlatform

        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

        ; let !fam_sz   = tyConFamilySize tycon
              !bndr_reg = CmmLocal (idToReg platform bndr)
              !ptag_expr = cmmConstrTag1 platform (CmmReg bndr_reg)
                -- branches carry zero-based constructor tags; shift them
                -- by one so they line up with the 1-based pointer tags
              !branches' = first succ <$> branches
              !maxpt = mAX_PTR_TAG platform
              (!via_ptr, !via_info) = partition ((< maxpt) . fst) branches'
              !small = isSmallFamily platform fam_sz

                -- Is the constructor tag in the node reg?
                -- See Note [Tagging big families]
        ; if small || null via_info
           then -- Yes, bndr_reg has constructor tag in ls bits
               emitSwitch ptag_expr branches' mb_deflt 1
                 (if small then fam_sz else maxpt)

           else -- No, get the exact tag from the info table when the
                -- pointer tag is mAX_PTR_TAG
                -- See Note [Double switching for big families]
              do
                profile     <- getProfile
                align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
                let !untagged_ptr = cmmUntag platform (CmmReg bndr_reg)
                    !itag_expr = getConstrTag profile align_check untagged_ptr
                      -- back to zero-based tags for the info-table switch
                    !info0 = first pred <$> via_info
                if null via_ptr then
                  emitSwitch itag_expr info0 mb_deflt 0 (fam_sz - 1)
                else do
                  infos_lbl <- newBlockId
                  infos_scp <- getTickScope

                  let spillover = (maxpt, (mkBranch infos_lbl, infos_scp))

                  -- Share the default code between both switches: the
                  -- first gets the labelled code, the second a branch to it
                  (mb_shared_deflt, mb_shared_branch) <- case mb_deflt of
                      (Just (stmts, scp)) ->
                          do lbl <- newBlockId
                             return ( Just (mkLabel lbl scp <*> stmts, scp)
                                    , Just (mkBranch lbl, scp))
                      _ -> return (Nothing, Nothing)
                  -- Switch on pointer tag
                  emitSwitch ptag_expr (spillover : via_ptr) mb_shared_deflt 1 maxpt
                  join_lbl <- newBlockId
                  emit (mkBranch join_lbl)
                  -- Switch on info table tag
                  emitLabel infos_lbl
                  emitSwitch itag_expr info0 mb_shared_branch
                    (maxpt - 1) (fam_sz - 1)
                  emitLabel join_lbl

        ; return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- MultiValAlt and PolyAlt have only one alternative

-- Note [Double switching for big families]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- An algebraic data type can have n >= 0 summands
-- (or alternatives), which are identified (labeled) by
-- constructors. In memory they are kept apart by tags
-- (see Note [Data constructor dynamic tags] in GHC.StgToCmm.Closure).
-- Due to the characteristics of the platform that
-- contribute to the alignment of memory objects, there
-- is a natural limit of information about constructors
-- that can be encoded in the pointer tag. When the mapping
-- of constructors to the pointer tag range 1..mAX_PTR_TAG
-- is not injective, then we have a "big data type", also
-- called a "big (constructor) family" in the literature.
-- Constructor tags residing in the info table are injective,
-- but considerably more expensive to obtain, due to additional
-- memory access(es).
--
-- When doing case analysis on a value of a "big data type"
-- we need two nested switch statements to make up for the lack
-- of injectivity of pointer tagging, also taking the info
-- table tag into account. The exact mechanism is described next.
--
-- In the general case, switching on big family alternatives
-- is done by two nested switch statements. According to
-- Note [Tagging big families], the outer switch
-- looks at the pointer tag and the inner dereferences the
-- pointer and switches on the info table tag.
--
-- We can handle a simple case first, namely when none
-- of the case alternatives mention a constructor having
-- a pointer tag of 1..mAX_PTR_TAG-1. In this case we
-- simply emit a switch on the info table tag.
-- Note that the other simple case is when all mentioned
-- alternatives lie in 1..mAX_PTR_TAG-1, in which case we can
-- switch on the ptr tag only, just like in the small family case.
--
-- There is a single intricacy with a nested switch:
-- Both should branch to the same default alternative, and as such
-- avoid duplicate codegen of potentially heavy code. The outer
-- switch generates the actual code with a prepended fresh label,
-- while the inner one only generates a jump to that label.
--
-- For example, let's assume a 64-bit architecture, so that all
-- heap objects are 8-byte aligned, and hence the address of a
-- heap object ends in `000` (three zero bits).
--
-- Then consider the following data type
--
--   > data Big = T0 | T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8
--   Ptr tag:      1    2    3    4    5    6    7    7    7
--   As bits:    001  010  011  100  101  110  111  111  111
--   Info pointer tag (zero based):
--                 0    1    2    3    4    5    6    7    8
--
-- Then     \case T2 -> True; T8 -> True; _ -> False
-- will result in following code (slightly cleaned-up and
-- commented -ddump-cmm-from-stg):
{-
           R1 = _sqI::P64;  -- scrutinee
           if (R1 & 7 != 0) goto cqO; else goto cqP;
       cqP: // global       -- enter
           call (I64[R1])(R1) returns to cqO, args: 8, res: 8, upd: 8;
       cqO: // global       -- already WHNF
           _sqJ::P64 = R1;
           _cqX::P64 = _sqJ::P64 & 7;  -- extract pointer tag
           switch [1 .. 7] _cqX::P64 {
               case 3 : goto cqW;
               case 7 : goto cqR;
               default: {goto cqS;}
           }
       cqR: // global
           _cr2 = I32[I64[_sqJ::P64 & (-8)] - 4]; -- tag from info pointer
           switch [6 .. 8] _cr2::I64 {
               case 8 : goto cr1;
               default: {goto cr0;}
           }
       cr1: // global
           R1 = GHC.Types.True_closure+2;
           call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
       cr0: // global     -- technically necessary label
           goto cqS;
       cqW: // global
           R1 = GHC.Types.True_closure+2;
           call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
       cqS: // global
           R1 = GHC.Types.False_closure+1;
           call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
-}
--
-- For 32-bit systems we only have 2 tag bits in the pointers at our disposal,
-- so the performance win is dubious, especially in face of the increased code
-- size due to double switching. But we can take the viewpoint that 32-bit
-- architectures are not relevant for performance any more, so this can be
-- considered as moot.


-- Note [alg-alt heap check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- In an algebraic case with more than one alternative, we will have
-- code like
--
-- L0:
--   x = R1
--   goto L1
-- L1:
--   if (x & 7 >= 2) then goto L2 else goto L3
-- L2:
--   Hp = Hp + 16
--   if (Hp > HpLim) then goto L4
--   ...
-- L4:
--   call gc() returns to L5
-- L5:
--   x = R1
--   goto L1


-- Note [Tagging big families]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Both the big and the small constructor families are tagged. Big families
-- are those larger unions which overflow the tag space of TAG_BITS
-- (i.e. more than 3 constructors on 32-bit resp. more than 7 on 64-bit archs).
--
-- For example, let's assume a 64-bit architecture, so that all
-- heap objects are 8-byte aligned, and hence the address of a
-- heap object ends in `000` (three zero bits).  Then consider
-- > data Maybe a = Nothing | Just a
-- > data Day = Mon | Tue | Wed | Thu | Fri | Sat | Sun
-- > data Grade = G1 | G2 | G3 | G4 | G5 | G6 | G7 | G8 | G9 | G10
--
-- Since `Grade` has more than 7 constructors, it counts as a
-- "big data type" (also referred to as "big constructor family" in papers).
-- On the other hand, `Maybe` and `Day` have 7 constructors or fewer, so they
-- are "small data types".
--
-- Then
--   * A pointer to an unevaluated thunk of type `Maybe Int`, `Day` or `Grade` will end in `000`
--   * A tagged pointer to a `Nothing`, `Mon` or `G1` will end in `001`
--   * A tagged pointer to a `Just x`, `Tue` or `G2`  will end in `010`
--   * A tagged pointer to `Wed` or `G3` will end in `011`
--       ...
--   * A tagged pointer to `Sat` or `G6` will end in `110`
--   * A tagged pointer to `Sun` or `G7` or `G8` or `G9` or `G10` will end in `111`
--
-- For big families we employ a mildly clever way of combining pointer and
-- info-table tagging. We use 1..MAX_PTR_TAG-1 as pointer-resident tags where
-- the tags in the pointer and the info table are in a one-to-one
-- relation, whereas tag MAX_PTR_TAG is used as "spill over", signifying
-- we have to fall back and get the precise constructor tag from the
-- info-table.
--
-- Consequently we now cascade switches, because we have to check
-- the pointer tag first, and when it is MAX_PTR_TAG, fetch the precise
-- tag from the info table, and switch on that. The only technically
-- tricky part is that the default case needs (logical) duplication.
-- To do this we emit an extra label for it and branch to that from
-- the second switch. This avoids duplicated codegen. See #14373.
-- See Note [Double switching for big families] for the mechanics
-- involved.
--
-- Also see Note [Data constructor dynamic tags]
-- and the wiki https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/haskell-execution/pointer-tagging
--

-------------------
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
             -> FCode ( Maybe CmmAGraphScoped
                      , [(ConTagZ, CmmAGraphScoped)] )
-- Generate code for every alternative (via cgAltRhss), then split the
-- result into the optional DEFAULT right-hand side and the constructor
-- alternatives keyed by their dataConTagZ.
cgAlgAltRhss gc_plan bndr alts
  = do { tagged_cmms <- cgAltRhss gc_plan bndr alts

       ; let { mb_deflt = case tagged_cmms of
                           ((DEFAULT,rhs) : _) -> Just rhs
                           _other              -> Nothing
                            -- DEFAULT is always first, if present

              ; branches = [ (dataConTagZ con, cmm)
                           | (DataAlt con, cmm) <- tagged_cmms ]
              }

       ; return (mb_deflt, branches)
       }


-------------------
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
          -> FCode [(AltCon, CmmAGraphScoped)]
-- Generate the code of each alternative, pairing it with the
-- alternative's AltCon. Each alternative is wrapped in maybeAltHeapCheck
-- and captured with getCodeScoped; the alternatives are combined with
-- forkAlts.
cgAltRhss gc_plan bndr alts = do
  platform <- getPlatform
  let
    base_reg = idToReg platform bndr
    cg_alt :: CgStgAlt -> FCode (AltCon, CmmAGraphScoped)
    cg_alt GenStgAlt{alt_con=con, alt_bndrs=bndrs, alt_rhs=rhs}
      = getCodeScoped             $
        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
                    -- alt binders are always non-void,
                    -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
           ; _ <- cgExpr rhs
           ; return con }
  forkAlts (map cg_alt alts)

maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
-- Wrap the code of an alternative in a heap check if the GcPlan asks for
-- one (GcInAlts); which flavour of check is used depends on the ReturnKind.
-- With NoGcInAlts the code is returned unchanged.
maybeAltHeapCheck (NoGcInAlts,_)  code = code
maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
  altHeapCheck regs code
maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
  altHeapCheckReturnsTo regs lret off code

-----------------------------------------------------------------------------
--      Tail calls
-----------------------------------------------------------------------------

cgConApp :: DataCon -> ConstructorNumber -> [StgArg] -> FCode ReturnKind
-- Code for a saturated constructor application in tail position.
cgConApp con mn stg_args
  | isUnboxedTupleDataCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   --  Boxed constructors; allocate and return
  = assertPpr (stg_args `lengthIs` countConRepArgs con)
              (ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args) $
    do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) mn False
                                     currentCCS con (assertNonVoidStgArgs stg_args)
                                     -- con args are always non-void,
                                     -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
                -- The first "con" says that the name bound to this
                -- closure is "con", which is a bit of a fudge, but
                -- it only affects profiling (hence the False)

        ; emit =<< fcode_init
        ; tickyReturnNewCon (length stg_args)
        ; emitReturn [idInfoToAmode idinfo] }

cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
-- Code for applying an Id to arguments in tail position. The shape of the
-- generated code is dictated by the CallMethod chosen by getCallMethod.
cgIdApp fun_id args = do
    platform       <- getPlatform
    fun_info       <- getCgIdInfo fun_id
    cfg            <- getStgToCmmConfig
    self_loop      <- getSelfLoop
    let profile        = stgToCmmProfile  cfg
        fun_arg        = StgVarArg fun_id
        fun_name       = idName    fun_id
        fun            = idInfoToAmode fun_info
        lf_info        = cg_lf         fun_info
        n_args         = length args
    case getCallMethod cfg fun_name fun_id lf_info n_args (cg_loc fun_info) self_loop of
            -- A value in WHNF, so we can just return it.
        ReturnIt
          | isZeroBitTy (idType fun_id) -> emitReturn []
          | otherwise                   -> emitReturn [fun]

        -- A value inferred to be in WHNF, so we can just return it.
        InferedReturnIt
          | isZeroBitTy (idType fun_id) -> trace >> emitReturn []
          | otherwise                   -> trace >> assertTag >>
                                                    emitReturn [fun]
            where
              trace = do
                tickyTagged
                use_id <- newUnique
                _lbl <- emitTickyCounterTag use_id (NonVoid fun_id)
                tickyTagSkip use_id fun_id

                -- pprTraceM "WHNF:" (ppr fun_id <+> ppr args )
              assertTag = whenCheckTags $ do
                  cur_mod <- getModuleName
                  emitTagAssertion (showPprUnsafe
                      (text "TagCheck failed on entry in" <+> ppr cur_mod <+> text "- value:" <> ppr fun_id <+> pdoc platform fun))
                      fun

        EnterIt -> assertPpr (null args) (ppr fun_id $$ ppr args) $  -- Discarding arguments
                   emitEnter fun

        SlowCall -> do      -- A slow function call via the RTS apply routines
                { tickySlowCall lf_info args
                ; emitComment $ mkFastString "slowCall"
                ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
                { tickyDirectCall arity args
                ; if nodeMustPointToIt profile lf_info
                     then directCall NativeNodeCall   lbl arity (fun_arg:args)
                     else directCall NativeDirectCall lbl arity args }

        -- Let-no-escape call or self-recursive tail-call
        JumpToIt blk_id lne_regs -> do
          { adjustHpBackwards -- always do this before a tail-call
          ; cmm_args <- getNonVoidArgAmodes args
          ; emitMultiAssign lne_regs cmm_args
          ; emit (mkBranch blk_id)
          ; return AssignedDirectly }

-- Note [Self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive tail calls can be optimized into a local jump in the same
-- way as let-no-escape bindings (see Note [What is a non-escaping let] in
-- "GHC.CoreToStg"). Consider this:
--
-- foo.info:
--     a = R1  // calling convention
--     b = R2
--     goto L1
-- L1: ...
--     ...
-- ...
-- L2: R1 = x
--     R2 = y
--     call foo(R1,R2)
--
-- Instead of putting x and y into registers (or other locations required by the
-- calling convention) and performing a call we can put them into local
-- variables a and b and perform jump to L1:
--
-- foo.info:
--     a = R1
--     b = R2
--     goto L1
-- L1: ...
--     ...
-- ...
-- L2: a = x
--     b = y
--     goto L1
--
-- This can be done only when function is calling itself in a tail position
-- and only if the call passes number of parameters equal to function's arity.
-- Note that this cannot be performed if a function calls itself with a
-- continuation.
--
-- This in fact implements optimization known as "loopification". It was
-- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
-- by Krzysztof Woś, though we use different approach. Krzysztof performed his
-- optimization at the Cmm level, whereas we perform ours during code generation
-- (Stg-to-Cmm pass) essentially making sure that optimized Cmm code is
-- generated in the first place.
--
-- Implementation is spread across a couple of places in the code:
--
--   * FCode monad stores additional information in its reader
--     environment (stgToCmmSelfLoop field). This `SelfLoopInfo`
--     record tells us which function can tail call itself in an
--     optimized way (it is the function currently being compiled),
--     its RepArity, what is the label of its loop header (L1 in
--     example above) and information about which local registers
--     should receive arguments when making a call (this would be a
--     and b in the example above).
--
--   * Whenever we are compiling a function, we set that information to reflect
--     the fact that function currently being compiled can be jumped to, instead
--     of called. This is done in closureCodeBody in GHC.StgToCmm.Bind.
--
--   * We also have to emit a label to which we will be jumping. We make sure
--     that the label is placed after a stack check but before the heap
--     check. The reason is that making a recursive tail-call does not increase
--     the stack so we only need to check once. But it may grow the heap, so we
--     have to repeat the heap check in every self-call. This is done in
--     do_checks in GHC.StgToCmm.Heap.
--
--   * When we begin compilation of another closure we remove the additional
--     information from the environment. This is done by forkClosureBody
--     in GHC.StgToCmm.Monad. Other functions that duplicate the environment -
--     forkLneBody, forkAlts, codeOnly - duplicate that information. In other
--     words, we only need to clean the environment of the self-loop information
--     when compiling right hand side of a closure (binding).
--
--   * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
--     of call will be generated. getCallMethod decides to generate a self
--     recursive tail call when (a) the environment stores information about
--     a possible self tail-call; (b) that tail call is to the function
--     currently being compiled; (c) the number of passed arguments is equal
--     to the function's unarised arity; and (d) loopification is turned on
--     via the -floopification command-line option.
--
--   * Command line option to turn loopification on and off is implemented in
--     DynFlags, then passed to StgToCmmConfig for this phase.


-- | Emit code that enters the closure denoted by @fun@, dispatching on the
-- current 'Sequel':
--
--   * 'Return': jump to the closure's entry code, passing the untagged
--     @fun@; the result is 'AssignedDirectly'.
--
--   * 'AssignTo': emit a tag test and, on the untagged path only, a call to
--     the entry code. Both paths meet at a fresh return label, and the
--     result is 'ReturnedTo' that label.
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { platform <- getPlatform
  ; profile  <- getProfile
  ; adjustHpBackwards
  ; sequel      <- getSequel
  ; updfr_off   <- getUpdFrameOff
  ; align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return -> do
        { -- The entry code is found through the info pointer of the
          -- closure held in the node register.
          let entry = entryCode platform
                $ closureInfoPtr platform align_check
                $ CmmReg (nodeReg platform)
        ; emit $ mkJump profile NativeNodeCall entry
                        [cmmUntag platform fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lret else goto Lcall
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
       { lret  <- newBlockId
       ; lcall <- newBlockId
         -- NOTE(review): updfr_off and align_check are re-read here,
         -- shadowing the bindings made at the top of emitEnter; this is
         -- presumably redundant -- confirm before removing.
       ; updfr_off   <- getUpdFrameOff
       ; align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
       ; let (off, _, copyin) = copyInOflow profile NativeReturn (Young lret) res_regs []
       ; let area = Young lret
       ; let (outArgs, regs, copyout) = copyOutOflow profile NativeNodeCall Call area
                                          [fun] updfr_off []
         -- refer to fun via nodeReg after the copyout, to avoid having
         -- both live simultaneously; this sometimes enables fun to be
         -- inlined in the RHS of the R1 assignment.
       ; let node = CmmReg $ nodeReg platform
             entry = entryCode platform (closureInfoPtr platform align_check node)
             the_call = toCall entry (Just lret) updfr_off off outArgs regs
       ; tscope <- getTickScope
       ; emit $
           copyout <*>
           mkCbranch (cmmIsTagged platform node)
                     lret lcall Nothing <*>
           outOfLine lcall (the_call,tscope) <*>
           mkLabel lret tscope <*>
           copyin
       ; return (ReturnedTo lret off)
       }
  }

------------------------------------------------------------------------
--              Ticks
------------------------------------------------------------------------

-- | Generate Cmm code for a tick. Depending on the type of Tickish,
-- this will either generate actual Cmm instrumentation code, or
-- simply pass on the annotation as a @CmmTickish@.
--
-- Tick kinds other than 'ProfNote', 'HpcTick' and 'SourceNote' produce
-- no code.
cgTick :: StgTickish -> FCode ()
cgTick tick
  = do { platform <- getPlatform
       ; case tick of
           ProfNote   cc t p -> emitSetCCC cc t p
           HpcTick    m n    -> emit (mkTickBox platform m n)
           -- Rebuild the note so that it is a CmmTickish, not a StgTickish.
           SourceNote s n    -> emitTick $ SourceNote s n
           _other            -> return () -- ignore
       }