{-# LANGUAGE GADTs #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE MultiWayIf #-}

module GHC.Cmm.Sink (
     cmmSink
  ) where

import GHC.Prelude

import GHC.Cmm
import GHC.Cmm.Opt
import GHC.Cmm.Liveness
import GHC.Cmm.LRegSet
import GHC.Cmm.Utils
import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Dataflow.Label
import GHC.Cmm.Dataflow.Graph
import GHC.Platform.Regs

import GHC.Platform
import GHC.Types.Unique.FM
import GHC.Types.Unique.Supply
import GHC.Cmm.Config

import Data.List (partition)
import Data.Maybe

import GHC.Exts (inline)

-- -----------------------------------------------------------------------------
-- Sinking and inlining

-- This is an optimisation pass that
--  (a) moves assignments closer to their uses, to reduce register pressure
--  (b) pushes assignments into a single branch of a conditional if possible
--  (c) inlines assignments to registers that are mentioned only once
--  (d) discards dead assignments
--
-- This tightens up lots of register-heavy code.  It is particularly
-- helpful in the Cmm generated by the Stg->Cmm code generator, in
-- which every function starts with a copyIn sequence like:
--
--    x1 = R1
--    x2 = Sp[8]
--    x3 = Sp[16]
--    if (Sp - 32 < SpLim) then L1 else L2
--
-- we really want to push the x1..x3 assignments into the L2 branch.
--
-- Algorithm:
--
--  * Start by doing liveness analysis.
--
--  * Keep a list of assignments A; earlier ones may refer to later ones.
--    Currently we only sink assignments to local registers, because we don't
--    have liveness information about global registers.
--
--  * Walk forwards through the graph, look at each node N:
--
--    * If it is a dead assignment, i.e. assignment to a register that is
--      not used after N, discard it.
--
--    * Try to inline based on current list of assignments
--      * If any assignments in A (1) occur only once in N, and (2) are
--        not live after N, inline the assignment and remove it
--        from A.
--
--      * If an assignment in A is cheap (RHS is local register), then
--        inline the assignment and keep it in A in case it is used afterwards.
--
--      * Otherwise don't inline.
--
--    * If N is assignment to a local register pick up the assignment
--      and add it to A.
--
--    * If N is not an assignment to a local register:
--      * remove any assignments from A that conflict with N, and
--        place them before N in the current block.  We call this
--        "dropping" the assignments.
--
--      * An assignment conflicts with N if it:
--        - assigns to a register mentioned in N
--        - mentions a register assigned by N
--        - reads from memory written by N
--      * do this recursively, dropping dependent assignments
--
--    * At an exit node:
--      * drop any assignments that are live on more than one successor
--        and are not trivial
--      * if any successor has more than one predecessor (a join-point),
--        drop everything live in that successor. Since we only propagate
--        assignments that are not dead at the successor, we will therefore
--        eliminate all assignments dead at this point. Thus analysis of a
--        join-point will always begin with an empty list of assignments.
--
--
-- As a result of above algorithm, sinking deletes some dead assignments
-- (transitively, even).  This isn't as good as removeDeadAssignments,
-- but it's much cheaper.

-- -----------------------------------------------------------------------------
-- things that we aren't optimising very well yet.
--
-- -----------
-- (1) From GHC's FastString.hashStr:
--
--  s2ay:
--      if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp;
--  c2gn:
--      R1 = _s2au::I64;
--      call (I64[Sp])(R1) args: 8, res: 0, upd: 8;
--  c2gp:
--      _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128,
--                                 4091);
--      _s2an::I64 = _s2an::I64 + 1;
--      _s2au::I64 = _s2cO::I64;
--      goto s2ay;
--
-- a nice loop, but we didn't eliminate the silly assignment at the end.
-- See Note [dependent assignments], which would probably fix this.
-- This is #8336.
--
-- -----------
-- (2) From stg_atomically_frame in PrimOps.cmm
--
-- We have a diamond control flow:
--
--     x = ...
--       |
--      / \
--     A   B
--      \ /
--       |
--    use of x
--
-- Now x won't be sunk down to its use, because we won't push it into
-- both branches of the conditional.  We certainly do have to check
-- that we can sink it past all the code in both A and B, but having
-- discovered that, we could sink it to its use.
--

-- -----------------------------------------------------------------------------

type Assignment = (LocalReg, CmmExpr, AbsMem)
  -- Assignment caches AbsMem, an abstraction of the memory read by
  -- the RHS of the assignment.

type Assignments = [Assignment]
  -- A sequence of assignments; kept in *reverse* order
  -- So the list [ x=e1, y=e2 ] means the sequence of assignments
  --     y = e2
  --     x = e1

cmmSink :: CmmConfig -> CmmGraph -> UniqSM CmmGraph
cmmSink :: CmmConfig -> CmmGraph -> UniqSM CmmGraph
cmmSink CmmConfig
cfg CmmGraph
graph = BlockId -> [CmmBlock] -> CmmGraph
ofBlockList (CmmGraph -> BlockId
forall (n :: Extensibility -> Extensibility -> *).
GenCmmGraph n -> BlockId
g_entry CmmGraph
graph) ([CmmBlock] -> CmmGraph) -> UniqSM [CmmBlock] -> UniqSM CmmGraph
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> LabelMap Assignments -> [CmmBlock] -> UniqSM [CmmBlock]
sink LabelMap Assignments
forall v. LabelMap v
mapEmpty [CmmBlock]
blocks
  where
  platform :: Platform
platform = CmmConfig -> Platform
cmmPlatform CmmConfig
cfg
  liveness :: BlockEntryLivenessL
liveness = Platform -> CmmGraph -> BlockEntryLivenessL
cmmLocalLivenessL Platform
platform CmmGraph
graph
  getLive :: BlockId -> LRegSet
getLive BlockId
l = LRegSet -> BlockId -> BlockEntryLivenessL -> LRegSet
forall a. a -> BlockId -> LabelMap a -> a
mapFindWithDefault LRegSet
emptyLRegSet BlockId
l BlockEntryLivenessL
liveness

  blocks :: [CmmBlock]
blocks = CmmGraph -> [CmmBlock]
revPostorder CmmGraph
graph

  join_pts :: LabelMap Int
join_pts = [CmmBlock] -> LabelMap Int
findJoinPoints [CmmBlock]
blocks

  sink :: LabelMap Assignments -> [CmmBlock] -> UniqSM [CmmBlock]
  sink :: LabelMap Assignments -> [CmmBlock] -> UniqSM [CmmBlock]
sink LabelMap Assignments
_ [] = [CmmBlock] -> UniqSM [CmmBlock]
forall a. a -> UniqSM a
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
  sink LabelMap Assignments
sunk (CmmBlock
b:[CmmBlock]
bs) = do
    -- Now sink and inline in this block
    (prepend, last_fold) <- CmmConfig
-> Opt (CmmNode O C) -> UniqSM ([CmmNode O O], CmmNode O C)
forall a. CmmConfig -> Opt a -> UniqSM ([CmmNode O O], a)
runOpt CmmConfig
cfg (Opt (CmmNode O C) -> UniqSM ([CmmNode O O], CmmNode O C))
-> Opt (CmmNode O C) -> UniqSM ([CmmNode O O], CmmNode O C)
forall a b. (a -> b) -> a -> b
$ CmmNode O C -> Opt (CmmNode O C)
forall (e :: Extensibility) (x :: Extensibility).
CmmNode e x -> Opt (CmmNode e x)
constantFoldNode CmmNode O C
last

    (middle', assigs) <- walk cfg (ann_middles ++ annotate platform live_middle prepend) (mapFindWithDefault [] lbl sunk)

    let (final_last, assigs') = tryToInline platform live last_fold assigs
        -- Now, drop any assignments that we will not sink any further.
        (dropped_last, assigs'') = dropAssignments platform drop_if init_live_sets assigs'
        drop_if :: (LocalReg, CmmExpr, AbsMem)
                      -> [LRegSet] -> (Bool, [LRegSet])
        drop_if a :: Assignment
a@(LocalReg
r,CmmExpr
rhs,AbsMem
_) [LRegSet]
live_sets = (Bool
should_drop, [LRegSet]
live_sets')
            where
              should_drop :: Bool
should_drop =  Platform -> Assignment -> CmmNode O C -> Bool
forall (x :: Extensibility).
Platform -> Assignment -> CmmNode O x -> Bool
conflicts Platform
platform Assignment
a CmmNode O C
final_last
                          Bool -> Bool -> Bool
|| Bool -> Bool
not (Platform -> CmmExpr -> Bool
isTrivial Platform
platform CmmExpr
rhs) Bool -> Bool -> Bool
&& [LRegSet] -> LocalReg -> Bool
live_in_multi [LRegSet]
live_sets LocalReg
r
                          Bool -> Bool -> Bool
|| LocalReg
r LocalReg -> LRegSet -> Bool
`elemLRegSet` LRegSet
live_in_joins

              live_sets' :: [LRegSet]
live_sets' | Bool
should_drop = [LRegSet]
live_sets
                        | Bool
otherwise   = (LRegSet -> LRegSet) -> [LRegSet] -> [LRegSet]
forall a b. (a -> b) -> [a] -> [b]
map LRegSet -> LRegSet
upd [LRegSet]
live_sets

              upd :: LRegSet -> LRegSet
upd LRegSet
set | LocalReg
r LocalReg -> LRegSet -> Bool
`elemLRegSet` LRegSet
set = LRegSet
set LRegSet -> LRegSet -> LRegSet
`unionLRegSet` LRegSet
live_rhs
                      | Bool
otherwise           = LRegSet
set

              live_rhs :: LRegSet
live_rhs = Platform
-> (LRegSet -> LocalReg -> LRegSet)
-> LRegSet
-> CmmExpr
-> LRegSet
forall b. Platform -> (b -> LocalReg -> b) -> b -> CmmExpr -> b
forall r a b.
UserOfRegs r a =>
Platform -> (b -> r -> b) -> b -> a -> b
foldRegsUsed Platform
platform ((LocalReg -> LRegSet -> LRegSet) -> LRegSet -> LocalReg -> LRegSet
forall a b c. (a -> b -> c) -> b -> a -> c
flip LocalReg -> LRegSet -> LRegSet
insertLRegSet) LRegSet
emptyLRegSet CmmExpr
rhs

        final_middle = (Block CmmNode O O -> CmmNode O O -> Block CmmNode O O)
-> Block CmmNode O O -> [CmmNode O O] -> Block CmmNode O O
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Block CmmNode O O -> CmmNode O O -> Block CmmNode O O
forall (n :: Extensibility -> Extensibility -> *)
       (e :: Extensibility).
Block n e O -> n O O -> Block n e O
blockSnoc Block CmmNode O O
middle' [CmmNode O O]
dropped_last

        sunk' = LabelMap Assignments
-> LabelMap Assignments -> LabelMap Assignments
forall v. LabelMap v -> LabelMap v -> LabelMap v
mapUnion LabelMap Assignments
sunk (LabelMap Assignments -> LabelMap Assignments)
-> LabelMap Assignments -> LabelMap Assignments
forall a b. (a -> b) -> a -> b
$
                  [(BlockId, Assignments)] -> LabelMap Assignments
forall v. [(BlockId, v)] -> LabelMap v
mapFromList [ (BlockId
l, Platform -> LRegSet -> Assignments -> Assignments
filterAssignments Platform
platform (BlockId -> LRegSet
getLive BlockId
l) Assignments
assigs'')
                              | BlockId
l <- [BlockId]
succs ]

    (blockJoin first final_middle final_last :) <$> sink sunk' bs

    where
      lbl :: BlockId
lbl = CmmBlock -> BlockId
forall (x :: Extensibility). Block CmmNode C x -> BlockId
forall (thing :: Extensibility -> Extensibility -> *)
       (x :: Extensibility).
NonLocal thing =>
thing C x -> BlockId
entryLabel CmmBlock
b
      (CmmNode C O
first, Block CmmNode O O
middle, CmmNode O C
last) = CmmBlock -> (CmmNode C O, Block CmmNode O O, CmmNode O C)
forall (n :: Extensibility -> Extensibility -> *).
Block n C C -> (n C O, Block n O O, n O C)
blockSplit CmmBlock
b

      succs :: [BlockId]
succs = CmmNode O C -> [BlockId]
forall (e :: Extensibility). CmmNode e C -> [BlockId]
forall (thing :: Extensibility -> Extensibility -> *)
       (e :: Extensibility).
NonLocal thing =>
thing e C -> [BlockId]
successors CmmNode O C
last

      -- Annotate the middle nodes with the registers live *after*
      -- the node.  This will help us decide whether we can inline
      -- an assignment in the current node or not.
      live :: LRegSet
live = [LRegSet] -> LRegSet
unionsLRegSet ((BlockId -> LRegSet) -> [BlockId] -> [LRegSet]
forall a b. (a -> b) -> [a] -> [b]
map BlockId -> LRegSet
getLive [BlockId]
succs)
      live_middle :: LRegSet
live_middle = Platform -> CmmNode O C -> LRegSet -> LRegSet
forall n.
(DefinerOfRegs LocalReg n, UserOfRegs LocalReg n) =>
Platform -> n -> LRegSet -> LRegSet
gen_killL Platform
platform CmmNode O C
last LRegSet
live
      ann_middles :: [(LRegSet, CmmNode O O)]
ann_middles = Platform -> LRegSet -> [CmmNode O O] -> [(LRegSet, CmmNode O O)]
annotate Platform
platform LRegSet
live_middle (Block CmmNode O O -> [CmmNode O O]
forall (n :: Extensibility -> Extensibility -> *).
Block n O O -> [n O O]
blockToList Block CmmNode O O
middle)

      -- We cannot sink into join points (successors with more than
      -- one predecessor), so identify the join points and the set
      -- of registers live in them.
      ([BlockId]
joins, [BlockId]
nonjoins) = (BlockId -> Bool) -> [BlockId] -> ([BlockId], [BlockId])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition (BlockId -> LabelMap Int -> Bool
forall a. BlockId -> LabelMap a -> Bool
`mapMember` LabelMap Int
join_pts) [BlockId]
succs
      live_in_joins :: LRegSet
live_in_joins = [LRegSet] -> LRegSet
unionsLRegSet ((BlockId -> LRegSet) -> [BlockId] -> [LRegSet]
forall a b. (a -> b) -> [a] -> [b]
map BlockId -> LRegSet
getLive [BlockId]
joins)

      -- We do not want to sink an assignment into multiple branches,
      -- so identify the set of registers live in multiple successors.
      -- This is made more complicated because when we sink an assignment
      -- into one branch, this might change the set of registers that are
      -- now live in multiple branches.
      init_live_sets :: [LRegSet]
init_live_sets = (BlockId -> LRegSet) -> [BlockId] -> [LRegSet]
forall a b. (a -> b) -> [a] -> [b]
map BlockId -> LRegSet
getLive [BlockId]
nonjoins
      live_in_multi :: [LRegSet] -> LocalReg -> Bool
live_in_multi [LRegSet]
live_sets LocalReg
r =
         case (LRegSet -> Bool) -> [LRegSet] -> [LRegSet]
forall a. (a -> Bool) -> [a] -> [a]
filter (LocalReg -> LRegSet -> Bool
elemLRegSet LocalReg
r) [LRegSet]
live_sets of
           (LRegSet
_one:LRegSet
_two:[LRegSet]
_) -> Bool
True
           [LRegSet]
_ -> Bool
False

{- TODO: enable this later, when we have some good tests in place to
   measure the effect and tune it.

-- small: an expression we don't mind duplicating
isSmall :: CmmExpr -> Bool
isSmall (CmmReg (CmmLocal _)) = True  --
isSmall (CmmLit _) = True
isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y
isSmall (CmmRegOff (CmmLocal _) _) = True
isSmall _ = False
-}

--
-- We allow duplication of trivial expressions: registers (both local and
-- global) and literals.
--
isTrivial :: Platform -> CmmExpr -> Bool
isTrivial :: Platform -> CmmExpr -> Bool
isTrivial Platform
_ (CmmReg (CmmLocal LocalReg
_)) = Bool
True
isTrivial Platform
platform (CmmReg (CmmGlobal (GlobalRegUse GlobalReg
r CmmType
_))) = -- see Note [Inline GlobalRegs?]
  if Arch -> Bool
isARM (Platform -> Arch
platformArch Platform
platform)
  then Bool
True -- CodeGen.Platform.ARM does not have globalRegMaybe
  else Maybe RealReg -> Bool
forall a. Maybe a -> Bool
isJust (Platform -> GlobalReg -> Maybe RealReg
globalRegMaybe Platform
platform GlobalReg
r)
  -- GlobalRegs that are loads from BaseReg are not trivial
isTrivial Platform
_ (CmmLit CmmLit
_) = Bool
True
isTrivial Platform
_ CmmExpr
_          = Bool
False

--
-- annotate each node with the set of registers live *after* the node
--
annotate :: Platform -> LRegSet -> [CmmNode O O] -> [(LRegSet, CmmNode O O)]
annotate :: Platform -> LRegSet -> [CmmNode O O] -> [(LRegSet, CmmNode O O)]
annotate Platform
platform LRegSet
live [CmmNode O O]
nodes = (LRegSet, [(LRegSet, CmmNode O O)]) -> [(LRegSet, CmmNode O O)]
forall a b. (a, b) -> b
snd ((LRegSet, [(LRegSet, CmmNode O O)]) -> [(LRegSet, CmmNode O O)])
-> (LRegSet, [(LRegSet, CmmNode O O)]) -> [(LRegSet, CmmNode O O)]
forall a b. (a -> b) -> a -> b
$ (CmmNode O O
 -> (LRegSet, [(LRegSet, CmmNode O O)])
 -> (LRegSet, [(LRegSet, CmmNode O O)]))
-> (LRegSet, [(LRegSet, CmmNode O O)])
-> [CmmNode O O]
-> (LRegSet, [(LRegSet, CmmNode O O)])
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr CmmNode O O
-> (LRegSet, [(LRegSet, CmmNode O O)])
-> (LRegSet, [(LRegSet, CmmNode O O)])
ann (LRegSet
live,[]) [CmmNode O O]
nodes
  where ann :: CmmNode O O
-> (LRegSet, [(LRegSet, CmmNode O O)])
-> (LRegSet, [(LRegSet, CmmNode O O)])
ann CmmNode O O
n (LRegSet
live,[(LRegSet, CmmNode O O)]
nodes) = (Platform -> CmmNode O O -> LRegSet -> LRegSet
forall n.
(DefinerOfRegs LocalReg n, UserOfRegs LocalReg n) =>
Platform -> n -> LRegSet -> LRegSet
gen_killL Platform
platform CmmNode O O
n LRegSet
live, (LRegSet
live,CmmNode O O
n) (LRegSet, CmmNode O O)
-> [(LRegSet, CmmNode O O)] -> [(LRegSet, CmmNode O O)]
forall a. a -> [a] -> [a]
: [(LRegSet, CmmNode O O)]
nodes)

--
-- Find the blocks that have multiple successors (join points)
--
findJoinPoints :: [CmmBlock] -> LabelMap Int
findJoinPoints :: [CmmBlock] -> LabelMap Int
findJoinPoints [CmmBlock]
blocks = (Int -> Bool) -> LabelMap Int -> LabelMap Int
forall v. (v -> Bool) -> LabelMap v -> LabelMap v
mapFilter (Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>Int
1) LabelMap Int
succ_counts
 where
  all_succs :: [BlockId]
all_succs = (CmmBlock -> [BlockId]) -> [CmmBlock] -> [BlockId]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap CmmBlock -> [BlockId]
forall (e :: Extensibility). Block CmmNode e C -> [BlockId]
forall (thing :: Extensibility -> Extensibility -> *)
       (e :: Extensibility).
NonLocal thing =>
thing e C -> [BlockId]
successors [CmmBlock]
blocks

  succ_counts :: LabelMap Int
  succ_counts :: LabelMap Int
succ_counts = (LabelMap Int -> BlockId -> LabelMap Int)
-> LabelMap Int -> [BlockId] -> LabelMap Int
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\LabelMap Int
acc BlockId
l -> (Int -> Int -> Int)
-> BlockId -> Int -> LabelMap Int -> LabelMap Int
forall v. (v -> v -> v) -> BlockId -> v -> LabelMap v -> LabelMap v
mapInsertWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) BlockId
l Int
1 LabelMap Int
acc) LabelMap Int
forall v. LabelMap v
mapEmpty [BlockId]
all_succs

--
-- filter the list of assignments to remove any assignments that
-- are not live in a continuation.
--
filterAssignments :: Platform -> LRegSet -> Assignments -> Assignments
filterAssignments :: Platform -> LRegSet -> Assignments -> Assignments
filterAssignments Platform
platform LRegSet
live Assignments
assigs = Assignments -> Assignments
forall a. [a] -> [a]
reverse (Assignments -> Assignments -> Assignments
go Assignments
assigs [])
  where go :: Assignments -> Assignments -> Assignments
go []             Assignments
kept = Assignments
kept
        go (a :: Assignment
a@(LocalReg
r,CmmExpr
_,AbsMem
_):Assignments
as) Assignments
kept | Bool
needed    = Assignments -> Assignments -> Assignments
go Assignments
as (Assignment
aAssignment -> Assignments -> Assignments
forall a. a -> [a] -> [a]
:Assignments
kept)
                               | Bool
otherwise = Assignments -> Assignments -> Assignments
go Assignments
as Assignments
kept
           where
              needed :: Bool
needed = LocalReg
r LocalReg -> LRegSet -> Bool
`elemLRegSet` LRegSet
live
                       Bool -> Bool -> Bool
|| (CmmNode O O -> Bool) -> [CmmNode O O] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Platform -> Assignment -> CmmNode O O -> Bool
forall (x :: Extensibility).
Platform -> Assignment -> CmmNode O x -> Bool
conflicts Platform
platform Assignment
a) ((Assignment -> CmmNode O O) -> Assignments -> [CmmNode O O]
forall a b. (a -> b) -> [a] -> [b]
map Assignment -> CmmNode O O
toNode Assignments
kept)
                       --  Note that we must keep assignments that are
                       -- referred to by other assignments we have
                       -- already kept.

-- -----------------------------------------------------------------------------
-- Walk through the nodes of a block, sinking and inlining assignments
-- as we go.
--
-- On input we pass in a:
--    * list of nodes in the block
--    * a list of assignments that appeared *before* this block and
--      that are being sunk.
--
-- On output we get:
--    * a new block
--    * a list of assignments that will be placed *after* that block.
--

walk :: CmmConfig
     -> [(LRegSet, CmmNode O O)]    -- nodes of the block, annotated with
                                        -- the set of registers live *after*
                                        -- this node.

     -> Assignments                     -- The current list of
                                        -- assignments we are sinking.
                                        -- Earlier assignments may refer
                                        -- to later ones.

     -> UniqSM ( Block CmmNode O O             -- The new block
               , Assignments                   -- Assignments to sink further
               )

walk :: CmmConfig
-> [(LRegSet, CmmNode O O)]
-> Assignments
-> UniqSM (Block CmmNode O O, Assignments)
walk CmmConfig
cfg [(LRegSet, CmmNode O O)]
nodes Assignments
assigs = [(LRegSet, CmmNode O O)]
-> Block CmmNode O O
-> Assignments
-> UniqSM (Block CmmNode O O, Assignments)
go [(LRegSet, CmmNode O O)]
nodes Block CmmNode O O
forall (n :: Extensibility -> Extensibility -> *). Block n O O
emptyBlock Assignments
assigs
 where
   platform :: Platform
platform = CmmConfig -> Platform
cmmPlatform CmmConfig
cfg
   go :: [(LRegSet, CmmNode O O)]
-> Block CmmNode O O
-> Assignments
-> UniqSM (Block CmmNode O O, Assignments)
go []               Block CmmNode O O
block Assignments
as = (Block CmmNode O O, Assignments)
-> UniqSM (Block CmmNode O O, Assignments)
forall a. a -> UniqSM a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Block CmmNode O O
block, Assignments
as)
   go ((LRegSet
live,CmmNode O O
node):[(LRegSet, CmmNode O O)]
ns) Block CmmNode O O
block Assignments
as
    -- discard nodes representing dead assignment
    | CmmNode O O -> LRegSet -> Bool
forall (e :: Extensibility) (x :: Extensibility).
CmmNode e x -> LRegSet -> Bool
shouldDiscard CmmNode O O
node LRegSet
live             = [(LRegSet, CmmNode O O)]
-> Block CmmNode O O
-> Assignments
-> UniqSM (Block CmmNode O O, Assignments)
go [(LRegSet, CmmNode O O)]
ns Block CmmNode O O
block Assignments
as
    | Bool
otherwise = do
      (prepend, node1) <- CmmConfig
-> Opt (CmmNode O O) -> UniqSM ([CmmNode O O], CmmNode O O)
forall a. CmmConfig -> Opt a -> UniqSM ([CmmNode O O], a)
runOpt CmmConfig
cfg (Opt (CmmNode O O) -> UniqSM ([CmmNode O O], CmmNode O O))
-> Opt (CmmNode O O) -> UniqSM ([CmmNode O O], CmmNode O O)
forall a b. (a -> b) -> a -> b
$ CmmNode O O -> Opt (CmmNode O O)
forall (e :: Extensibility) (x :: Extensibility).
CmmNode e x -> Opt (CmmNode e x)
constantFoldNode CmmNode O O
node
      if not (null prepend)
        then go (annotate platform live (prepend ++ [node1]) ++ ns) block as
        else do
          let -- Inline assignments
              (node2, as1) = tryToInline platform live node1 as
              -- Drop any earlier assignments conflicting with node2
              (dropped, as') = dropAssignmentsSimple platform
                                (\Assignment
a -> Platform -> Assignment -> CmmNode O O -> Bool
forall (x :: Extensibility).
Platform -> Assignment -> CmmNode O x -> Bool
conflicts Platform
platform Assignment
a CmmNode O O
node2) as1
              -- Walk over the rest of the block. Includes dropped assignments
              block' = (Block CmmNode O O -> CmmNode O O -> Block CmmNode O O)
-> Block CmmNode O O -> [CmmNode O O] -> Block CmmNode O O
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Block CmmNode O O -> CmmNode O O -> Block CmmNode O O
forall (n :: Extensibility -> Extensibility -> *)
       (e :: Extensibility).
Block n e O -> n O O -> Block n e O
blockSnoc Block CmmNode O O
block [CmmNode O O]
dropped Block CmmNode O O -> CmmNode O O -> Block CmmNode O O
forall (n :: Extensibility -> Extensibility -> *)
       (e :: Extensibility).
Block n e O -> n O O -> Block n e O
`blockSnoc` CmmNode O O
node2

          (prepend2, node3) <- runOpt cfg $ constantFoldNode node2
          if | not (null prepend2)                 -> go (annotate platform live (prepend2 ++ [node3]) ++ ns) block as
             -- sometimes only after simplification we can tell we can discard the node.
             -- See Note [Discard simplified nodes]
             | noOpAssignment node3                -> go ns block as
             -- Pick up interesting assignments
             | Just a <- shouldSink platform node3 -> go ns block (a : as1)
             -- Try inlining, drop assignments and move on
             | otherwise                           -> go ns block' as'

{- Note [Discard simplified nodes]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider a sequence like this:

      _c1::P64 = R1;
      _c3::I64 = I64[_c1::P64 + 1];
      R1 = _c1::P64;
      P64[Sp - 72] = _c1::P64;
      I64[Sp - 64] = _c3::I64;

If we discard assignments *before* simplifying nodes when we get to `R1 = _c1`.
This is then simplified into `R1 = `R1` and as a consequence prevents sinking of
loads from R1. What happens is that we:
    * Check if we can discard the node `R1 = _c1 (no)
    * Simplify the node to R1 = R1
    * We check all remaining assignments for conflicts.
    * The assignment `_c3 = [R1 + 1]`; (R1 already inlined on pickup)
      conflicts with R1 = R1, because it reads `R1` and the node writes
      to R1
    * This is clearly nonsensical because `R1 = R1` doesn't affect R1's value.

The solutions is to check if we can discard nodes before and *after* simplifying
them. We could only do it after as well, but I assume doing it early might save
some work.

That is if we process a assignment node we now:
    * Check if it can be discarded (because it's dead or a no-op)
    * Simplify the rhs of the assignment.
    * New: Check again if it might be a no-op now.
    * ...

This can help with problems like the one reported in #20334. For a full example see the test
cmm_sink_sp.

-}

--
-- Heuristic to decide whether to pick up and sink an assignment
-- Currently we pick up all assignments to local registers.  It might
-- be profitable to sink assignments to global regs too, but the
-- liveness analysis doesn't track those (yet) so we can't.
--
shouldSink :: Platform -> CmmNode e x -> Maybe Assignment
shouldSink :: forall (e :: Extensibility) (x :: Extensibility).
Platform -> CmmNode e x -> Maybe Assignment
shouldSink Platform
platform (CmmAssign (CmmLocal LocalReg
r) CmmExpr
e) | Bool
no_local_regs = Assignment -> Maybe Assignment
forall a. a -> Maybe a
Just (LocalReg
r, CmmExpr
e, Platform -> CmmExpr -> AbsMem
exprMem Platform
platform CmmExpr
e)
  where no_local_regs :: Bool
no_local_regs = Bool
True -- foldRegsUsed (\_ _ -> False) True e
shouldSink Platform
_ CmmNode e x
_other = Maybe Assignment
forall a. Maybe a
Nothing

--
-- discard dead assignments.  This doesn't do as good a job as
-- removeDeadAssignments, because it would need multiple passes
-- to get all the dead code, but it catches the common case of
-- superfluous reloads from the stack that the stack allocator
-- leaves behind.
--
-- Also we catch "r = r" here.  You might think it would fall
-- out of inlining, but the inliner will see that r is live
-- after the instruction and choose not to inline r in the rhs.
--
shouldDiscard :: CmmNode e x -> LRegSet -> Bool
shouldDiscard :: forall (e :: Extensibility) (x :: Extensibility).
CmmNode e x -> LRegSet -> Bool
shouldDiscard CmmNode e x
node LRegSet
live
   = case CmmNode e x
node of
       -- r = r
       CmmAssign CmmReg
r (CmmReg CmmReg
r') | CmmReg
r CmmReg -> CmmReg -> Bool
forall a. Eq a => a -> a -> Bool
== CmmReg
r' -> Bool
True
       -- r = e, r is dead after assignment
       CmmAssign (CmmLocal LocalReg
r) CmmExpr
_ -> Bool -> Bool
not (LocalReg
r LocalReg -> LRegSet -> Bool
`elemLRegSet` LRegSet
live)
       CmmNode e x
_otherwise -> Bool
False

noOpAssignment :: CmmNode e x -> Bool
noOpAssignment :: forall (e :: Extensibility) (x :: Extensibility).
CmmNode e x -> Bool
noOpAssignment CmmNode e x
node
   = case CmmNode e x
node of
       -- r = r
       CmmAssign CmmReg
r (CmmReg CmmReg
r') | CmmReg
r CmmReg -> CmmReg -> Bool
forall a. Eq a => a -> a -> Bool
== CmmReg
r' -> Bool
True
       CmmNode e x
_otherwise -> Bool
False


toNode :: Assignment -> CmmNode O O
toNode :: Assignment -> CmmNode O O
toNode (LocalReg
r,CmmExpr
rhs,AbsMem
_) = CmmReg -> CmmExpr -> CmmNode O O
CmmAssign (LocalReg -> CmmReg
CmmLocal LocalReg
r) CmmExpr
rhs

dropAssignmentsSimple :: Platform -> (Assignment -> Bool) -> Assignments
                      -> ([CmmNode O O], Assignments)
dropAssignmentsSimple :: Platform
-> (Assignment -> Bool)
-> Assignments
-> ([CmmNode O O], Assignments)
dropAssignmentsSimple Platform
platform Assignment -> Bool
f = Platform
-> (Assignment -> () -> (Bool, ()))
-> ()
-> Assignments
-> ([CmmNode O O], Assignments)
forall s.
Platform
-> (Assignment -> s -> (Bool, s))
-> s
-> Assignments
-> ([CmmNode O O], Assignments)
dropAssignments Platform
platform (\Assignment
a ()
_ -> (Assignment -> Bool
f Assignment
a, ())) ()

dropAssignments :: Platform -> (Assignment -> s -> (Bool, s)) -> s -> Assignments
                -> ([CmmNode O O], Assignments)
dropAssignments :: forall s.
Platform
-> (Assignment -> s -> (Bool, s))
-> s
-> Assignments
-> ([CmmNode O O], Assignments)
dropAssignments Platform
platform Assignment -> s -> (Bool, s)
should_drop s
state Assignments
assigs
 = ([CmmNode O O]
dropped, Assignments -> Assignments
forall a. [a] -> [a]
reverse Assignments
kept)
 where
   ([CmmNode O O]
dropped,Assignments
kept) = s
-> Assignments
-> [CmmNode O O]
-> Assignments
-> ([CmmNode O O], Assignments)
go s
state Assignments
assigs [] []

   go :: s
-> Assignments
-> [CmmNode O O]
-> Assignments
-> ([CmmNode O O], Assignments)
go s
_ []             [CmmNode O O]
dropped Assignments
kept = ([CmmNode O O]
dropped, Assignments
kept)
   go s
state (Assignment
assig : Assignments
rest) [CmmNode O O]
dropped Assignments
kept
      | Bool
conflict  =
          let !node :: CmmNode O O
node = Assignment -> CmmNode O O
toNode Assignment
assig
          in  s
-> Assignments
-> [CmmNode O O]
-> Assignments
-> ([CmmNode O O], Assignments)
go s
state' Assignments
rest (CmmNode O O
node CmmNode O O -> [CmmNode O O] -> [CmmNode O O]
forall a. a -> [a] -> [a]
: [CmmNode O O]
dropped) Assignments
kept
      | Bool
otherwise = s
-> Assignments
-> [CmmNode O O]
-> Assignments
-> ([CmmNode O O], Assignments)
go s
state' Assignments
rest [CmmNode O O]
dropped (Assignment
assigAssignment -> Assignments -> Assignments
forall a. a -> [a] -> [a]
:Assignments
kept)
      where
        (Bool
dropit, s
state') = Assignment -> s -> (Bool, s)
should_drop Assignment
assig s
state
        conflict :: Bool
conflict = Bool
dropit Bool -> Bool -> Bool
|| (CmmNode O O -> Bool) -> [CmmNode O O] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Platform -> Assignment -> CmmNode O O -> Bool
forall (x :: Extensibility).
Platform -> Assignment -> CmmNode O x -> Bool
conflicts Platform
platform Assignment
assig) [CmmNode O O]
dropped


-- -----------------------------------------------------------------------------
-- Try to inline assignments into a node.
-- This also does constant folding for primops, since
-- inlining opens up opportunities for doing so.

tryToInline
   :: forall x. Platform
   -> LRegSet               -- set of registers live after this
                                -- node.  We cannot inline anything
                                -- that is live after the node, unless
                                -- it is small enough to duplicate.
   -> CmmNode O x               -- The node to inline into
   -> Assignments               -- Assignments to inline
   -> (
        CmmNode O x             -- New node
      , Assignments             -- Remaining assignments
      )

tryToInline :: forall (x :: Extensibility).
Platform
-> LRegSet
-> CmmNode O x
-> Assignments
-> (CmmNode O x, Assignments)
tryToInline Platform
platform LRegSet
liveAfter CmmNode O x
node Assignments
assigs =
  -- pprTrace "tryToInline assig length:" (ppr $ length assigs) $
    UniqFM LocalReg Int
-> LRegSet
-> CmmNode O x
-> LRegSet
-> Assignments
-> (CmmNode O x, Assignments)
go UniqFM LocalReg Int
usages LRegSet
liveAfter CmmNode O x
node LRegSet
emptyLRegSet Assignments
assigs
 where
  usages :: UniqFM LocalReg Int -- Maps each LocalReg to a count of how often it is used
  usages :: UniqFM LocalReg Int
usages = Platform
-> (UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int)
-> UniqFM LocalReg Int
-> CmmNode O x
-> UniqFM LocalReg Int
forall a b.
UserOfRegs LocalReg a =>
Platform -> (b -> LocalReg -> b) -> b -> a -> b
foldLocalRegsUsed Platform
platform UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int
addUsage UniqFM LocalReg Int
forall {k} (key :: k) elt. UniqFM key elt
emptyUFM CmmNode O x
node

  go :: UniqFM LocalReg Int -> LRegSet -> CmmNode O x -> LRegSet -> Assignments
     -> (CmmNode O x, Assignments)
  go :: UniqFM LocalReg Int
-> LRegSet
-> CmmNode O x
-> LRegSet
-> Assignments
-> (CmmNode O x, Assignments)
go UniqFM LocalReg Int
_usages LRegSet
_live CmmNode O x
node LRegSet
_skipped [] = (CmmNode O x
node, [])

  go UniqFM LocalReg Int
usages LRegSet
live CmmNode O x
node LRegSet
skipped (a :: Assignment
a@(LocalReg
l,CmmExpr
rhs,AbsMem
_) : Assignments
rest)
   | Bool
cannot_inline            = (CmmNode O x, Assignments)
dont_inline
   | Bool
occurs_none              = (CmmNode O x, Assignments)
discard  -- See Note [discard during inlining]
   | Bool
occurs_once              = (CmmNode O x, Assignments)
inline_and_discard
   | Platform -> CmmExpr -> Bool
isTrivial Platform
platform CmmExpr
rhs   = (CmmNode O x, Assignments)
inline_and_keep
   | Bool
otherwise                = (CmmNode O x, Assignments)
dont_inline
   where
        inline_and_discard :: (CmmNode O x, Assignments)
inline_and_discard = UniqFM LocalReg Int
-> LRegSet
-> CmmNode O x
-> LRegSet
-> Assignments
-> (CmmNode O x, Assignments)
go UniqFM LocalReg Int
usages' LRegSet
live CmmNode O x
inl_node LRegSet
skipped Assignments
rest
          where usages' :: UniqFM LocalReg Int
usages' = Platform
-> (UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int)
-> UniqFM LocalReg Int
-> CmmExpr
-> UniqFM LocalReg Int
forall a b.
UserOfRegs LocalReg a =>
Platform -> (b -> LocalReg -> b) -> b -> a -> b
foldLocalRegsUsed Platform
platform UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int
addUsage UniqFM LocalReg Int
usages CmmExpr
rhs

        discard :: (CmmNode O x, Assignments)
discard = UniqFM LocalReg Int
-> LRegSet
-> CmmNode O x
-> LRegSet
-> Assignments
-> (CmmNode O x, Assignments)
go UniqFM LocalReg Int
usages LRegSet
live CmmNode O x
node LRegSet
skipped Assignments
rest

        dont_inline :: (CmmNode O x, Assignments)
dont_inline        = CmmNode O x -> (CmmNode O x, Assignments)
keep CmmNode O x
node  -- don't inline the assignment, keep it
        inline_and_keep :: (CmmNode O x, Assignments)
inline_and_keep    = CmmNode O x -> (CmmNode O x, Assignments)
keep CmmNode O x
inl_node -- inline the assignment, keep it

        keep :: CmmNode O x -> (CmmNode O x, Assignments)
        keep :: CmmNode O x -> (CmmNode O x, Assignments)
keep CmmNode O x
node' = (CmmNode O x
final_node, Assignment
a Assignment -> Assignments -> Assignments
forall a. a -> [a] -> [a]
: Assignments
rest')
          where (CmmNode O x
final_node, Assignments
rest') = UniqFM LocalReg Int
-> LRegSet
-> CmmNode O x
-> LRegSet
-> Assignments
-> (CmmNode O x, Assignments)
go UniqFM LocalReg Int
usages LRegSet
live' CmmNode O x
node' (LocalReg -> LRegSet -> LRegSet
insertLRegSet LocalReg
l LRegSet
skipped) Assignments
rest

                -- Avoid discarding of assignments to vars on the rhs.
                -- See Note [Keeping assignments mentioned in skipped RHSs]
                -- usages' = foldLocalRegsUsed platform (\m r -> addToUFM m r 2)
                                            -- usages rhs
                live' :: LRegSet
live' = (Platform
 -> (LRegSet -> LocalReg -> LRegSet)
 -> LRegSet
 -> CmmExpr
 -> LRegSet)
-> Platform
-> (LRegSet -> LocalReg -> LRegSet)
-> LRegSet
-> CmmExpr
-> LRegSet
forall a. a -> a
inline Platform
-> (LRegSet -> LocalReg -> LRegSet)
-> LRegSet
-> CmmExpr
-> LRegSet
forall a b.
UserOfRegs LocalReg a =>
Platform -> (b -> LocalReg -> b) -> b -> a -> b
foldLocalRegsUsed Platform
platform (\LRegSet
m LocalReg
r -> LocalReg -> LRegSet -> LRegSet
insertLRegSet LocalReg
r LRegSet
m)
                                            LRegSet
live CmmExpr
rhs

        cannot_inline :: Bool
cannot_inline = LRegSet
skipped LRegSet -> CmmExpr -> Bool
`regsUsedIn` CmmExpr
rhs -- See Note [dependent assignments]
                        Bool -> Bool -> Bool
|| LocalReg
l LocalReg -> LRegSet -> Bool
`elemLRegSet` LRegSet
skipped
                        Bool -> Bool -> Bool
|| Bool -> Bool
not (Platform -> CmmExpr -> CmmNode O x -> Bool
forall (e :: Extensibility) (x :: Extensibility).
Platform -> CmmExpr -> CmmNode e x -> Bool
okToInline Platform
platform CmmExpr
rhs CmmNode O x
node)

        -- How often is l used in the current node.
        l_usages :: Maybe Int
l_usages = UniqFM LocalReg Int -> LocalReg -> Maybe Int
forall key elt. Uniquable key => UniqFM key elt -> key -> Maybe elt
lookupUFM UniqFM LocalReg Int
usages LocalReg
l
        l_live :: Bool
l_live   = LocalReg
l LocalReg -> LRegSet -> Bool
`elemLRegSet` LRegSet
live

        occurs_once :: Bool
occurs_once = Bool -> Bool
not Bool
l_live Bool -> Bool -> Bool
&& Maybe Int
l_usages Maybe Int -> Maybe Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Maybe Int
forall a. a -> Maybe a
Just Int
1
        occurs_none :: Bool
occurs_none = Bool -> Bool
not Bool
l_live Bool -> Bool -> Bool
&& Maybe Int
l_usages Maybe Int -> Maybe Int -> Bool
forall a. Eq a => a -> a -> Bool
== Maybe Int
forall a. Maybe a
Nothing

        inl_node :: CmmNode O x
inl_node = CmmNode O x -> CmmNode O x
forall (x :: Extensibility). CmmNode O x -> CmmNode O x
improveConditional ((CmmExpr -> CmmExpr) -> CmmNode O x -> CmmNode O x
forall (e :: Extensibility) (x :: Extensibility).
(CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExpDeep CmmExpr -> CmmExpr
inl_exp CmmNode O x
node)

        inl_exp :: CmmExpr -> CmmExpr
        -- inl_exp is where the inlining actually takes place!
        inl_exp :: CmmExpr -> CmmExpr
inl_exp (CmmReg    (CmmLocal LocalReg
l'))     | LocalReg
l LocalReg -> LocalReg -> Bool
forall a. Eq a => a -> a -> Bool
== LocalReg
l' = CmmExpr
rhs
        inl_exp (CmmRegOff (CmmLocal LocalReg
l') Int
off) | LocalReg
l LocalReg -> LocalReg -> Bool
forall a. Eq a => a -> a -> Bool
== LocalReg
l'
                    = Platform -> CmmExpr -> Int -> CmmExpr
cmmOffset Platform
platform CmmExpr
rhs Int
off
                    -- re-constant fold after inlining
        inl_exp (CmmMachOp MachOp
op [CmmExpr]
args) = Platform -> MachOp -> [CmmExpr] -> CmmExpr
cmmMachOpFold Platform
platform MachOp
op [CmmExpr]
args
        inl_exp CmmExpr
other = CmmExpr
other

{- Note [Keeping assignments mentioned in skipped RHSs]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    If we have two assignments: [z = y, y = e1] and we skip
    z we *must* retain the assignment y = e1. This is because
    we might inline "z = y" into another node later on so we
    must ensure y is still defined at this point.

    If we dropped the assignment of "y = e1" then we would end up
    referencing a variable which hasn't been mentioned after
    inlining.

    We use a hack to do this.

    We pretend the regs from the rhs are live after the current
    node. Since we only discard assignments to variables
    which are dead after the current block this prevents discarding of the
    assignment. It still allows inlining should e1 be a trivial rhs
    however.

-}

{- Note [improveConditional]
   ~~~~~~~~~~~~~~~~~~~~~~~~~
cmmMachOpFold tries to simplify conditionals to turn things like
  (a == b) != 1
into
  (a != b)
but there's one case it can't handle: when the comparison is over
floating-point values, we can't invert it, because floating-point
comparisons aren't invertible (because of NaNs).

But we *can* optimise this conditional by swapping the true and false
branches. Given
  CmmCondBranch ((a >## b) != 1) t f
we can turn it into
  CmmCondBranch (a >## b) f t

So here we catch conditionals that weren't optimised by cmmMachOpFold,
and apply above transformation to eliminate the comparison against 1.

It's tempting to just turn every != into == and then let cmmMachOpFold
do its thing, but that risks changing a nice fall-through conditional
into one that requires two jumps. (see swapcond_last in
GHC.Cmm.ContFlowOpt), so instead we carefully look for just the cases where
we can eliminate a comparison.
-}
improveConditional :: CmmNode O x -> CmmNode O x
improveConditional :: forall (x :: Extensibility). CmmNode O x -> CmmNode O x
improveConditional
  (CmmCondBranch (CmmMachOp MachOp
mop [CmmExpr
x, CmmLit (CmmInt Integer
1 Width
_)]) BlockId
t BlockId
f Maybe Bool
l)
  | MachOp -> Bool
neLike MachOp
mop, CmmExpr -> Bool
isComparisonExpr CmmExpr
x
  = CmmExpr -> BlockId -> BlockId -> Maybe Bool -> CmmNode O C
CmmCondBranch CmmExpr
x BlockId
f BlockId
t ((Bool -> Bool) -> Maybe Bool -> Maybe Bool
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Bool -> Bool
not Maybe Bool
l)
  where
    neLike :: MachOp -> Bool
neLike (MO_Ne Width
_) = Bool
True
    neLike (MO_U_Lt Width
_) = Bool
True   -- (x<y) < 1 behaves like (x<y) != 1
    neLike (MO_S_Lt Width
_) = Bool
True   -- (x<y) < 1 behaves like (x<y) != 1
    neLike MachOp
_ = Bool
False
improveConditional CmmNode O x
other = CmmNode O x
other

-- Note [dependent assignments]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- If our assignment list looks like
--
--    [ y = e,  x = ... y ... ]
--
-- We cannot inline x.  Remember this list is really in reverse order,
-- so it means  x = ... y ...; y = e
--
-- Hence if we inline x, the outer assignment to y will capture the
-- reference in x's right hand side.
--
-- In this case we should rename the y in x's right-hand side,
-- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ]
-- Now we can go ahead and inline x.
--
-- For now we do nothing, because this would require putting
-- everything inside UniqSM.
--
-- One more variant of this (#7366):
--
--   [ y = e, y = z ]
--
-- If we don't want to inline y = e, because y is used many times, we
-- might still be tempted to inline y = z (because we always inline
-- trivial rhs's).  But of course we can't, because y is equal to e,
-- not z.

-- Note [discard during inlining]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Opportunities to discard assignments sometimes appear after we've
-- done some inlining.  Here's an example:
--
--      x = R1;
--      y = P64[x + 7];
--      z = P64[x + 15];
--      /* z is dead */
--      R1 = y & (-8);
--
-- The x assignment is trivial, so we inline it in the RHS of y, and
-- keep both x and y.  z gets dropped because it is dead, then we
-- inline y, and we have a dead assignment to x.  If we don't notice
-- that x is dead in tryToInline, we end up retaining it.

addUsage :: UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int
addUsage :: UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int
addUsage UniqFM LocalReg Int
m LocalReg
r = (Int -> Int -> Int)
-> UniqFM LocalReg Int -> LocalReg -> Int -> UniqFM LocalReg Int
forall key elt.
Uniquable key =>
(elt -> elt -> elt)
-> UniqFM key elt -> key -> elt -> UniqFM key elt
addToUFM_C Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) UniqFM LocalReg Int
m LocalReg
r Int
1

regsUsedIn :: LRegSet -> CmmExpr -> Bool
regsUsedIn :: LRegSet -> CmmExpr -> Bool
regsUsedIn LRegSet
ls CmmExpr
_ | LRegSet -> Bool
nullLRegSet LRegSet
ls = Bool
False
regsUsedIn LRegSet
ls CmmExpr
e = LRegSet -> CmmExpr -> Bool -> Bool
go LRegSet
ls CmmExpr
e Bool
False
  where use :: LRegSet -> CmmExpr -> Bool -> Bool
        use :: LRegSet -> CmmExpr -> Bool -> Bool
use LRegSet
ls (CmmReg (CmmLocal LocalReg
l))      Bool
_ | LocalReg
l LocalReg -> LRegSet -> Bool
`elemLRegSet` LRegSet
ls = Bool
True
        use LRegSet
ls (CmmRegOff (CmmLocal LocalReg
l) Int
_) Bool
_ | LocalReg
l LocalReg -> LRegSet -> Bool
`elemLRegSet` LRegSet
ls = Bool
True
        use LRegSet
_ls CmmExpr
_ Bool
z = Bool
z

        go :: LRegSet -> CmmExpr -> Bool -> Bool
        go :: LRegSet -> CmmExpr -> Bool -> Bool
go LRegSet
ls (CmmMachOp MachOp
_ [CmmExpr]
es)   Bool
z = (CmmExpr -> Bool -> Bool) -> Bool -> [CmmExpr] -> Bool
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (LRegSet -> CmmExpr -> Bool -> Bool
go LRegSet
ls) Bool
z [CmmExpr]
es
        go LRegSet
ls (CmmLoad CmmExpr
addr CmmType
_ AlignmentSpec
_) Bool
z = LRegSet -> CmmExpr -> Bool -> Bool
go LRegSet
ls CmmExpr
addr Bool
z
        go LRegSet
ls CmmExpr
e                  Bool
z = LRegSet -> CmmExpr -> Bool -> Bool
use LRegSet
ls CmmExpr
e Bool
z

-- we don't inline into CmmUnsafeForeignCall if the expression refers
-- to global registers.  This is a HACK to avoid global registers
-- clashing with C argument-passing registers, really the back-end
-- ought to be able to handle it properly, but currently neither PprC
-- nor the NCG can do it.  See Note [Register parameter passing]
-- See also GHC.StgToCmm.Foreign.load_args_into_temps.
okToInline :: Platform -> CmmExpr -> CmmNode e x -> Bool
okToInline :: forall (e :: Extensibility) (x :: Extensibility).
Platform -> CmmExpr -> CmmNode e x -> Bool
okToInline Platform
platform CmmExpr
expr node :: CmmNode e x
node@(CmmUnsafeForeignCall{}) =
    Bool -> Bool
not (Platform -> CmmExpr -> CmmNode e x -> Bool
forall (e :: Extensibility) (x :: Extensibility).
Platform -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict Platform
platform CmmExpr
expr CmmNode e x
node)
okToInline Platform
_ CmmExpr
_ CmmNode e x
_ = Bool
True

-- -----------------------------------------------------------------------------

-- | @conflicts (r,e) node@ is @False@ if and only if the assignment
-- @r = e@ can be safely commuted past statement @node@.
conflicts :: Platform -> Assignment -> CmmNode O x -> Bool
conflicts :: forall (x :: Extensibility).
Platform -> Assignment -> CmmNode O x -> Bool
conflicts Platform
platform (LocalReg
r, CmmExpr
rhs, AbsMem
addr) CmmNode O x
node

  -- (1) node defines registers used by rhs of assignment. This catches
  -- assignments and all three kinds of calls. See Note [Sinking and calls]
  | Platform -> CmmExpr -> CmmNode O x -> Bool
forall (e :: Extensibility) (x :: Extensibility).
Platform -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict Platform
platform CmmExpr
rhs CmmNode O x
node                       = Bool
True
  | Platform -> CmmExpr -> CmmNode O x -> Bool
forall (e :: Extensibility) (x :: Extensibility).
Platform -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict  Platform
platform CmmExpr
rhs CmmNode O x
node                       = Bool
True

  -- (2) node uses register defined by assignment
  | Platform
-> (Bool -> LocalReg -> Bool) -> Bool -> CmmNode O x -> Bool
forall b. Platform -> (b -> LocalReg -> b) -> b -> CmmNode O x -> b
forall r a b.
UserOfRegs r a =>
Platform -> (b -> r -> b) -> b -> a -> b
foldRegsUsed Platform
platform (\Bool
b LocalReg
r' -> LocalReg
r LocalReg -> LocalReg -> Bool
forall a. Eq a => a -> a -> Bool
== LocalReg
r' Bool -> Bool -> Bool
|| Bool
b) Bool
False CmmNode O x
node        = Bool
True

  -- (3) a store to an address conflicts with a read of the same memory
  | CmmStore CmmExpr
addr' CmmExpr
e AlignmentSpec
_ <- CmmNode O x
node
  , AbsMem -> AbsMem -> Bool
memConflicts AbsMem
addr (Platform -> CmmExpr -> Width -> AbsMem
loadAddr Platform
platform CmmExpr
addr' (Platform -> CmmExpr -> Width
cmmExprWidth Platform
platform CmmExpr
e)) = Bool
True

  -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively
  | AbsMem
HeapMem    <- AbsMem
addr, CmmAssign (CmmGlobal (GlobalRegUse GlobalReg
Hp CmmType
_)) CmmExpr
_ <- CmmNode O x
node        = Bool
True
  | AbsMem
StackMem   <- AbsMem
addr, CmmAssign (CmmGlobal (GlobalRegUse GlobalReg
Sp CmmType
_)) CmmExpr
_ <- CmmNode O x
node        = Bool
True
  | SpMem{}    <- AbsMem
addr, CmmAssign (CmmGlobal (GlobalRegUse GlobalReg
Sp CmmType
_)) CmmExpr
_ <- CmmNode O x
node        = Bool
True

  -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap]
  | CmmUnsafeForeignCall{} <- CmmNode O x
node, AbsMem -> AbsMem -> Bool
memConflicts AbsMem
addr AbsMem
AnyMem      = Bool
True

  -- (6) suspendThread clobbers every global register not backed by a real
  -- register. It also clobbers heap and stack but this is handled by (5)
  | CmmUnsafeForeignCall (PrimTarget CallishMachOp
MO_SuspendThread) [LocalReg]
_ [CmmExpr]
_ <- CmmNode O x
node
  , Platform -> (Bool -> GlobalReg -> Bool) -> Bool -> CmmExpr -> Bool
forall b. Platform -> (b -> GlobalReg -> b) -> b -> CmmExpr -> b
forall r a b.
UserOfRegs r a =>
Platform -> (b -> r -> b) -> b -> a -> b
foldRegsUsed Platform
platform (\Bool
b GlobalReg
g -> Platform -> GlobalReg -> Maybe RealReg
globalRegMaybe Platform
platform GlobalReg
g Maybe RealReg -> Maybe RealReg -> Bool
forall a. Eq a => a -> a -> Bool
== Maybe RealReg
forall a. Maybe a
Nothing Bool -> Bool -> Bool
|| Bool
b) Bool
False CmmExpr
rhs
  = Bool
True

  -- (7) native calls clobber any memory
  | CmmCall{} <- CmmNode O x
node, AbsMem -> AbsMem -> Bool
memConflicts AbsMem
addr AbsMem
AnyMem                   = Bool
True

  -- (8) otherwise, no conflict
  | Bool
otherwise = Bool
False

{- Note [Inlining foldRegsDefd]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   foldRegsDefd is, after optimization, *not* a small function so
   it's only marked INLINEABLE, but not INLINE.

   However in some specific cases we call it *very* often making it
   important to avoid the overhead of allocating the folding function.

   So we simply force inlining via the magic inline function.
   For T3294 this improves allocation with -O by ~1%.

-}

-- Returns True if node defines any global registers that are used in the
-- Cmm expression
globalRegistersConflict :: Platform -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict :: forall (e :: Extensibility) (x :: Extensibility).
Platform -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict Platform
platform CmmExpr
expr CmmNode e x
node =
   -- See Note [Inlining foldRegsDefd]
   (Platform
 -> (Bool -> GlobalReg -> Bool) -> Bool -> CmmNode e x -> Bool)
-> Platform
-> (Bool -> GlobalReg -> Bool)
-> Bool
-> CmmNode e x
-> Bool
forall a. a -> a
inline Platform
-> (Bool -> GlobalReg -> Bool) -> Bool -> CmmNode e x -> Bool
forall b.
Platform -> (b -> GlobalReg -> b) -> b -> CmmNode e x -> b
forall r a b.
DefinerOfRegs r a =>
Platform -> (b -> r -> b) -> b -> a -> b
foldRegsDefd Platform
platform (\Bool
b GlobalReg
r -> Bool
b Bool -> Bool -> Bool
|| Platform -> GlobalReg -> CmmExpr -> Bool
globalRegUsedIn Platform
platform GlobalReg
r CmmExpr
expr)
                Bool
False CmmNode e x
node

-- Returns True if node defines any local registers that are used in the
-- Cmm expression
localRegistersConflict :: Platform -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict :: forall (e :: Extensibility) (x :: Extensibility).
Platform -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict Platform
platform CmmExpr
expr CmmNode e x
node =
    -- See Note [Inlining foldRegsDefd]
    (Platform
 -> (Bool -> LocalReg -> Bool) -> Bool -> CmmNode e x -> Bool)
-> Platform
-> (Bool -> LocalReg -> Bool)
-> Bool
-> CmmNode e x
-> Bool
forall a. a -> a
inline Platform
-> (Bool -> LocalReg -> Bool) -> Bool -> CmmNode e x -> Bool
forall b. Platform -> (b -> LocalReg -> b) -> b -> CmmNode e x -> b
forall r a b.
DefinerOfRegs r a =>
Platform -> (b -> r -> b) -> b -> a -> b
foldRegsDefd Platform
platform (\Bool
b LocalReg
r -> Bool
b Bool -> Bool -> Bool
|| Platform -> CmmReg -> CmmExpr -> Bool
regUsedIn Platform
platform (LocalReg -> CmmReg
CmmLocal  LocalReg
r) CmmExpr
expr)
                 Bool
False CmmNode e x
node

-- Note [Sinking and calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~
-- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall)
-- and unsafe foreign (CmmUnsafeForeignCall). We perform sinking pass after
-- stack layout (see Note [Sinking after stack layout]) which leads to two
-- invariants related to calls:
--
--   a) during stack layout phase all safe foreign calls are turned into
--      unsafe foreign calls (see Note [Lower safe foreign calls]). This
--      means that we will never encounter CmmForeignCall node when running
--      sinking after stack layout
--
--   b) stack layout saves all variables live across a call on the stack
--      just before making a call (remember we are not sinking assignments to
--      stack):
--
--       L1:
--          x = R1
--          P64[Sp - 16] = L2
--          P64[Sp - 8]  = x
--          Sp = Sp - 16
--          call f() returns L2
--       L2:
--
--      We will attempt to sink { x = R1 } but we will detect conflict with
--      { P64[Sp - 8]  = x } and hence we will drop { x = R1 } without even
--      checking whether it conflicts with { call f() }. In this way we will
--      never need to check any assignment conflicts with CmmCall. Remember
--      that we still need to check for potential memory conflicts.
--
-- So the result is that we only need to worry about CmmUnsafeForeignCall nodes
-- when checking conflicts (see Note [Unsafe foreign calls clobber caller-save registers]).
-- This assumption holds only when we do sinking after stack layout. If we run
-- it before stack layout we need to check for possible conflicts with all three
-- kinds of calls. Our `conflicts` function does that by using a generic
-- foldRegsDefd and foldRegsUsed functions defined in DefinerOfRegs and
-- UserOfRegs typeclasses.
--

-- An abstraction of memory read or written.
data AbsMem
  = NoMem            -- no memory accessed
  | AnyMem           -- arbitrary memory
  | HeapMem          -- definitely heap memory
  | StackMem         -- definitely stack memory
  | SpMem            -- <size>[Sp+n]
       {-# UNPACK #-} !Int
       {-# UNPACK #-} !Int

-- Having SpMem is important because it lets us float loads from Sp
-- past stores to Sp as long as they don't overlap, and this helps to
-- unravel some long sequences of
--    x1 = [Sp + 8]
--    x2 = [Sp + 16]
--    ...
--    [Sp + 8]  = xi
--    [Sp + 16] = xj
--
-- Note that SpMem is invalidated if Sp is changed, but the definition
-- of 'conflicts' above handles that.

-- ToDo: this won't currently fix the following commonly occurring code:
--    x1 = [R1 + 8]
--    x2 = [R1 + 16]
--    ..
--    [Hp - 8] = x1
--    [Hp - 16] = x2
--    ..

-- because [R1 + 8] and [Hp - 8] are both HeapMem.  We know that
-- assignments to [Hp + n] do not conflict with any other heap memory,
-- but this is tricky to nail down.  What if we had
--
--   x = Hp + n
--   [x] = ...
--
--  the store to [x] should be "new heap", not "old heap".
--  Furthermore, you could imagine that if we started inlining
--  functions in Cmm then there might well be reads of heap memory
--  that was written in the same basic block.  To take advantage of
--  non-aliasing of heap memory we will have to be more clever.

-- Note [Foreign calls clobber heap]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- It is tempting to say that foreign calls clobber only
-- non-heap/stack memory, but unfortunately we break this invariant in
-- the RTS.  For example, in stg_catch_retry_frame we call
-- stmCommitNestedTransaction() which modifies the contents of the
-- TRec it is passed (this actually caused incorrect code to be
-- generated).
--
-- Since the invariant is true for the majority of foreign calls,
-- perhaps we ought to have a special annotation for calls that can
-- modify heap/stack memory.  For now we just use the conservative
-- definition here.
--
-- Some CallishMachOp imply a memory barrier e.g. AtomicRMW and
-- therefore we should never float any memory operations across one of
-- these calls.
--
-- `suspendThread` releases the capability used by the thread, hence we mustn't
-- float accesses to heap, stack or virtual global registers stored in the
-- capability (e.g. with unregisterised build, see #19237).


bothMems :: AbsMem -> AbsMem -> AbsMem
bothMems :: AbsMem -> AbsMem -> AbsMem
bothMems AbsMem
NoMem    AbsMem
x         = AbsMem
x
bothMems AbsMem
x        AbsMem
NoMem     = AbsMem
x
bothMems AbsMem
HeapMem  AbsMem
HeapMem   = AbsMem
HeapMem
bothMems AbsMem
StackMem AbsMem
StackMem     = AbsMem
StackMem
bothMems (SpMem Int
o1 Int
w1) (SpMem Int
o2 Int
w2)
  | Int
o1 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
o2  = Int -> Int -> AbsMem
SpMem Int
o1 (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
w1 Int
w2)
  | Bool
otherwise = AbsMem
StackMem
bothMems SpMem{}  AbsMem
StackMem  = AbsMem
StackMem
bothMems AbsMem
StackMem SpMem{}   = AbsMem
StackMem
bothMems AbsMem
_         AbsMem
_        = AbsMem
AnyMem

memConflicts :: AbsMem -> AbsMem -> Bool
memConflicts :: AbsMem -> AbsMem -> Bool
memConflicts AbsMem
NoMem      AbsMem
_          = Bool
False
memConflicts AbsMem
_          AbsMem
NoMem      = Bool
False
memConflicts AbsMem
HeapMem    AbsMem
StackMem   = Bool
False
memConflicts AbsMem
StackMem   AbsMem
HeapMem    = Bool
False
memConflicts SpMem{}    AbsMem
HeapMem    = Bool
False
memConflicts AbsMem
HeapMem    SpMem{}    = Bool
False
memConflicts (SpMem Int
o1 Int
w1) (SpMem Int
o2 Int
w2)
  | Int
o1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
o2   = Int
o1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
w1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
o2
  | Bool
otherwise = Int
o2 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
w2 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
o1
memConflicts AbsMem
_         AbsMem
_         = Bool
True

exprMem :: Platform -> CmmExpr -> AbsMem
exprMem :: Platform -> CmmExpr -> AbsMem
exprMem Platform
platform (CmmLoad CmmExpr
addr CmmType
w AlignmentSpec
_)  = AbsMem -> AbsMem -> AbsMem
bothMems (Platform -> CmmExpr -> Width -> AbsMem
loadAddr Platform
platform CmmExpr
addr (CmmType -> Width
typeWidth CmmType
w)) (Platform -> CmmExpr -> AbsMem
exprMem Platform
platform CmmExpr
addr)
exprMem Platform
platform (CmmMachOp MachOp
_ [CmmExpr]
es)    = (AbsMem -> AbsMem -> AbsMem) -> AbsMem -> [AbsMem] -> AbsMem
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr AbsMem -> AbsMem -> AbsMem
bothMems AbsMem
NoMem ((CmmExpr -> AbsMem) -> [CmmExpr] -> [AbsMem]
forall a b. (a -> b) -> [a] -> [b]
map (Platform -> CmmExpr -> AbsMem
exprMem Platform
platform) [CmmExpr]
es)
exprMem Platform
_        CmmExpr
_                   = AbsMem
NoMem

loadAddr :: Platform -> CmmExpr -> Width -> AbsMem
loadAddr :: Platform -> CmmExpr -> Width -> AbsMem
loadAddr Platform
platform CmmExpr
e Width
w =
  case CmmExpr
e of
   CmmReg CmmReg
r       -> CmmReg -> Int -> Width -> AbsMem
regAddr CmmReg
r Int
0 Width
w
   CmmRegOff CmmReg
r Int
i  -> CmmReg -> Int -> Width -> AbsMem
regAddr CmmReg
r Int
i Width
w
   CmmExpr
_other | Platform -> CmmReg -> CmmExpr -> Bool
regUsedIn Platform
platform (Platform -> CmmReg
spReg Platform
platform) CmmExpr
e -> AbsMem
StackMem
          | Bool
otherwise                             -> AbsMem
AnyMem

regAddr :: CmmReg -> Int -> Width -> AbsMem
regAddr :: CmmReg -> Int -> Width -> AbsMem
regAddr (CmmGlobal (GlobalRegUse GlobalReg
Sp CmmType
_)) Int
i Width
w = Int -> Int -> AbsMem
SpMem Int
i (Width -> Int
widthInBytes Width
w)
regAddr (CmmGlobal (GlobalRegUse GlobalReg
Hp CmmType
_)) Int
_ Width
_ = AbsMem
HeapMem
regAddr (CmmGlobal (GlobalRegUse GlobalReg
CurrentTSO CmmType
_)) Int
_ Width
_ = AbsMem
HeapMem -- important for PrimOps
regAddr CmmReg
r Int
_ Width
_ | CmmType -> Bool
isGcPtrType (CmmReg -> CmmType
cmmRegType CmmReg
r) = AbsMem
HeapMem -- yay! GCPtr pays for itself
regAddr CmmReg
_ Int
_ Width
_ = AbsMem
AnyMem

{-
Note [Inline GlobalRegs?]
~~~~~~~~~~~~~~~~~~~~~~~~~

Should we freely inline GlobalRegs?

Actually it doesn't make a huge amount of difference either way, so we
*do* currently treat GlobalRegs as "trivial" and inline them
everywhere, but for what it's worth, here is what I discovered when I
(SimonM) looked into this:

Common sense says we should not inline GlobalRegs, because when we
have

  x = R1

the register allocator will coalesce this assignment, generating no
code, and simply record the fact that x is bound to $rbx (or
whatever).  Furthermore, if we were to sink this assignment, then the
range of code over which R1 is live increases, and the range of code
over which x is live decreases.  All things being equal, it is better
for x to be live than R1, because R1 is a fixed register whereas x can
live in any register.  So we should neither sink nor inline 'x = R1'.

However, not inlining GlobalRegs can have surprising
consequences. e.g. (cgrun020)

  c3EN:
      _s3DB::P64 = R1;
      _c3ES::P64 = _s3DB::P64 & 7;
      if (_c3ES::P64 >= 2) goto c3EU; else goto c3EV;
  c3EU:
      _s3DD::P64 = P64[_s3DB::P64 + 6];
      _s3DE::P64 = P64[_s3DB::P64 + 14];
      I64[Sp - 8] = c3F0;
      R1 = _s3DE::P64;
      P64[Sp] = _s3DD::P64;

inlining the GlobalReg gives:

  c3EN:
      if (R1 & 7 >= 2) goto c3EU; else goto c3EV;
  c3EU:
      I64[Sp - 8] = c3F0;
      _s3DD::P64 = P64[R1 + 6];
      R1 = P64[R1 + 14];
      P64[Sp] = _s3DD::P64;

but if we don't inline the GlobalReg, instead we get:

      _s3DB::P64 = R1;
      if (_s3DB::P64 & 7 >= 2) goto c3EU; else goto c3EV;
  c3EU:
      I64[Sp - 8] = c3F0;
      R1 = P64[_s3DB::P64 + 14];
      P64[Sp] = P64[_s3DB::P64 + 6];

This looks better - we managed to inline _s3DD - but in fact it
generates an extra reg-reg move:

.Lc3EU:
        movq $c3F0_info,-8(%rbp)
        movq %rbx,%rax
        movq 14(%rbx),%rbx
        movq 6(%rax),%rax
        movq %rax,(%rbp)

because _s3DB is now live across the R1 assignment, we lost the
benefit of coalescing.

Who is at fault here?  Perhaps if we knew that _s3DB was an alias for
R1, then we would not sink a reference to _s3DB past the R1
assignment.  Or perhaps we *should* do that - we might gain by sinking
it, despite losing the coalescing opportunity.

Sometimes not inlining global registers wins by virtue of the rule
about not inlining into arguments of a foreign call, e.g. (T7163) this
is what happens when we inlined F1:

      _s3L2::F32 = F1;
      _c3O3::F32 = %MO_F_Mul_W32(F1, 10.0 :: W32);
      (_s3L7::F32) = call "ccall" arg hints:  []  result hints:  [] rintFloat(_c3O3::F32);

but if we don't inline F1:

      (_s3L7::F32) = call "ccall" arg hints:  []  result hints:  [] rintFloat(%MO_F_Mul_W32(_s3L2::F32,
                                                                                            10.0 :: W32));
-}