{-# LANGUAGE GADTs #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE MultiWayIf #-} module GHC.Cmm.Sink ( cmmSink ) where import GHC.Prelude import GHC.Cmm import GHC.Cmm.Opt import GHC.Cmm.Liveness import GHC.Cmm.LRegSet import GHC.Cmm.Utils import GHC.Cmm.Dataflow.Block import GHC.Cmm.Dataflow.Label import GHC.Cmm.Dataflow.Graph import GHC.Platform.Regs import GHC.Platform import GHC.Types.Unique.FM import GHC.Types.Unique.Supply import GHC.Cmm.Config import Data.List (partition) import Data.Maybe import GHC.Exts (inline) -- ----------------------------------------------------------------------------- -- Sinking and inlining -- This is an optimisation pass that -- (a) moves assignments closer to their uses, to reduce register pressure -- (b) pushes assignments into a single branch of a conditional if possible -- (c) inlines assignments to registers that are mentioned only once -- (d) discards dead assignments -- -- This tightens up lots of register-heavy code. It is particularly -- helpful in the Cmm generated by the Stg->Cmm code generator, in -- which every function starts with a copyIn sequence like: -- -- x1 = R1 -- x2 = Sp[8] -- x3 = Sp[16] -- if (Sp - 32 < SpLim) then L1 else L2 -- -- we really want to push the x1..x3 assignments into the L2 branch. -- -- Algorithm: -- -- * Start by doing liveness analysis. -- -- * Keep a list of assignments A; earlier ones may refer to later ones. -- Currently we only sink assignments to local registers, because we don't -- have liveness information about global registers. -- -- * Walk forwards through the graph, look at each node N: -- -- * If it is a dead assignment, i.e. assignment to a register that is -- not used after N, discard it. -- -- * Try to inline based on current list of assignments -- * If any assignments in A (1) occur only once in N, and (2) are -- not live after N, inline the assignment and remove it -- from A. -- -- * If an assignment in A is cheap (RHS is local register), then -- inline the assignment and keep it in A in case it is used afterwards. -- -- * Otherwise don't inline. -- -- * If N is assignment to a local register pick up the assignment -- and add it to A. -- -- * If N is not an assignment to a local register: -- * remove any assignments from A that conflict with N, and -- place them before N in the current block. We call this -- "dropping" the assignments. -- -- * An assignment conflicts with N if it: -- - assigns to a register mentioned in N -- - mentions a register assigned by N -- - reads from memory written by N -- * do this recursively, dropping dependent assignments -- -- * At an exit node: -- * drop any assignments that are live on more than one successor -- and are not trivial -- * if any successor has more than one predecessor (a join-point), -- drop everything live in that successor. Since we only propagate -- assignments that are not dead at the successor, we will therefore -- eliminate all assignments dead at this point. Thus analysis of a -- join-point will always begin with an empty list of assignments. -- -- -- As a result of above algorithm, sinking deletes some dead assignments -- (transitively, even). This isn't as good as removeDeadAssignments, -- but it's much cheaper. -- ----------------------------------------------------------------------------- -- things that we aren't optimising very well yet. -- -- ----------- -- (1) From GHC's FastString.hashStr: -- -- s2ay: -- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp; -- c2gn: -- R1 = _s2au::I64; -- call (I64[Sp])(R1) args: 8, res: 0, upd: 8; -- c2gp: -- _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128, -- 4091); -- _s2an::I64 = _s2an::I64 + 1; -- _s2au::I64 = _s2cO::I64; -- goto s2ay; -- -- a nice loop, but we didn't eliminate the silly assignment at the end. -- See Note [dependent assignments], which would probably fix this. -- This is #8336. -- -- ----------- -- (2) From stg_atomically_frame in PrimOps.cmm -- -- We have a diamond control flow: -- -- x = ... -- | -- / \ -- A B -- \ / -- | -- use of x -- -- Now x won't be sunk down to its use, because we won't push it into -- both branches of the conditional. We certainly do have to check -- that we can sink it past all the code in both A and B, but having -- discovered that, we could sink it to its use. -- -- ----------------------------------------------------------------------------- type Assignment = (LocalReg, CmmExpr, AbsMem) -- Assignment caches AbsMem, an abstraction of the memory read by -- the RHS of the assignment. type Assignments = [Assignment] -- A sequence of assignments; kept in *reverse* order -- So the list [ x=e1, y=e2 ] means the sequence of assignments -- y = e2 -- x = e1 cmmSink :: CmmConfig -> CmmGraph -> UniqSM CmmGraph cmmSink :: CmmConfig -> CmmGraph -> UniqSM CmmGraph cmmSink CmmConfig cfg CmmGraph graph = BlockId -> [CmmBlock] -> CmmGraph ofBlockList (CmmGraph -> BlockId forall (n :: Extensibility -> Extensibility -> *). GenCmmGraph n -> BlockId g_entry CmmGraph graph) ([CmmBlock] -> CmmGraph) -> UniqSM [CmmBlock] -> UniqSM CmmGraph forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> LabelMap Assignments -> [CmmBlock] -> UniqSM [CmmBlock] sink LabelMap Assignments forall v. LabelMap v mapEmpty [CmmBlock] blocks where platform :: Platform platform = CmmConfig -> Platform cmmPlatform CmmConfig cfg liveness :: BlockEntryLivenessL liveness = Platform -> CmmGraph -> BlockEntryLivenessL cmmLocalLivenessL Platform platform CmmGraph graph getLive :: BlockId -> LRegSet getLive BlockId l = LRegSet -> BlockId -> BlockEntryLivenessL -> LRegSet forall a. a -> BlockId -> LabelMap a -> a mapFindWithDefault LRegSet emptyLRegSet BlockId l BlockEntryLivenessL liveness blocks :: [CmmBlock] blocks = CmmGraph -> [CmmBlock] revPostorder CmmGraph graph join_pts :: LabelMap Int join_pts = [CmmBlock] -> LabelMap Int findJoinPoints [CmmBlock] blocks sink :: LabelMap Assignments -> [CmmBlock] -> UniqSM [CmmBlock] sink :: LabelMap Assignments -> [CmmBlock] -> UniqSM [CmmBlock] sink LabelMap Assignments _ [] = [CmmBlock] -> UniqSM [CmmBlock] forall a. a -> UniqSM a forall (f :: * -> *) a. Applicative f => a -> f a pure [] sink LabelMap Assignments sunk (CmmBlock b:[CmmBlock] bs) = do -- Now sink and inline in this block (prepend, last_fold) <- CmmConfig -> Opt (CmmNode O C) -> UniqSM ([CmmNode O O], CmmNode O C) forall a. CmmConfig -> Opt a -> UniqSM ([CmmNode O O], a) runOpt CmmConfig cfg (Opt (CmmNode O C) -> UniqSM ([CmmNode O O], CmmNode O C)) -> Opt (CmmNode O C) -> UniqSM ([CmmNode O O], CmmNode O C) forall a b. (a -> b) -> a -> b $ CmmNode O C -> Opt (CmmNode O C) forall (e :: Extensibility) (x :: Extensibility). CmmNode e x -> Opt (CmmNode e x) constantFoldNode CmmNode O C last (middle', assigs) <- walk cfg (ann_middles ++ annotate platform live_middle prepend) (mapFindWithDefault [] lbl sunk) let (final_last, assigs') = tryToInline platform live last_fold assigs -- Now, drop any assignments that we will not sink any further. (dropped_last, assigs'') = dropAssignments platform drop_if init_live_sets assigs' drop_if :: (LocalReg, CmmExpr, AbsMem) -> [LRegSet] -> (Bool, [LRegSet]) drop_if a :: Assignment a@(LocalReg r,CmmExpr rhs,AbsMem _) [LRegSet] live_sets = (Bool should_drop, [LRegSet] live_sets') where should_drop :: Bool should_drop = Platform -> Assignment -> CmmNode O C -> Bool forall (x :: Extensibility). Platform -> Assignment -> CmmNode O x -> Bool conflicts Platform platform Assignment a CmmNode O C final_last Bool -> Bool -> Bool || Bool -> Bool not (Platform -> CmmExpr -> Bool isTrivial Platform platform CmmExpr rhs) Bool -> Bool -> Bool && [LRegSet] -> LocalReg -> Bool live_in_multi [LRegSet] live_sets LocalReg r Bool -> Bool -> Bool || LocalReg r LocalReg -> LRegSet -> Bool `elemLRegSet` LRegSet live_in_joins live_sets' :: [LRegSet] live_sets' | Bool should_drop = [LRegSet] live_sets | Bool otherwise = (LRegSet -> LRegSet) -> [LRegSet] -> [LRegSet] forall a b. (a -> b) -> [a] -> [b] map LRegSet -> LRegSet upd [LRegSet] live_sets upd :: LRegSet -> LRegSet upd LRegSet set | LocalReg r LocalReg -> LRegSet -> Bool `elemLRegSet` LRegSet set = LRegSet set LRegSet -> LRegSet -> LRegSet `unionLRegSet` LRegSet live_rhs | Bool otherwise = LRegSet set live_rhs :: LRegSet live_rhs = Platform -> (LRegSet -> LocalReg -> LRegSet) -> LRegSet -> CmmExpr -> LRegSet forall b. Platform -> (b -> LocalReg -> b) -> b -> CmmExpr -> b forall r a b. UserOfRegs r a => Platform -> (b -> r -> b) -> b -> a -> b foldRegsUsed Platform platform ((LocalReg -> LRegSet -> LRegSet) -> LRegSet -> LocalReg -> LRegSet forall a b c. (a -> b -> c) -> b -> a -> c flip LocalReg -> LRegSet -> LRegSet insertLRegSet) LRegSet emptyLRegSet CmmExpr rhs final_middle = (Block CmmNode O O -> CmmNode O O -> Block CmmNode O O) -> Block CmmNode O O -> [CmmNode O O] -> Block CmmNode O O forall b a. (b -> a -> b) -> b -> [a] -> b forall (t :: * -> *) b a. Foldable t => (b -> a -> b) -> b -> t a -> b foldl' Block CmmNode O O -> CmmNode O O -> Block CmmNode O O forall (n :: Extensibility -> Extensibility -> *) (e :: Extensibility). Block n e O -> n O O -> Block n e O blockSnoc Block CmmNode O O middle' [CmmNode O O] dropped_last sunk' = LabelMap Assignments -> LabelMap Assignments -> LabelMap Assignments forall v. LabelMap v -> LabelMap v -> LabelMap v mapUnion LabelMap Assignments sunk (LabelMap Assignments -> LabelMap Assignments) -> LabelMap Assignments -> LabelMap Assignments forall a b. (a -> b) -> a -> b $ [(BlockId, Assignments)] -> LabelMap Assignments forall v. [(BlockId, v)] -> LabelMap v mapFromList [ (BlockId l, Platform -> LRegSet -> Assignments -> Assignments filterAssignments Platform platform (BlockId -> LRegSet getLive BlockId l) Assignments assigs'') | BlockId l <- [BlockId] succs ] (blockJoin first final_middle final_last :) <$> sink sunk' bs where lbl :: BlockId lbl = CmmBlock -> BlockId forall (x :: Extensibility). Block CmmNode C x -> BlockId forall (thing :: Extensibility -> Extensibility -> *) (x :: Extensibility). NonLocal thing => thing C x -> BlockId entryLabel CmmBlock b (CmmNode C O first, Block CmmNode O O middle, CmmNode O C last) = CmmBlock -> (CmmNode C O, Block CmmNode O O, CmmNode O C) forall (n :: Extensibility -> Extensibility -> *). Block n C C -> (n C O, Block n O O, n O C) blockSplit CmmBlock b succs :: [BlockId] succs = CmmNode O C -> [BlockId] forall (e :: Extensibility). CmmNode e C -> [BlockId] forall (thing :: Extensibility -> Extensibility -> *) (e :: Extensibility). NonLocal thing => thing e C -> [BlockId] successors CmmNode O C last -- Annotate the middle nodes with the registers live *after* -- the node. This will help us decide whether we can inline -- an assignment in the current node or not. live :: LRegSet live = [LRegSet] -> LRegSet unionsLRegSet ((BlockId -> LRegSet) -> [BlockId] -> [LRegSet] forall a b. (a -> b) -> [a] -> [b] map BlockId -> LRegSet getLive [BlockId] succs) live_middle :: LRegSet live_middle = Platform -> CmmNode O C -> LRegSet -> LRegSet forall n. (DefinerOfRegs LocalReg n, UserOfRegs LocalReg n) => Platform -> n -> LRegSet -> LRegSet gen_killL Platform platform CmmNode O C last LRegSet live ann_middles :: [(LRegSet, CmmNode O O)] ann_middles = Platform -> LRegSet -> [CmmNode O O] -> [(LRegSet, CmmNode O O)] annotate Platform platform LRegSet live_middle (Block CmmNode O O -> [CmmNode O O] forall (n :: Extensibility -> Extensibility -> *). Block n O O -> [n O O] blockToList Block CmmNode O O middle) -- We cannot sink into join points (successors with more than -- one predecessor), so identify the join points and the set -- of registers live in them. ([BlockId] joins, [BlockId] nonjoins) = (BlockId -> Bool) -> [BlockId] -> ([BlockId], [BlockId]) forall a. (a -> Bool) -> [a] -> ([a], [a]) partition (BlockId -> LabelMap Int -> Bool forall a. BlockId -> LabelMap a -> Bool `mapMember` LabelMap Int join_pts) [BlockId] succs live_in_joins :: LRegSet live_in_joins = [LRegSet] -> LRegSet unionsLRegSet ((BlockId -> LRegSet) -> [BlockId] -> [LRegSet] forall a b. (a -> b) -> [a] -> [b] map BlockId -> LRegSet getLive [BlockId] joins) -- We do not want to sink an assignment into multiple branches, -- so identify the set of registers live in multiple successors. -- This is made more complicated because when we sink an assignment -- into one branch, this might change the set of registers that are -- now live in multiple branches. init_live_sets :: [LRegSet] init_live_sets = (BlockId -> LRegSet) -> [BlockId] -> [LRegSet] forall a b. (a -> b) -> [a] -> [b] map BlockId -> LRegSet getLive [BlockId] nonjoins live_in_multi :: [LRegSet] -> LocalReg -> Bool live_in_multi [LRegSet] live_sets LocalReg r = case (LRegSet -> Bool) -> [LRegSet] -> [LRegSet] forall a. (a -> Bool) -> [a] -> [a] filter (LocalReg -> LRegSet -> Bool elemLRegSet LocalReg r) [LRegSet] live_sets of (LRegSet _one:LRegSet _two:[LRegSet] _) -> Bool True [LRegSet] _ -> Bool False {- TODO: enable this later, when we have some good tests in place to measure the effect and tune it. -- small: an expression we don't mind duplicating isSmall :: CmmExpr -> Bool isSmall (CmmReg (CmmLocal _)) = True -- isSmall (CmmLit _) = True isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y isSmall (CmmRegOff (CmmLocal _) _) = True isSmall _ = False -} -- -- We allow duplication of trivial expressions: registers (both local and -- global) and literals. -- isTrivial :: Platform -> CmmExpr -> Bool isTrivial :: Platform -> CmmExpr -> Bool isTrivial Platform _ (CmmReg (CmmLocal LocalReg _)) = Bool True isTrivial Platform platform (CmmReg (CmmGlobal (GlobalRegUse GlobalReg r CmmType _))) = -- see Note [Inline GlobalRegs?] if Arch -> Bool isARM (Platform -> Arch platformArch Platform platform) then Bool True -- CodeGen.Platform.ARM does not have globalRegMaybe else Maybe RealReg -> Bool forall a. Maybe a -> Bool isJust (Platform -> GlobalReg -> Maybe RealReg globalRegMaybe Platform platform GlobalReg r) -- GlobalRegs that are loads from BaseReg are not trivial isTrivial Platform _ (CmmLit CmmLit _) = Bool True isTrivial Platform _ CmmExpr _ = Bool False -- -- annotate each node with the set of registers live *after* the node -- annotate :: Platform -> LRegSet -> [CmmNode O O] -> [(LRegSet, CmmNode O O)] annotate :: Platform -> LRegSet -> [CmmNode O O] -> [(LRegSet, CmmNode O O)] annotate Platform platform LRegSet live [CmmNode O O] nodes = (LRegSet, [(LRegSet, CmmNode O O)]) -> [(LRegSet, CmmNode O O)] forall a b. (a, b) -> b snd ((LRegSet, [(LRegSet, CmmNode O O)]) -> [(LRegSet, CmmNode O O)]) -> (LRegSet, [(LRegSet, CmmNode O O)]) -> [(LRegSet, CmmNode O O)] forall a b. (a -> b) -> a -> b $ (CmmNode O O -> (LRegSet, [(LRegSet, CmmNode O O)]) -> (LRegSet, [(LRegSet, CmmNode O O)])) -> (LRegSet, [(LRegSet, CmmNode O O)]) -> [CmmNode O O] -> (LRegSet, [(LRegSet, CmmNode O O)]) forall a b. (a -> b -> b) -> b -> [a] -> b forall (t :: * -> *) a b. Foldable t => (a -> b -> b) -> b -> t a -> b foldr CmmNode O O -> (LRegSet, [(LRegSet, CmmNode O O)]) -> (LRegSet, [(LRegSet, CmmNode O O)]) ann (LRegSet live,[]) [CmmNode O O] nodes where ann :: CmmNode O O -> (LRegSet, [(LRegSet, CmmNode O O)]) -> (LRegSet, [(LRegSet, CmmNode O O)]) ann CmmNode O O n (LRegSet live,[(LRegSet, CmmNode O O)] nodes) = (Platform -> CmmNode O O -> LRegSet -> LRegSet forall n. (DefinerOfRegs LocalReg n, UserOfRegs LocalReg n) => Platform -> n -> LRegSet -> LRegSet gen_killL Platform platform CmmNode O O n LRegSet live, (LRegSet live,CmmNode O O n) (LRegSet, CmmNode O O) -> [(LRegSet, CmmNode O O)] -> [(LRegSet, CmmNode O O)] forall a. a -> [a] -> [a] : [(LRegSet, CmmNode O O)] nodes) -- -- Find the blocks that have multiple successors (join points) -- findJoinPoints :: [CmmBlock] -> LabelMap Int findJoinPoints :: [CmmBlock] -> LabelMap Int findJoinPoints [CmmBlock] blocks = (Int -> Bool) -> LabelMap Int -> LabelMap Int forall v. (v -> Bool) -> LabelMap v -> LabelMap v mapFilter (Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >Int 1) LabelMap Int succ_counts where all_succs :: [BlockId] all_succs = (CmmBlock -> [BlockId]) -> [CmmBlock] -> [BlockId] forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b] concatMap CmmBlock -> [BlockId] forall (e :: Extensibility). Block CmmNode e C -> [BlockId] forall (thing :: Extensibility -> Extensibility -> *) (e :: Extensibility). NonLocal thing => thing e C -> [BlockId] successors [CmmBlock] blocks succ_counts :: LabelMap Int succ_counts :: LabelMap Int succ_counts = (LabelMap Int -> BlockId -> LabelMap Int) -> LabelMap Int -> [BlockId] -> LabelMap Int forall b a. (b -> a -> b) -> b -> [a] -> b forall (t :: * -> *) b a. Foldable t => (b -> a -> b) -> b -> t a -> b foldl' (\LabelMap Int acc BlockId l -> (Int -> Int -> Int) -> BlockId -> Int -> LabelMap Int -> LabelMap Int forall v. (v -> v -> v) -> BlockId -> v -> LabelMap v -> LabelMap v mapInsertWith Int -> Int -> Int forall a. Num a => a -> a -> a (+) BlockId l Int 1 LabelMap Int acc) LabelMap Int forall v. LabelMap v mapEmpty [BlockId] all_succs -- -- filter the list of assignments to remove any assignments that -- are not live in a continuation. -- filterAssignments :: Platform -> LRegSet -> Assignments -> Assignments filterAssignments :: Platform -> LRegSet -> Assignments -> Assignments filterAssignments Platform platform LRegSet live Assignments assigs = Assignments -> Assignments forall a. [a] -> [a] reverse (Assignments -> Assignments -> Assignments go Assignments assigs []) where go :: Assignments -> Assignments -> Assignments go [] Assignments kept = Assignments kept go (a :: Assignment a@(LocalReg r,CmmExpr _,AbsMem _):Assignments as) Assignments kept | Bool needed = Assignments -> Assignments -> Assignments go Assignments as (Assignment aAssignment -> Assignments -> Assignments forall a. a -> [a] -> [a] :Assignments kept) | Bool otherwise = Assignments -> Assignments -> Assignments go Assignments as Assignments kept where needed :: Bool needed = LocalReg r LocalReg -> LRegSet -> Bool `elemLRegSet` LRegSet live Bool -> Bool -> Bool || (CmmNode O O -> Bool) -> [CmmNode O O] -> Bool forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool any (Platform -> Assignment -> CmmNode O O -> Bool forall (x :: Extensibility). Platform -> Assignment -> CmmNode O x -> Bool conflicts Platform platform Assignment a) ((Assignment -> CmmNode O O) -> Assignments -> [CmmNode O O] forall a b. (a -> b) -> [a] -> [b] map Assignment -> CmmNode O O toNode Assignments kept) -- Note that we must keep assignments that are -- referred to by other assignments we have -- already kept. -- ----------------------------------------------------------------------------- -- Walk through the nodes of a block, sinking and inlining assignments -- as we go. -- -- On input we pass in a: -- * list of nodes in the block -- * a list of assignments that appeared *before* this block and -- that are being sunk. -- -- On output we get: -- * a new block -- * a list of assignments that will be placed *after* that block. -- walk :: CmmConfig -> [(LRegSet, CmmNode O O)] -- nodes of the block, annotated with -- the set of registers live *after* -- this node. -> Assignments -- The current list of -- assignments we are sinking. -- Earlier assignments may refer -- to later ones. -> UniqSM ( Block CmmNode O O -- The new block , Assignments -- Assignments to sink further ) walk :: CmmConfig -> [(LRegSet, CmmNode O O)] -> Assignments -> UniqSM (Block CmmNode O O, Assignments) walk CmmConfig cfg [(LRegSet, CmmNode O O)] nodes Assignments assigs = [(LRegSet, CmmNode O O)] -> Block CmmNode O O -> Assignments -> UniqSM (Block CmmNode O O, Assignments) go [(LRegSet, CmmNode O O)] nodes Block CmmNode O O forall (n :: Extensibility -> Extensibility -> *). Block n O O emptyBlock Assignments assigs where platform :: Platform platform = CmmConfig -> Platform cmmPlatform CmmConfig cfg go :: [(LRegSet, CmmNode O O)] -> Block CmmNode O O -> Assignments -> UniqSM (Block CmmNode O O, Assignments) go [] Block CmmNode O O block Assignments as = (Block CmmNode O O, Assignments) -> UniqSM (Block CmmNode O O, Assignments) forall a. a -> UniqSM a forall (f :: * -> *) a. Applicative f => a -> f a pure (Block CmmNode O O block, Assignments as) go ((LRegSet live,CmmNode O O node):[(LRegSet, CmmNode O O)] ns) Block CmmNode O O block Assignments as -- discard nodes representing dead assignment | CmmNode O O -> LRegSet -> Bool forall (e :: Extensibility) (x :: Extensibility). CmmNode e x -> LRegSet -> Bool shouldDiscard CmmNode O O node LRegSet live = [(LRegSet, CmmNode O O)] -> Block CmmNode O O -> Assignments -> UniqSM (Block CmmNode O O, Assignments) go [(LRegSet, CmmNode O O)] ns Block CmmNode O O block Assignments as | Bool otherwise = do (prepend, node1) <- CmmConfig -> Opt (CmmNode O O) -> UniqSM ([CmmNode O O], CmmNode O O) forall a. CmmConfig -> Opt a -> UniqSM ([CmmNode O O], a) runOpt CmmConfig cfg (Opt (CmmNode O O) -> UniqSM ([CmmNode O O], CmmNode O O)) -> Opt (CmmNode O O) -> UniqSM ([CmmNode O O], CmmNode O O) forall a b. (a -> b) -> a -> b $ CmmNode O O -> Opt (CmmNode O O) forall (e :: Extensibility) (x :: Extensibility). CmmNode e x -> Opt (CmmNode e x) constantFoldNode CmmNode O O node if not (null prepend) then go (annotate platform live (prepend ++ [node1]) ++ ns) block as else do let -- Inline assignments (node2, as1) = tryToInline platform live node1 as -- Drop any earlier assignments conflicting with node2 (dropped, as') = dropAssignmentsSimple platform (\Assignment a -> Platform -> Assignment -> CmmNode O O -> Bool forall (x :: Extensibility). Platform -> Assignment -> CmmNode O x -> Bool conflicts Platform platform Assignment a CmmNode O O node2) as1 -- Walk over the rest of the block. Includes dropped assignments block' = (Block CmmNode O O -> CmmNode O O -> Block CmmNode O O) -> Block CmmNode O O -> [CmmNode O O] -> Block CmmNode O O forall b a. (b -> a -> b) -> b -> [a] -> b forall (t :: * -> *) b a. Foldable t => (b -> a -> b) -> b -> t a -> b foldl' Block CmmNode O O -> CmmNode O O -> Block CmmNode O O forall (n :: Extensibility -> Extensibility -> *) (e :: Extensibility). Block n e O -> n O O -> Block n e O blockSnoc Block CmmNode O O block [CmmNode O O] dropped Block CmmNode O O -> CmmNode O O -> Block CmmNode O O forall (n :: Extensibility -> Extensibility -> *) (e :: Extensibility). Block n e O -> n O O -> Block n e O `blockSnoc` CmmNode O O node2 (prepend2, node3) <- runOpt cfg $ constantFoldNode node2 if | not (null prepend2) -> go (annotate platform live (prepend2 ++ [node3]) ++ ns) block as -- sometimes only after simplification we can tell we can discard the node. -- See Note [Discard simplified nodes] | noOpAssignment node3 -> go ns block as -- Pick up interesting assignments | Just a <- shouldSink platform node3 -> go ns block (a : as1) -- Try inlining, drop assignments and move on | otherwise -> go ns block' as' {- Note [Discard simplified nodes] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Consider a sequence like this: _c1::P64 = R1; _c3::I64 = I64[_c1::P64 + 1]; R1 = _c1::P64; P64[Sp - 72] = _c1::P64; I64[Sp - 64] = _c3::I64; If we discard assignments *before* simplifying nodes when we get to `R1 = _c1`. This is then simplified into `R1 = `R1` and as a consequence prevents sinking of loads from R1. What happens is that we: * Check if we can discard the node `R1 = _c1 (no) * Simplify the node to R1 = R1 * We check all remaining assignments for conflicts. * The assignment `_c3 = [R1 + 1]`; (R1 already inlined on pickup) conflicts with R1 = R1, because it reads `R1` and the node writes to R1 * This is clearly nonsensical because `R1 = R1` doesn't affect R1's value. The solutions is to check if we can discard nodes before and *after* simplifying them. We could only do it after as well, but I assume doing it early might save some work. That is if we process a assignment node we now: * Check if it can be discarded (because it's dead or a no-op) * Simplify the rhs of the assignment. * New: Check again if it might be a no-op now. * ... This can help with problems like the one reported in #20334. For a full example see the test cmm_sink_sp. -} -- -- Heuristic to decide whether to pick up and sink an assignment -- Currently we pick up all assignments to local registers. It might -- be profitable to sink assignments to global regs too, but the -- liveness analysis doesn't track those (yet) so we can't. -- shouldSink :: Platform -> CmmNode e x -> Maybe Assignment shouldSink :: forall (e :: Extensibility) (x :: Extensibility). Platform -> CmmNode e x -> Maybe Assignment shouldSink Platform platform (CmmAssign (CmmLocal LocalReg r) CmmExpr e) | Bool no_local_regs = Assignment -> Maybe Assignment forall a. a -> Maybe a Just (LocalReg r, CmmExpr e, Platform -> CmmExpr -> AbsMem exprMem Platform platform CmmExpr e) where no_local_regs :: Bool no_local_regs = Bool True -- foldRegsUsed (\_ _ -> False) True e shouldSink Platform _ CmmNode e x _other = Maybe Assignment forall a. Maybe a Nothing -- -- discard dead assignments. This doesn't do as good a job as -- removeDeadAssignments, because it would need multiple passes -- to get all the dead code, but it catches the common case of -- superfluous reloads from the stack that the stack allocator -- leaves behind. -- -- Also we catch "r = r" here. You might think it would fall -- out of inlining, but the inliner will see that r is live -- after the instruction and choose not to inline r in the rhs. -- shouldDiscard :: CmmNode e x -> LRegSet -> Bool shouldDiscard :: forall (e :: Extensibility) (x :: Extensibility). CmmNode e x -> LRegSet -> Bool shouldDiscard CmmNode e x node LRegSet live = case CmmNode e x node of -- r = r CmmAssign CmmReg r (CmmReg CmmReg r') | CmmReg r CmmReg -> CmmReg -> Bool forall a. Eq a => a -> a -> Bool == CmmReg r' -> Bool True -- r = e, r is dead after assignment CmmAssign (CmmLocal LocalReg r) CmmExpr _ -> Bool -> Bool not (LocalReg r LocalReg -> LRegSet -> Bool `elemLRegSet` LRegSet live) CmmNode e x _otherwise -> Bool False noOpAssignment :: CmmNode e x -> Bool noOpAssignment :: forall (e :: Extensibility) (x :: Extensibility). CmmNode e x -> Bool noOpAssignment CmmNode e x node = case CmmNode e x node of -- r = r CmmAssign CmmReg r (CmmReg CmmReg r') | CmmReg r CmmReg -> CmmReg -> Bool forall a. Eq a => a -> a -> Bool == CmmReg r' -> Bool True CmmNode e x _otherwise -> Bool False toNode :: Assignment -> CmmNode O O toNode :: Assignment -> CmmNode O O toNode (LocalReg r,CmmExpr rhs,AbsMem _) = CmmReg -> CmmExpr -> CmmNode O O CmmAssign (LocalReg -> CmmReg CmmLocal LocalReg r) CmmExpr rhs dropAssignmentsSimple :: Platform -> (Assignment -> Bool) -> Assignments -> ([CmmNode O O], Assignments) dropAssignmentsSimple :: Platform -> (Assignment -> Bool) -> Assignments -> ([CmmNode O O], Assignments) dropAssignmentsSimple Platform platform Assignment -> Bool f = Platform -> (Assignment -> () -> (Bool, ())) -> () -> Assignments -> ([CmmNode O O], Assignments) forall s. Platform -> (Assignment -> s -> (Bool, s)) -> s -> Assignments -> ([CmmNode O O], Assignments) dropAssignments Platform platform (\Assignment a () _ -> (Assignment -> Bool f Assignment a, ())) () dropAssignments :: Platform -> (Assignment -> s -> (Bool, s)) -> s -> Assignments -> ([CmmNode O O], Assignments) dropAssignments :: forall s. Platform -> (Assignment -> s -> (Bool, s)) -> s -> Assignments -> ([CmmNode O O], Assignments) dropAssignments Platform platform Assignment -> s -> (Bool, s) should_drop s state Assignments assigs = ([CmmNode O O] dropped, Assignments -> Assignments forall a. [a] -> [a] reverse Assignments kept) where ([CmmNode O O] dropped,Assignments kept) = s -> Assignments -> [CmmNode O O] -> Assignments -> ([CmmNode O O], Assignments) go s state Assignments assigs [] [] go :: s -> Assignments -> [CmmNode O O] -> Assignments -> ([CmmNode O O], Assignments) go s _ [] [CmmNode O O] dropped Assignments kept = ([CmmNode O O] dropped, Assignments kept) go s state (Assignment assig : Assignments rest) [CmmNode O O] dropped Assignments kept | Bool conflict = let !node :: CmmNode O O node = Assignment -> CmmNode O O toNode Assignment assig in s -> Assignments -> [CmmNode O O] -> Assignments -> ([CmmNode O O], Assignments) go s state' Assignments rest (CmmNode O O node CmmNode O O -> [CmmNode O O] -> [CmmNode O O] forall a. a -> [a] -> [a] : [CmmNode O O] dropped) Assignments kept | Bool otherwise = s -> Assignments -> [CmmNode O O] -> Assignments -> ([CmmNode O O], Assignments) go s state' Assignments rest [CmmNode O O] dropped (Assignment assigAssignment -> Assignments -> Assignments forall a. a -> [a] -> [a] :Assignments kept) where (Bool dropit, s state') = Assignment -> s -> (Bool, s) should_drop Assignment assig s state conflict :: Bool conflict = Bool dropit Bool -> Bool -> Bool || (CmmNode O O -> Bool) -> [CmmNode O O] -> Bool forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool any (Platform -> Assignment -> CmmNode O O -> Bool forall (x :: Extensibility). Platform -> Assignment -> CmmNode O x -> Bool conflicts Platform platform Assignment assig) [CmmNode O O] dropped -- ----------------------------------------------------------------------------- -- Try to inline assignments into a node. -- This also does constant folding for primops, since -- inlining opens up opportunities for doing so. tryToInline :: forall x. Platform -> LRegSet -- set of registers live after this -- node. We cannot inline anything -- that is live after the node, unless -- it is small enough to duplicate. -> CmmNode O x -- The node to inline into -> Assignments -- Assignments to inline -> ( CmmNode O x -- New node , Assignments -- Remaining assignments ) tryToInline :: forall (x :: Extensibility). Platform -> LRegSet -> CmmNode O x -> Assignments -> (CmmNode O x, Assignments) tryToInline Platform platform LRegSet liveAfter CmmNode O x node Assignments assigs = -- pprTrace "tryToInline assig length:" (ppr $ length assigs) $ UniqFM LocalReg Int -> LRegSet -> CmmNode O x -> LRegSet -> Assignments -> (CmmNode O x, Assignments) go UniqFM LocalReg Int usages LRegSet liveAfter CmmNode O x node LRegSet emptyLRegSet Assignments assigs where usages :: UniqFM LocalReg Int -- Maps each LocalReg to a count of how often it is used usages :: UniqFM LocalReg Int usages = Platform -> (UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int) -> UniqFM LocalReg Int -> CmmNode O x -> UniqFM LocalReg Int forall a b. UserOfRegs LocalReg a => Platform -> (b -> LocalReg -> b) -> b -> a -> b foldLocalRegsUsed Platform platform UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int addUsage UniqFM LocalReg Int forall {k} (key :: k) elt. UniqFM key elt emptyUFM CmmNode O x node go :: UniqFM LocalReg Int -> LRegSet -> CmmNode O x -> LRegSet -> Assignments -> (CmmNode O x, Assignments) go :: UniqFM LocalReg Int -> LRegSet -> CmmNode O x -> LRegSet -> Assignments -> (CmmNode O x, Assignments) go UniqFM LocalReg Int _usages LRegSet _live CmmNode O x node LRegSet _skipped [] = (CmmNode O x node, []) go UniqFM LocalReg Int usages LRegSet live CmmNode O x node LRegSet skipped (a :: Assignment a@(LocalReg l,CmmExpr rhs,AbsMem _) : Assignments rest) | Bool cannot_inline = (CmmNode O x, Assignments) dont_inline | Bool occurs_none = (CmmNode O x, Assignments) discard -- See Note [discard during inlining] | Bool occurs_once = (CmmNode O x, Assignments) inline_and_discard | Platform -> CmmExpr -> Bool isTrivial Platform platform CmmExpr rhs = (CmmNode O x, Assignments) inline_and_keep | Bool otherwise = (CmmNode O x, Assignments) dont_inline where inline_and_discard :: (CmmNode O x, Assignments) inline_and_discard = UniqFM LocalReg Int -> LRegSet -> CmmNode O x -> LRegSet -> Assignments -> (CmmNode O x, Assignments) go UniqFM LocalReg Int usages' LRegSet live CmmNode O x inl_node LRegSet skipped Assignments rest where usages' :: UniqFM LocalReg Int usages' = Platform -> (UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int) -> UniqFM LocalReg Int -> CmmExpr -> UniqFM LocalReg Int forall a b. UserOfRegs LocalReg a => Platform -> (b -> LocalReg -> b) -> b -> a -> b foldLocalRegsUsed Platform platform UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int addUsage UniqFM LocalReg Int usages CmmExpr rhs discard :: (CmmNode O x, Assignments) discard = UniqFM LocalReg Int -> LRegSet -> CmmNode O x -> LRegSet -> Assignments -> (CmmNode O x, Assignments) go UniqFM LocalReg Int usages LRegSet live CmmNode O x node LRegSet skipped Assignments rest dont_inline :: (CmmNode O x, Assignments) dont_inline = CmmNode O x -> (CmmNode O x, Assignments) keep CmmNode O x node -- don't inline the assignment, keep it inline_and_keep :: (CmmNode O x, Assignments) inline_and_keep = CmmNode O x -> (CmmNode O x, Assignments) keep CmmNode O x inl_node -- inline the assignment, keep it keep :: CmmNode O x -> (CmmNode O x, Assignments) keep :: CmmNode O x -> (CmmNode O x, Assignments) keep CmmNode O x node' = (CmmNode O x final_node, Assignment a Assignment -> Assignments -> Assignments forall a. a -> [a] -> [a] : Assignments rest') where (CmmNode O x final_node, Assignments rest') = UniqFM LocalReg Int -> LRegSet -> CmmNode O x -> LRegSet -> Assignments -> (CmmNode O x, Assignments) go UniqFM LocalReg Int usages LRegSet live' CmmNode O x node' (LocalReg -> LRegSet -> LRegSet insertLRegSet LocalReg l LRegSet skipped) Assignments rest -- Avoid discarding of assignments to vars on the rhs. -- See Note [Keeping assignments mentioned in skipped RHSs] -- usages' = foldLocalRegsUsed platform (\m r -> addToUFM m r 2) -- usages rhs live' :: LRegSet live' = (Platform -> (LRegSet -> LocalReg -> LRegSet) -> LRegSet -> CmmExpr -> LRegSet) -> Platform -> (LRegSet -> LocalReg -> LRegSet) -> LRegSet -> CmmExpr -> LRegSet forall a. a -> a inline Platform -> (LRegSet -> LocalReg -> LRegSet) -> LRegSet -> CmmExpr -> LRegSet forall a b. UserOfRegs LocalReg a => Platform -> (b -> LocalReg -> b) -> b -> a -> b foldLocalRegsUsed Platform platform (\LRegSet m LocalReg r -> LocalReg -> LRegSet -> LRegSet insertLRegSet LocalReg r LRegSet m) LRegSet live CmmExpr rhs cannot_inline :: Bool cannot_inline = LRegSet skipped LRegSet -> CmmExpr -> Bool `regsUsedIn` CmmExpr rhs -- See Note [dependent assignments] Bool -> Bool -> Bool || LocalReg l LocalReg -> LRegSet -> Bool `elemLRegSet` LRegSet skipped Bool -> Bool -> Bool || Bool -> Bool not (Platform -> CmmExpr -> CmmNode O x -> Bool forall (e :: Extensibility) (x :: Extensibility). Platform -> CmmExpr -> CmmNode e x -> Bool okToInline Platform platform CmmExpr rhs CmmNode O x node) -- How often is l used in the current node. l_usages :: Maybe Int l_usages = UniqFM LocalReg Int -> LocalReg -> Maybe Int forall key elt. Uniquable key => UniqFM key elt -> key -> Maybe elt lookupUFM UniqFM LocalReg Int usages LocalReg l l_live :: Bool l_live = LocalReg l LocalReg -> LRegSet -> Bool `elemLRegSet` LRegSet live occurs_once :: Bool occurs_once = Bool -> Bool not Bool l_live Bool -> Bool -> Bool && Maybe Int l_usages Maybe Int -> Maybe Int -> Bool forall a. Eq a => a -> a -> Bool == Int -> Maybe Int forall a. a -> Maybe a Just Int 1 occurs_none :: Bool occurs_none = Bool -> Bool not Bool l_live Bool -> Bool -> Bool && Maybe Int l_usages Maybe Int -> Maybe Int -> Bool forall a. Eq a => a -> a -> Bool == Maybe Int forall a. Maybe a Nothing inl_node :: CmmNode O x inl_node = CmmNode O x -> CmmNode O x forall (x :: Extensibility). CmmNode O x -> CmmNode O x improveConditional ((CmmExpr -> CmmExpr) -> CmmNode O x -> CmmNode O x forall (e :: Extensibility) (x :: Extensibility). (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x mapExpDeep CmmExpr -> CmmExpr inl_exp CmmNode O x node) inl_exp :: CmmExpr -> CmmExpr -- inl_exp is where the inlining actually takes place! inl_exp :: CmmExpr -> CmmExpr inl_exp (CmmReg (CmmLocal LocalReg l')) | LocalReg l LocalReg -> LocalReg -> Bool forall a. Eq a => a -> a -> Bool == LocalReg l' = CmmExpr rhs inl_exp (CmmRegOff (CmmLocal LocalReg l') Int off) | LocalReg l LocalReg -> LocalReg -> Bool forall a. Eq a => a -> a -> Bool == LocalReg l' = Platform -> CmmExpr -> Int -> CmmExpr cmmOffset Platform platform CmmExpr rhs Int off -- re-constant fold after inlining inl_exp (CmmMachOp MachOp op [CmmExpr] args) = Platform -> MachOp -> [CmmExpr] -> CmmExpr cmmMachOpFold Platform platform MachOp op [CmmExpr] args inl_exp CmmExpr other = CmmExpr other {- Note [Keeping assignments mentioned in skipped RHSs] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If we have two assignments: [z = y, y = e1] and we skip z we *must* retain the assignment y = e1. This is because we might inline "z = y" into another node later on so we must ensure y is still defined at this point. If we dropped the assignment of "y = e1" then we would end up referencing a variable which hasn't been mentioned after inlining. We use a hack to do this. We pretend the regs from the rhs are live after the current node. Since we only discard assignments to variables which are dead after the current block this prevents discarding of the assignment. It still allows inlining should e1 be a trivial rhs however. -} {- Note [improveConditional] ~~~~~~~~~~~~~~~~~~~~~~~~~ cmmMachOpFold tries to simplify conditionals to turn things like (a == b) != 1 into (a != b) but there's one case it can't handle: when the comparison is over floating-point values, we can't invert it, because floating-point comparisons aren't invertible (because of NaNs). But we *can* optimise this conditional by swapping the true and false branches. Given CmmCondBranch ((a >## b) != 1) t f we can turn it into CmmCondBranch (a >## b) f t So here we catch conditionals that weren't optimised by cmmMachOpFold, and apply above transformation to eliminate the comparison against 1. It's tempting to just turn every != into == and then let cmmMachOpFold do its thing, but that risks changing a nice fall-through conditional into one that requires two jumps. (see swapcond_last in GHC.Cmm.ContFlowOpt), so instead we carefully look for just the cases where we can eliminate a comparison. -} improveConditional :: CmmNode O x -> CmmNode O x improveConditional :: forall (x :: Extensibility). CmmNode O x -> CmmNode O x improveConditional (CmmCondBranch (CmmMachOp MachOp mop [CmmExpr x, CmmLit (CmmInt Integer 1 Width _)]) BlockId t BlockId f Maybe Bool l) | MachOp -> Bool neLike MachOp mop, CmmExpr -> Bool isComparisonExpr CmmExpr x = CmmExpr -> BlockId -> BlockId -> Maybe Bool -> CmmNode O C CmmCondBranch CmmExpr x BlockId f BlockId t ((Bool -> Bool) -> Maybe Bool -> Maybe Bool forall a b. (a -> b) -> Maybe a -> Maybe b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap Bool -> Bool not Maybe Bool l) where neLike :: MachOp -> Bool neLike (MO_Ne Width _) = Bool True neLike (MO_U_Lt Width _) = Bool True -- (x<y) < 1 behaves like (x<y) != 1 neLike (MO_S_Lt Width _) = Bool True -- (x<y) < 1 behaves like (x<y) != 1 neLike MachOp _ = Bool False improveConditional CmmNode O x other = CmmNode O x other -- Note [dependent assignments] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- If our assignment list looks like -- -- [ y = e, x = ... y ... ] -- -- We cannot inline x. Remember this list is really in reverse order, -- so it means x = ... y ...; y = e -- -- Hence if we inline x, the outer assignment to y will capture the -- reference in x's right hand side. -- -- In this case we should rename the y in x's right-hand side, -- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ] -- Now we can go ahead and inline x. -- -- For now we do nothing, because this would require putting -- everything inside UniqSM. -- -- One more variant of this (#7366): -- -- [ y = e, y = z ] -- -- If we don't want to inline y = e, because y is used many times, we -- might still be tempted to inline y = z (because we always inline -- trivial rhs's). But of course we can't, because y is equal to e, -- not z. -- Note [discard during inlining] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- Opportunities to discard assignments sometimes appear after we've -- done some inlining. Here's an example: -- -- x = R1; -- y = P64[x + 7]; -- z = P64[x + 15]; -- /* z is dead */ -- R1 = y & (-8); -- -- The x assignment is trivial, so we inline it in the RHS of y, and -- keep both x and y. z gets dropped because it is dead, then we -- inline y, and we have a dead assignment to x. If we don't notice -- that x is dead in tryToInline, we end up retaining it. addUsage :: UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int addUsage :: UniqFM LocalReg Int -> LocalReg -> UniqFM LocalReg Int addUsage UniqFM LocalReg Int m LocalReg r = (Int -> Int -> Int) -> UniqFM LocalReg Int -> LocalReg -> Int -> UniqFM LocalReg Int forall key elt. Uniquable key => (elt -> elt -> elt) -> UniqFM key elt -> key -> elt -> UniqFM key elt addToUFM_C Int -> Int -> Int forall a. Num a => a -> a -> a (+) UniqFM LocalReg Int m LocalReg r Int 1 regsUsedIn :: LRegSet -> CmmExpr -> Bool regsUsedIn :: LRegSet -> CmmExpr -> Bool regsUsedIn LRegSet ls CmmExpr _ | LRegSet -> Bool nullLRegSet LRegSet ls = Bool False regsUsedIn LRegSet ls CmmExpr e = LRegSet -> CmmExpr -> Bool -> Bool go LRegSet ls CmmExpr e Bool False where use :: LRegSet -> CmmExpr -> Bool -> Bool use :: LRegSet -> CmmExpr -> Bool -> Bool use LRegSet ls (CmmReg (CmmLocal LocalReg l)) Bool _ | LocalReg l LocalReg -> LRegSet -> Bool `elemLRegSet` LRegSet ls = Bool True use LRegSet ls (CmmRegOff (CmmLocal LocalReg l) Int _) Bool _ | LocalReg l LocalReg -> LRegSet -> Bool `elemLRegSet` LRegSet ls = Bool True use LRegSet _ls CmmExpr _ Bool z = Bool z go :: LRegSet -> CmmExpr -> Bool -> Bool go :: LRegSet -> CmmExpr -> Bool -> Bool go LRegSet ls (CmmMachOp MachOp _ [CmmExpr] es) Bool z = (CmmExpr -> Bool -> Bool) -> Bool -> [CmmExpr] -> Bool forall a b. (a -> b -> b) -> b -> [a] -> b forall (t :: * -> *) a b. Foldable t => (a -> b -> b) -> b -> t a -> b foldr (LRegSet -> CmmExpr -> Bool -> Bool go LRegSet ls) Bool z [CmmExpr] es go LRegSet ls (CmmLoad CmmExpr addr CmmType _ AlignmentSpec _) Bool z = LRegSet -> CmmExpr -> Bool -> Bool go LRegSet ls CmmExpr addr Bool z go LRegSet ls CmmExpr e Bool z = LRegSet -> CmmExpr -> Bool -> Bool use LRegSet ls CmmExpr e Bool z -- we don't inline into CmmUnsafeForeignCall if the expression refers -- to global registers. This is a HACK to avoid global registers -- clashing with C argument-passing registers, really the back-end -- ought to be able to handle it properly, but currently neither PprC -- nor the NCG can do it. See Note [Register parameter passing] -- See also GHC.StgToCmm.Foreign.load_args_into_temps. okToInline :: Platform -> CmmExpr -> CmmNode e x -> Bool okToInline :: forall (e :: Extensibility) (x :: Extensibility). Platform -> CmmExpr -> CmmNode e x -> Bool okToInline Platform platform CmmExpr expr node :: CmmNode e x node@(CmmUnsafeForeignCall{}) = Bool -> Bool not (Platform -> CmmExpr -> CmmNode e x -> Bool forall (e :: Extensibility) (x :: Extensibility). Platform -> CmmExpr -> CmmNode e x -> Bool globalRegistersConflict Platform platform CmmExpr expr CmmNode e x node) okToInline Platform _ CmmExpr _ CmmNode e x _ = Bool True -- ----------------------------------------------------------------------------- -- | @conflicts (r,e) node@ is @False@ if and only if the assignment -- @r = e@ can be safely commuted past statement @node@. conflicts :: Platform -> Assignment -> CmmNode O x -> Bool conflicts :: forall (x :: Extensibility). Platform -> Assignment -> CmmNode O x -> Bool conflicts Platform platform (LocalReg r, CmmExpr rhs, AbsMem addr) CmmNode O x node -- (1) node defines registers used by rhs of assignment. This catches -- assignments and all three kinds of calls. See Note [Sinking and calls] | Platform -> CmmExpr -> CmmNode O x -> Bool forall (e :: Extensibility) (x :: Extensibility). Platform -> CmmExpr -> CmmNode e x -> Bool globalRegistersConflict Platform platform CmmExpr rhs CmmNode O x node = Bool True | Platform -> CmmExpr -> CmmNode O x -> Bool forall (e :: Extensibility) (x :: Extensibility). Platform -> CmmExpr -> CmmNode e x -> Bool localRegistersConflict Platform platform CmmExpr rhs CmmNode O x node = Bool True -- (2) node uses register defined by assignment | Platform -> (Bool -> LocalReg -> Bool) -> Bool -> CmmNode O x -> Bool forall b. Platform -> (b -> LocalReg -> b) -> b -> CmmNode O x -> b forall r a b. UserOfRegs r a => Platform -> (b -> r -> b) -> b -> a -> b foldRegsUsed Platform platform (\Bool b LocalReg r' -> LocalReg r LocalReg -> LocalReg -> Bool forall a. Eq a => a -> a -> Bool == LocalReg r' Bool -> Bool -> Bool || Bool b) Bool False CmmNode O x node = Bool True -- (3) a store to an address conflicts with a read of the same memory | CmmStore CmmExpr addr' CmmExpr e AlignmentSpec _ <- CmmNode O x node , AbsMem -> AbsMem -> Bool memConflicts AbsMem addr (Platform -> CmmExpr -> Width -> AbsMem loadAddr Platform platform CmmExpr addr' (Platform -> CmmExpr -> Width cmmExprWidth Platform platform CmmExpr e)) = Bool True -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively | AbsMem HeapMem <- AbsMem addr, CmmAssign (CmmGlobal (GlobalRegUse GlobalReg Hp CmmType _)) CmmExpr _ <- CmmNode O x node = Bool True | AbsMem StackMem <- AbsMem addr, CmmAssign (CmmGlobal (GlobalRegUse GlobalReg Sp CmmType _)) CmmExpr _ <- CmmNode O x node = Bool True | SpMem{} <- AbsMem addr, CmmAssign (CmmGlobal (GlobalRegUse GlobalReg Sp CmmType _)) CmmExpr _ <- CmmNode O x node = Bool True -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap] | CmmUnsafeForeignCall{} <- CmmNode O x node, AbsMem -> AbsMem -> Bool memConflicts AbsMem addr AbsMem AnyMem = Bool True -- (6) suspendThread clobbers every global register not backed by a real -- register. It also clobbers heap and stack but this is handled by (5) | CmmUnsafeForeignCall (PrimTarget CallishMachOp MO_SuspendThread) [LocalReg] _ [CmmExpr] _ <- CmmNode O x node , Platform -> (Bool -> GlobalReg -> Bool) -> Bool -> CmmExpr -> Bool forall b. Platform -> (b -> GlobalReg -> b) -> b -> CmmExpr -> b forall r a b. UserOfRegs r a => Platform -> (b -> r -> b) -> b -> a -> b foldRegsUsed Platform platform (\Bool b GlobalReg g -> Platform -> GlobalReg -> Maybe RealReg globalRegMaybe Platform platform GlobalReg g Maybe RealReg -> Maybe RealReg -> Bool forall a. Eq a => a -> a -> Bool == Maybe RealReg forall a. Maybe a Nothing Bool -> Bool -> Bool || Bool b) Bool False CmmExpr rhs = Bool True -- (7) native calls clobber any memory | CmmCall{} <- CmmNode O x node, AbsMem -> AbsMem -> Bool memConflicts AbsMem addr AbsMem AnyMem = Bool True -- (8) otherwise, no conflict | Bool otherwise = Bool False {- Note [Inlining foldRegsDefd] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ foldRegsDefd is, after optimization, *not* a small function so it's only marked INLINEABLE, but not INLINE. However in some specific cases we call it *very* often making it important to avoid the overhead of allocating the folding function. So we simply force inlining via the magic inline function. For T3294 this improves allocation with -O by ~1%. -} -- Returns True if node defines any global registers that are used in the -- Cmm expression globalRegistersConflict :: Platform -> CmmExpr -> CmmNode e x -> Bool globalRegistersConflict :: forall (e :: Extensibility) (x :: Extensibility). Platform -> CmmExpr -> CmmNode e x -> Bool globalRegistersConflict Platform platform CmmExpr expr CmmNode e x node = -- See Note [Inlining foldRegsDefd] (Platform -> (Bool -> GlobalReg -> Bool) -> Bool -> CmmNode e x -> Bool) -> Platform -> (Bool -> GlobalReg -> Bool) -> Bool -> CmmNode e x -> Bool forall a. a -> a inline Platform -> (Bool -> GlobalReg -> Bool) -> Bool -> CmmNode e x -> Bool forall b. Platform -> (b -> GlobalReg -> b) -> b -> CmmNode e x -> b forall r a b. DefinerOfRegs r a => Platform -> (b -> r -> b) -> b -> a -> b foldRegsDefd Platform platform (\Bool b GlobalReg r -> Bool b Bool -> Bool -> Bool || Platform -> GlobalReg -> CmmExpr -> Bool globalRegUsedIn Platform platform GlobalReg r CmmExpr expr) Bool False CmmNode e x node -- Returns True if node defines any local registers that are used in the -- Cmm expression localRegistersConflict :: Platform -> CmmExpr -> CmmNode e x -> Bool localRegistersConflict :: forall (e :: Extensibility) (x :: Extensibility). Platform -> CmmExpr -> CmmNode e x -> Bool localRegistersConflict Platform platform CmmExpr expr CmmNode e x node = -- See Note [Inlining foldRegsDefd] (Platform -> (Bool -> LocalReg -> Bool) -> Bool -> CmmNode e x -> Bool) -> Platform -> (Bool -> LocalReg -> Bool) -> Bool -> CmmNode e x -> Bool forall a. a -> a inline Platform -> (Bool -> LocalReg -> Bool) -> Bool -> CmmNode e x -> Bool forall b. Platform -> (b -> LocalReg -> b) -> b -> CmmNode e x -> b forall r a b. DefinerOfRegs r a => Platform -> (b -> r -> b) -> b -> a -> b foldRegsDefd Platform platform (\Bool b LocalReg r -> Bool b Bool -> Bool -> Bool || Platform -> CmmReg -> CmmExpr -> Bool regUsedIn Platform platform (LocalReg -> CmmReg CmmLocal LocalReg r) CmmExpr expr) Bool False CmmNode e x node -- Note [Sinking and calls] -- ~~~~~~~~~~~~~~~~~~~~~~~~ -- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall) -- and unsafe foreign (CmmUnsafeForeignCall). We perform sinking pass after -- stack layout (see Note [Sinking after stack layout]) which leads to two -- invariants related to calls: -- -- a) during stack layout phase all safe foreign calls are turned into -- unsafe foreign calls (see Note [Lower safe foreign calls]). This -- means that we will never encounter CmmForeignCall node when running -- sinking after stack layout -- -- b) stack layout saves all variables live across a call on the stack -- just before making a call (remember we are not sinking assignments to -- stack): -- -- L1: -- x = R1 -- P64[Sp - 16] = L2 -- P64[Sp - 8] = x -- Sp = Sp - 16 -- call f() returns L2 -- L2: -- -- We will attempt to sink { x = R1 } but we will detect conflict with -- { P64[Sp - 8] = x } and hence we will drop { x = R1 } without even -- checking whether it conflicts with { call f() }. In this way we will -- never need to check any assignment conflicts with CmmCall. Remember -- that we still need to check for potential memory conflicts. -- -- So the result is that we only need to worry about CmmUnsafeForeignCall nodes -- when checking conflicts (see Note [Unsafe foreign calls clobber caller-save registers]). -- This assumption holds only when we do sinking after stack layout. If we run -- it before stack layout we need to check for possible conflicts with all three -- kinds of calls. Our `conflicts` function does that by using a generic -- foldRegsDefd and foldRegsUsed functions defined in DefinerOfRegs and -- UserOfRegs typeclasses. -- -- An abstraction of memory read or written. data AbsMem = NoMem -- no memory accessed | AnyMem -- arbitrary memory | HeapMem -- definitely heap memory | StackMem -- definitely stack memory | SpMem -- <size>[Sp+n] {-# UNPACK #-} !Int {-# UNPACK #-} !Int -- Having SpMem is important because it lets us float loads from Sp -- past stores to Sp as long as they don't overlap, and this helps to -- unravel some long sequences of -- x1 = [Sp + 8] -- x2 = [Sp + 16] -- ... -- [Sp + 8] = xi -- [Sp + 16] = xj -- -- Note that SpMem is invalidated if Sp is changed, but the definition -- of 'conflicts' above handles that. -- ToDo: this won't currently fix the following commonly occurring code: -- x1 = [R1 + 8] -- x2 = [R1 + 16] -- .. -- [Hp - 8] = x1 -- [Hp - 16] = x2 -- .. -- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that -- assignments to [Hp + n] do not conflict with any other heap memory, -- but this is tricky to nail down. What if we had -- -- x = Hp + n -- [x] = ... -- -- the store to [x] should be "new heap", not "old heap". -- Furthermore, you could imagine that if we started inlining -- functions in Cmm then there might well be reads of heap memory -- that was written in the same basic block. To take advantage of -- non-aliasing of heap memory we will have to be more clever. -- Note [Foreign calls clobber heap] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- It is tempting to say that foreign calls clobber only -- non-heap/stack memory, but unfortunately we break this invariant in -- the RTS. For example, in stg_catch_retry_frame we call -- stmCommitNestedTransaction() which modifies the contents of the -- TRec it is passed (this actually caused incorrect code to be -- generated). -- -- Since the invariant is true for the majority of foreign calls, -- perhaps we ought to have a special annotation for calls that can -- modify heap/stack memory. For now we just use the conservative -- definition here. -- -- Some CallishMachOp imply a memory barrier e.g. AtomicRMW and -- therefore we should never float any memory operations across one of -- these calls. -- -- `suspendThread` releases the capability used by the thread, hence we mustn't -- float accesses to heap, stack or virtual global registers stored in the -- capability (e.g. with unregisterised build, see #19237). bothMems :: AbsMem -> AbsMem -> AbsMem bothMems :: AbsMem -> AbsMem -> AbsMem bothMems AbsMem NoMem AbsMem x = AbsMem x bothMems AbsMem x AbsMem NoMem = AbsMem x bothMems AbsMem HeapMem AbsMem HeapMem = AbsMem HeapMem bothMems AbsMem StackMem AbsMem StackMem = AbsMem StackMem bothMems (SpMem Int o1 Int w1) (SpMem Int o2 Int w2) | Int o1 Int -> Int -> Bool forall a. Eq a => a -> a -> Bool == Int o2 = Int -> Int -> AbsMem SpMem Int o1 (Int -> Int -> Int forall a. Ord a => a -> a -> a max Int w1 Int w2) | Bool otherwise = AbsMem StackMem bothMems SpMem{} AbsMem StackMem = AbsMem StackMem bothMems AbsMem StackMem SpMem{} = AbsMem StackMem bothMems AbsMem _ AbsMem _ = AbsMem AnyMem memConflicts :: AbsMem -> AbsMem -> Bool memConflicts :: AbsMem -> AbsMem -> Bool memConflicts AbsMem NoMem AbsMem _ = Bool False memConflicts AbsMem _ AbsMem NoMem = Bool False memConflicts AbsMem HeapMem AbsMem StackMem = Bool False memConflicts AbsMem StackMem AbsMem HeapMem = Bool False memConflicts SpMem{} AbsMem HeapMem = Bool False memConflicts AbsMem HeapMem SpMem{} = Bool False memConflicts (SpMem Int o1 Int w1) (SpMem Int o2 Int w2) | Int o1 Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int o2 = Int o1 Int -> Int -> Int forall a. Num a => a -> a -> a + Int w1 Int -> Int -> Bool forall a. Ord a => a -> a -> Bool > Int o2 | Bool otherwise = Int o2 Int -> Int -> Int forall a. Num a => a -> a -> a + Int w2 Int -> Int -> Bool forall a. Ord a => a -> a -> Bool > Int o1 memConflicts AbsMem _ AbsMem _ = Bool True exprMem :: Platform -> CmmExpr -> AbsMem exprMem :: Platform -> CmmExpr -> AbsMem exprMem Platform platform (CmmLoad CmmExpr addr CmmType w AlignmentSpec _) = AbsMem -> AbsMem -> AbsMem bothMems (Platform -> CmmExpr -> Width -> AbsMem loadAddr Platform platform CmmExpr addr (CmmType -> Width typeWidth CmmType w)) (Platform -> CmmExpr -> AbsMem exprMem Platform platform CmmExpr addr) exprMem Platform platform (CmmMachOp MachOp _ [CmmExpr] es) = (AbsMem -> AbsMem -> AbsMem) -> AbsMem -> [AbsMem] -> AbsMem forall a b. (a -> b -> b) -> b -> [a] -> b forall (t :: * -> *) a b. Foldable t => (a -> b -> b) -> b -> t a -> b foldr AbsMem -> AbsMem -> AbsMem bothMems AbsMem NoMem ((CmmExpr -> AbsMem) -> [CmmExpr] -> [AbsMem] forall a b. (a -> b) -> [a] -> [b] map (Platform -> CmmExpr -> AbsMem exprMem Platform platform) [CmmExpr] es) exprMem Platform _ CmmExpr _ = AbsMem NoMem loadAddr :: Platform -> CmmExpr -> Width -> AbsMem loadAddr :: Platform -> CmmExpr -> Width -> AbsMem loadAddr Platform platform CmmExpr e Width w = case CmmExpr e of CmmReg CmmReg r -> CmmReg -> Int -> Width -> AbsMem regAddr CmmReg r Int 0 Width w CmmRegOff CmmReg r Int i -> CmmReg -> Int -> Width -> AbsMem regAddr CmmReg r Int i Width w CmmExpr _other | Platform -> CmmReg -> CmmExpr -> Bool regUsedIn Platform platform (Platform -> CmmReg spReg Platform platform) CmmExpr e -> AbsMem StackMem | Bool otherwise -> AbsMem AnyMem regAddr :: CmmReg -> Int -> Width -> AbsMem regAddr :: CmmReg -> Int -> Width -> AbsMem regAddr (CmmGlobal (GlobalRegUse GlobalReg Sp CmmType _)) Int i Width w = Int -> Int -> AbsMem SpMem Int i (Width -> Int widthInBytes Width w) regAddr (CmmGlobal (GlobalRegUse GlobalReg Hp CmmType _)) Int _ Width _ = AbsMem HeapMem regAddr (CmmGlobal (GlobalRegUse GlobalReg CurrentTSO CmmType _)) Int _ Width _ = AbsMem HeapMem -- important for PrimOps regAddr CmmReg r Int _ Width _ | CmmType -> Bool isGcPtrType (CmmReg -> CmmType cmmRegType CmmReg r) = AbsMem HeapMem -- yay! GCPtr pays for itself regAddr CmmReg _ Int _ Width _ = AbsMem AnyMem {- Note [Inline GlobalRegs?] ~~~~~~~~~~~~~~~~~~~~~~~~~ Should we freely inline GlobalRegs? Actually it doesn't make a huge amount of difference either way, so we *do* currently treat GlobalRegs as "trivial" and inline them everywhere, but for what it's worth, here is what I discovered when I (SimonM) looked into this: Common sense says we should not inline GlobalRegs, because when we have x = R1 the register allocator will coalesce this assignment, generating no code, and simply record the fact that x is bound to $rbx (or whatever). Furthermore, if we were to sink this assignment, then the range of code over which R1 is live increases, and the range of code over which x is live decreases. All things being equal, it is better for x to be live than R1, because R1 is a fixed register whereas x can live in any register. So we should neither sink nor inline 'x = R1'. However, not inlining GlobalRegs can have surprising consequences. e.g. (cgrun020) c3EN: _s3DB::P64 = R1; _c3ES::P64 = _s3DB::P64 & 7; if (_c3ES::P64 >= 2) goto c3EU; else goto c3EV; c3EU: _s3DD::P64 = P64[_s3DB::P64 + 6]; _s3DE::P64 = P64[_s3DB::P64 + 14]; I64[Sp - 8] = c3F0; R1 = _s3DE::P64; P64[Sp] = _s3DD::P64; inlining the GlobalReg gives: c3EN: if (R1 & 7 >= 2) goto c3EU; else goto c3EV; c3EU: I64[Sp - 8] = c3F0; _s3DD::P64 = P64[R1 + 6]; R1 = P64[R1 + 14]; P64[Sp] = _s3DD::P64; but if we don't inline the GlobalReg, instead we get: _s3DB::P64 = R1; if (_s3DB::P64 & 7 >= 2) goto c3EU; else goto c3EV; c3EU: I64[Sp - 8] = c3F0; R1 = P64[_s3DB::P64 + 14]; P64[Sp] = P64[_s3DB::P64 + 6]; This looks better - we managed to inline _s3DD - but in fact it generates an extra reg-reg move: .Lc3EU: movq $c3F0_info,-8(%rbp) movq %rbx,%rax movq 14(%rbx),%rbx movq 6(%rax),%rax movq %rax,(%rbp) because _s3DB is now live across the R1 assignment, we lost the benefit of coalescing. Who is at fault here? Perhaps if we knew that _s3DB was an alias for R1, then we would not sink a reference to _s3DB past the R1 assignment. Or perhaps we *should* do that - we might gain by sinking it, despite losing the coalescing opportunity. Sometimes not inlining global registers wins by virtue of the rule about not inlining into arguments of a foreign call, e.g. (T7163) this is what happens when we inlined F1: _s3L2::F32 = F1; _c3O3::F32 = %MO_F_Mul_W32(F1, 10.0 :: W32); (_s3L7::F32) = call "ccall" arg hints: [] result hints: [] rintFloat(_c3O3::F32); but if we don't inline F1: (_s3L7::F32) = call "ccall" arg hints: [] result hints: [] rintFloat(%MO_F_Mul_W32(_s3L2::F32, 10.0 :: W32)); -}