{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE ParallelListComp #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE NondecreasingIndentation #-}

{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module GHC.CmmToAsm.X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        extractUnwindPoints,
        invertCondBranches,
        InstrBlock
)

where

-- NCG stuff:
import GHC.Prelude

import GHC.CmmToAsm.X86.Instr
import GHC.CmmToAsm.X86.Cond
import GHC.CmmToAsm.X86.Regs
import GHC.CmmToAsm.X86.Ppr
import GHC.CmmToAsm.X86.RegInfo

import GHC.Platform.Regs
import GHC.CmmToAsm.CPrim
import GHC.CmmToAsm.Types
import GHC.Cmm.DebugBlock
   ( DebugBlock(..), UnwindPoint(..), UnwindTable
   , UnwindExpr(UwReg), toUnwindExpr
   )
import GHC.CmmToAsm.PIC
import GHC.CmmToAsm.Monad
   ( NatM, getNewRegNat, getNewLabelNat, setDeltaNat
   , getDeltaNat, getBlockIdNat, getPicBaseNat
   , Reg64(..), RegCode64(..), getNewReg64, localReg64
   , getPicBaseMaybeNat, getDebugBlock, getFileId
   , addImmediateSuccessorNat, updateCfgNat, getConfig, getPlatform
   , getCfgWeights
   )
import GHC.CmmToAsm.CFG
import GHC.CmmToAsm.Format
import GHC.CmmToAsm.Config
import GHC.Platform.Reg
import GHC.Platform

-- Our intermediate code:
import GHC.Types.Basic
import GHC.Cmm.BlockId
import GHC.Unit.Types ( primUnitId )
import GHC.Cmm.Utils
import GHC.Cmm.Switch
import GHC.Cmm
import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Dataflow.Graph
import GHC.Cmm.Dataflow.Label
import GHC.Cmm.CLabel
import GHC.Types.Tickish ( GenTickish(..) )
import GHC.Types.SrcLoc  ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )

-- The rest:
import GHC.Data.Maybe ( expectJust )
import GHC.Types.ForeignCall ( CCallConv(..) )
import GHC.Data.OrdList
import GHC.Utils.Outputable
import GHC.Utils.Constants (debugIsOn)
import GHC.Utils.Monad ( foldMapM )
import GHC.Utils.Panic
import GHC.Data.FastString
import GHC.Utils.Misc
import GHC.Types.Unique.DSM ( getUniqueM )

import qualified Data.Semigroup as S

import Control.Monad
import Control.Monad.Trans.State.Strict
  ( StateT, evalStateT, get, put )
import Control.Monad.Trans.Class (lift)
import Data.Foldable (fold)
import Data.Int
import Data.Maybe
import Data.Word

import qualified Data.Map as Map

is32BitPlatform :: NatM Bool
is32BitPlatform = do
    platform <- getPlatform
    return $ target32Bit platform

sse4_1Enabled :: NatM Bool
sse4_1Enabled = do
  config <- getConfig
  return (ncgSseVersion config >= Just SSE4)

sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
  config <- getConfig
  return (ncgSseVersion config >= Just SSE42)

avxEnabled :: NatM Bool
avxEnabled = do
  config <- getConfig
  return (ncgAvxEnabled config)

cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, RawCmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  platform <- getPlatform
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS platform

  case picBaseMb of
      Just picBase -> initializePicBase_x86 os picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) =
  return [CmmData sec (mkAlignment 1, dat)]  -- no translation, we just use CmmStatic

{- Note [Verifying basic blocks]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   We want to guarantee a few things about the results
   of instruction selection.

   Namely that each basic block consists of:
    * A (potentially empty) sequence of straight-line instructions
  followed by
    * A (potentially empty) sequence of jump-like instructions.

    We can verify this by going through the instructions and
    making sure that no non-jumpish instruction appears
    after a jumpish instruction.

    There are gotchas however:
    * CALLs are strictly speaking control flow, but we do not care
      about them here. Hence we treat them as regular instructions.

      It's safe for them to appear inside a basic block
      as (ignoring side effects inside the call) they will result in
      straight line code.

    * NEWBLOCK marks the start of a new basic block so can
      be followed by any instructions.
-}
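
{- For illustration, a block shape satisfying this invariant looks like
   (an illustrative sketch, not actual generated code):

        NEWBLOCK lbl1
        MOV  ...            -- straight-line instructions
        ADD  ...
        JXX EQQ  lbl2       -- jumpish instructions only from here on
        JXX ALWAYS lbl3

   Any straight-line instruction appearing after the first jump would make
   verifyBasicBlock (below) report a faulty block when assertions are enabled.
-}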

-- Verifying basic blocks is cheap, but not cheap enough to enable it unconditionally.
verifyBasicBlock :: Platform -> [Instr] -> ()
verifyBasicBlock platform instrs
  | debugIsOn     = go False instrs
  | otherwise     = ()
  where
    go _     [] = ()
    go atEnd (i:instr)
        = case i of
            -- Start a new basic block
            NEWBLOCK {} -> go False instr
            -- Calls are not viable block terminators
            CALL {}     | atEnd -> faultyBlockWith i
                        | not atEnd -> go atEnd instr
            -- All instructions ok, check if we reached the end and continue.
            _ | not atEnd -> go (isJumpishInstr i) instr
              -- Only jumps allowed at the end of basic blocks.
              | otherwise -> if isJumpishInstr i
                                then go True instr
                                else faultyBlockWith i
    faultyBlockWith i
        = pprPanic "Non control flow instructions after end of basic block."
                   (pprInstr platform i <+> text "in:" $$ vcat (map (pprInstr platform) instrs))

basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, RawCmmStatics) Instr])

basicBlockCodeGen block = do
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  -- Generate location directive
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span (LexicalFastString name))
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col (unpackFS name)
    _ -> return nilOL
  (mid_instrs,mid_bid) <- stmtsToInstrs id stmts
  (!tail_instrs,_) <- stmtToInstrs mid_bid tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  platform <- getPlatform
  return $! verifyBasicBlock platform (fromOL instrs)
  instrs' <- fold <$> traverse addSpUnwindings instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs'

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)

-- | Convert 'DELTA' instructions into 'UNWIND' instructions to capture changes
-- in the @sp@ register. See Note [What is this unwinding business?] in "GHC.Cmm.DebugBlock"
-- for details.
addSpUnwindings :: Instr -> NatM (OrdList Instr)
addSpUnwindings instr@(DELTA d) = do
    config <- getConfig
    let platform = ncgPlatform config
    if ncgDwarfUnwindings config
        then do lbl <- mkAsmTempLabel <$> getUniqueM
                let unwind = Map.singleton MachSp (Just $ UwReg (GlobalRegUse MachSp (bWord platform)) $ negate d)
                return $ toOL [ instr, UNWIND lbl unwind ]
        else return (unitOL instr)
addSpUnwindings instr = return $ unitOL instr

{- Note [Keeping track of the current block]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When generating instructions for Cmm we sometimes require
the current block for things like retry loops.

We also sometimes change the current block, if a MachOp
results in branching control flow.

Issues arise if we have two statements in the same block,
which both depend on the current block id *and* change the
basic block after them. This happens for atomic primops
in the X86 backend where we want to update the CFG data structure
when introducing new basic blocks.

For example in #17334 we got this Cmm code:

        c3Bf: // global
            (_s3t1::I64) = call MO_AtomicRMW W64 AMO_And(_s3sQ::P64 + 88, 18);
            (_s3t4::I64) = call MO_AtomicRMW W64 AMO_Or(_s3sQ::P64 + 88, 0);
            _s3sT::I64 = _s3sV::I64;
            goto c3B1;

This resulted in two new basic blocks being inserted:

        c3Bf:
                movl $18,%vI_n3Bo
                movq 88(%vI_s3sQ),%rax
                jmp _n3Bp
        n3Bp:
                ...
                cmpxchgq %vI_n3Bq,88(%vI_s3sQ)
                jne _n3Bp
                ...
                jmp _n3Bs
        n3Bs:
                ...
                cmpxchgq %vI_n3Bt,88(%vI_s3sQ)
                jne _n3Bs
                ...
                jmp _c3B1
        ...

When we called stmtToInstrs on this Cmm, both atomic operations were translated
under the assumption that they would be placed into their Cmm basic block `c3Bf`.
However, the retry loops introduce new labels, so this is not the case
for the second statement.
This resulted in a desync between the explicit control flow graph
we construct as a separate data type and the actual control flow graph in the code.

Instead we now return the new basic block if a statement causes a change
in the current block and use the block for all following statements.

For this reason genForeignCall is also split into two parts: one for calls which
*won't* change the basic block in which successive instructions will be
placed (since they only evaluate CmmExpr, which can only contain MachOps, which
cannot introduce basic blocks in their lowerings), and another for calls
which *are* known to change the basic block.

-}
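
{- A minimal sketch of the resulting threading pattern (illustrative only):

       (is1, mb_bid1) <- stmtToInstrs bid s1     -- may introduce a new block
       let bid1 = fromMaybe bid mb_bid1
       (is2, _)       <- stmtToInstrs bid1 s2    -- must use bid1, not bid

   which is exactly what the `go` loop in stmtsToInstrs below does.
-}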

-- See Note [Keeping track of the current block] for why
-- we pass the BlockId.
stmtsToInstrs :: BlockId -- ^ Basic block these statements will start to be placed in.
              -> [CmmNode O O] -- ^ Cmm statements
              -> NatM (InstrBlock, BlockId) -- ^ Resulting instructions and final block
stmtsToInstrs bid stmts =
    go bid stmts nilOL
  where
    go bid  []        instrs = return (instrs,bid)
    go bid (s:stmts)  instrs = do
      (instrs',bid') <- stmtToInstrs bid s
      -- If the statement introduced a new block, we use that one
      let !newBid = fromMaybe bid bid'
      go newBid stmts (instrs `appOL` instrs')

-- | `bid` refers to the current block and is used to update the CFG
--   if new blocks are inserted in the control flow.
-- See Note [Keeping track of the current block] for more details.
stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed in.
             -> CmmNode e x
             -> NatM (InstrBlock, Maybe BlockId)
             -- ^ Instructions, and bid of new block if successive
             -- statements are placed in a different basic block.
stmtToInstrs bid stmt = do
  is32Bit <- is32BitPlatform
  platform <- getPlatform
  case stmt of
    CmmUnsafeForeignCall target result_regs args
       -> genForeignCall target result_regs args bid

    _ -> (,Nothing) <$> case stmt of
      CmmComment s   -> return (unitOL (COMMENT s))
      CmmTick {}     -> return nilOL

      CmmUnwind regs -> do
        let to_unwind_entry :: (GlobalReg, Maybe CmmExpr) -> UnwindTable
            to_unwind_entry (reg, expr) = Map.singleton reg (fmap (toUnwindExpr platform) expr)
        case foldMap to_unwind_entry regs of
          tbl | Map.null tbl -> return nilOL
              | otherwise    -> do
                  lbl <- mkAsmTempLabel <$> getUniqueM
                  return $ unitOL $ UNWIND lbl tbl

      CmmAssign reg src
        | isFloatType ty         -> assignReg_FltCode reg src
        | is32Bit && isWord64 ty -> assignReg_I64Code reg src
        | isVecType ty           -> assignReg_VecCode reg src
        | otherwise              -> assignReg_IntCode reg src
          where ty = cmmRegType reg

      CmmStore addr src _alignment
        | isFloatType ty         -> assignMem_FltCode format addr src
        | is32Bit && isWord64 ty -> assignMem_I64Code        addr src
        | isVecType ty           -> assignMem_VecCode format addr src
        | otherwise              -> assignMem_IntCode format addr src
          where ty = cmmExprType platform src
                format = cmmTypeFormat ty

      CmmBranch id          -> return $ genBranch id

      --We try to arrange blocks such that the likely branch is the fallthrough
      --in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
      CmmCondBranch arg true false _ -> genCondBranch bid true false arg
      CmmSwitch arg ids -> genSwitch arg ids
      CmmCall { cml_target = arg
              , cml_args_regs = gregs } -> genJump arg (jumpRegs platform gregs)
      _ ->
        panic "stmtToInstrs: statement should have been cps'd away"


jumpRegs :: Platform -> [GlobalRegUse] -> [RegWithFormat]
jumpRegs platform gregs =
  [ RegWithFormat (RegReal r) (cmmTypeFormat ty)
  | GlobalRegUse gr ty <- gregs
  , Just r <- [globalRegMaybe platform gr] ]

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | Registers passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)
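
-- For instance (an illustrative sketch; compare getSomeReg below), a consumer
-- of an 'Any' register typically allocates a fresh register and instantiates
-- the code with it:
--
-- >    Any fmt mkCode -> do tmp <- getNewRegNat fmt
-- >                         return (tmp, mkCode tmp)
--
-- whereas a 'Fixed' register must be used exactly as returned.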


swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any   format codefn

getLocalRegReg :: LocalReg -> Reg
getLocalRegReg (LocalReg u ty)
  = -- by assuming SSE2, Int, Word, Float, Double and vectors all can be register allocated
    RegVirtual (mkVirtualReg u (cmmTypeFormat ty))

-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform  -> CmmReg -> Reg

getRegisterReg _   (CmmLocal lreg) = getLocalRegReg lreg

getRegisterReg platform  (CmmGlobal mid)
  = case globalRegMaybe platform $ globalRegUse_reg mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...

-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}

{-
Note [%rip-relative addressing on x86-64]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
On x86-64 GHC produces code for use in the "small" or, when `-fPIC` is set,
"small PIC" code models defined by the x86-64 System V ABI (section 3.5.1 of
specification version 0.99).

In general the small code model would allow us to assume that code is located
between 0 and 2^31 - 1. However, this is not true on Windows which, due to
high-entropy ASLR, may place the executable image anywhere in 64-bit address
space. This is problematic since immediate operands in x86-64 are generally
32-bit sign-extended values (with the exception of the 64-bit MOVABS encoding).
Consequently, to avoid overflowing we use %rip-relative addressing universally.
Since %rip-relative addressing comes essentially for free and makes linking far
easier, we use it even on non-Windows platforms.

See also: the documentation for GCC's `-mcmodel=small` flag.
-}
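
-- As an illustration (sketch only; `foo_closure` is a hypothetical symbol):
-- to materialise the address of a static symbol we emit something like
--
--      leaq foo_closure(%rip), %rax    -- 32-bit displacement from %rip, always in range
--
-- rather than
--
--      movq $foo_closure, %rax         -- 32-bit sign-extended absolute address;
--                                      -- may overflow under high-entropy ASLR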


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64
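
-- For example (illustrative):
--
-- > is32BitInteger 18446744073709551615  ==  True   -- (-1) seen as an unsigned W64 literal
-- > is32BitInteger 0x80000000            ==  False
--
-- The first literal is accepted because truncating it to Int64 yields -1,
-- which fits in 32 bits.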


-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
jumpTableEntry config Nothing = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = blockLbl blockid


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: CmmReg -> Int -> CmmExpr
mangleIndexTree reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType reg)

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)

assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  RegCode64 vcode rhi rlo <- iselExpr64 valueTree
  let
        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal dst) valueTree = do
   RegCode64 vcode r_src_hi r_src_lo <- iselExpr64 valueTree
   let
         Reg64 r_dst_hi r_dst_lo = localReg64 dst
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"

iselExpr64 :: HasDebugCallStack => CmmExpr -> NatM (RegCode64 InstrBlock)
iselExpr64 (CmmLit (CmmInt i _)) = do
  Reg64 rhi rlo <- getNewReg64
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (RegCode64 code rhi rlo)

iselExpr64 (CmmLoad addrTree ty _) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   Reg64 rhi rlo <- getNewReg64
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            RegCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) rhi rlo
     )

iselExpr64 (CmmReg (CmmLocal local_reg)) = do
  let Reg64 hi lo = localReg64 local_reg
  return (RegCode64 nilOL hi lo)

iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   Reg64 rhi rlo <- getNewReg64
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (RegCode64 code rhi rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   RegCode64 code2 r2hi r2lo <- iselExpr64 e2
   Reg64 rhi rlo <- getNewReg64
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (RegCode64 code rhi rlo)

iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   RegCode64 code2 r2hi r2lo <- iselExpr64 e2
   Reg64 rhi rlo <- getNewReg64
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (RegCode64 code rhi rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
     code <- getAnyReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code r_dst_lo `snocOL`
                          XOR II32 (OpReg r_dst_hi) (OpReg r_dst_hi))
                          r_dst_hi
                          r_dst_lo

iselExpr64 (CmmMachOp (MO_UU_Conv W16 W64) [expr]) = do
     (rsrc, code) <- getByteReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code `appOL` toOL [
                          MOVZxL II16 (OpReg rsrc) (OpReg r_dst_lo),
                          XOR    II32 (OpReg r_dst_hi) (OpReg r_dst_hi)
                          ])
                          r_dst_hi
                          r_dst_lo

iselExpr64 (CmmMachOp (MO_UU_Conv W8 W64) [expr]) = do
     (rsrc, code) <- getByteReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code `appOL` toOL [
                          MOVZxL II8 (OpReg rsrc) (OpReg r_dst_lo),
                          XOR    II32 (OpReg r_dst_hi) (OpReg r_dst_hi)
                          ])
                          r_dst_hi
                          r_dst_lo

iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
     code <- getAnyReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code r_dst_lo `snocOL`
                          MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
                          CLTD II32 `snocOL`
                          MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
                          MOV II32 (OpReg edx) (OpReg r_dst_hi))
                          r_dst_hi
                          r_dst_lo

iselExpr64 (CmmMachOp (MO_SS_Conv W16 W64) [expr]) = do
     (r, code) <- getByteReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code `appOL` toOL [
                          MOVSxL II16 (OpReg r) (OpReg eax),
                          CLTD II32,
                          MOV II32 (OpReg eax) (OpReg r_dst_lo),
                          MOV II32 (OpReg edx) (OpReg r_dst_hi)])
                          r_dst_hi
                          r_dst_lo

iselExpr64 (CmmMachOp (MO_SS_Conv W8 W64) [expr]) = do
     (r, code) <- getByteReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code `appOL` toOL [
                          MOVSxL II8 (OpReg r) (OpReg eax),
                          CLTD II32,
                          MOV II32 (OpReg eax) (OpReg r_dst_lo),
                          MOV II32 (OpReg edx) (OpReg r_dst_hi)])
                          r_dst_hi
                          r_dst_lo

iselExpr64 (CmmMachOp (MO_S_Neg _) [expr]) = do
   RegCode64 code rhi rlo <- iselExpr64 expr
   Reg64 rohi rolo <- getNewReg64
   let
        ocode = code `appOL`
                toOL [ MOV II32 (OpReg rlo) (OpReg rolo),
                       XOR II32 (OpReg rohi) (OpReg rohi),
                       NEGI II32 (OpReg rolo),
                       SBB II32 (OpReg rhi) (OpReg rohi) ]
   return (RegCode64 ocode rohi rolo)

-- To multiply two 64-bit numbers we use the following decomposition (in C notation):
--
--     ((r1hi << 32) + r1lo) * ((r2hi << 32) + r2lo)
--      = ((r2lo * r1hi) << 32)
--      + ((r1lo * r2hi) << 32)
--      + r1lo * r2lo
--
-- Note that @(r1hi * r2hi) << 64@ can be dropped because it overflows completely.

iselExpr64 (CmmMachOp (MO_Mul _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   RegCode64 code2 r2hi r2lo <- iselExpr64 e2
   Reg64 rhi rlo <- getNewReg64
   tmp <- getNewRegNat II32
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV  II32 (OpReg r1lo) (OpReg eax),
                       MOV  II32 (OpReg r2lo) (OpReg tmp),
                       MOV  II32 (OpReg r1hi) (OpReg rhi),
                       IMUL II32 (OpReg tmp) (OpReg rhi),
                       MOV  II32 (OpReg r2hi) (OpReg rlo),
                       IMUL II32 (OpReg eax) (OpReg rlo),
                       ADD  II32 (OpReg rlo) (OpReg rhi),
                       MUL2 II32 (OpReg tmp),
                       ADD  II32 (OpReg edx) (OpReg rhi),
                       MOV  II32 (OpReg eax) (OpReg rlo)
                     ]
   return (RegCode64 code rhi rlo)

iselExpr64 (CmmMachOp (MO_S_MulMayOflo W64) _) = do
   -- Performance sensitive users won't use 32 bit so let's keep it simple:
   -- We always return a (usually false) positive.
   Reg64 rhi rlo <- getNewReg64
   let code = toOL   [
                       MOV II32 (OpImm (ImmInt 1)) (OpReg rhi),
                       MOV II32 (OpImm (ImmInt 1)) (OpReg rlo)
                     ]
   return (RegCode64 code rhi rlo)


-- To shift a 64-bit number to the left we use the SHLD and SHL instructions.
-- We use SHLD to shift the bits in @rhi@ to the left while copying
-- high bits from @rlo@ to fill the new space in the low bits of @rhi@.
-- That leaves @rlo@ unchanged, so we use SHL to shift the bits of @rlo@ left.
-- However, both these instructions only use the lowest 5 bits from %cl to do
-- their shifting. So if the sixth bit (value 32, i.e. 0x20) is set, we additionally
-- move the contents of @rlo@ to @rhi@ and clear @rlo@.
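--
-- For example (an illustrative walk-through): shifting left by 40 first shifts
-- @rhi@/@rlo@ by 40 mod 32 = 8 via SHLD/SHL; because bit 5 of the shift amount
-- is set, we then copy @rlo@ into @rhi@ and zero @rlo@, which accounts for the
-- remaining shift by 32.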

iselExpr64 (CmmMachOp (MO_Shl _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   code2 <- getAnyReg e2
   Reg64 rhi rlo <- getNewReg64
   lbl1 <- newBlockId
   lbl2 <- newBlockId
   let
        code =  code1 `appOL`
                code2 ecx `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SHLD II32 (OpReg ecx) (OpReg rlo) (OpReg rhi),
                       SHL II32 (OpReg ecx) (OpReg rlo),
                       TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                       JXX EQQ lbl2,
                       JXX ALWAYS lbl1,
                       NEWBLOCK lbl1,
                       MOV II32 (OpReg rlo) (OpReg rhi),
                       XOR II32 (OpReg rlo) (OpReg rlo),
                       JXX ALWAYS lbl2,
                       NEWBLOCK lbl2
                     ]
   return (RegCode64 code rhi rlo)

-- Similar to above, however now we're shifting to the right and doing a
-- signed (arithmetic) shift. That means that if the sixth bit of %cl is set
-- (so the shift amount is 32 or more), @rhi@ needs to be set to 0 when @rhi@
-- is non-negative and to 0xffffffff otherwise. We accomplish that by
-- arithmetically shifting @rhi@ right by 31.

iselExpr64 (CmmMachOp (MO_S_Shr _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   (r2, code2) <- getSomeReg e2
   Reg64 rhi rlo <- getNewReg64
   lbl1 <- newBlockId
   lbl2 <- newBlockId
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       MOV II32 (OpReg r2) (OpReg ecx),
                       SHRD II32 (OpReg ecx) (OpReg rhi) (OpReg rlo),
                       SAR II32 (OpReg ecx) (OpReg rhi),
                       TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                       JXX EQQ lbl2,
                       JXX ALWAYS lbl1,
                       NEWBLOCK lbl1,
                       MOV II32 (OpReg rhi) (OpReg rlo),
                       SAR II32 (OpImm (ImmInt 31)) (OpReg rhi),
                       JXX ALWAYS lbl2,
                       NEWBLOCK lbl2
                     ]
   return (RegCode64 code rhi rlo)

-- Similar to the above.

iselExpr64 (CmmMachOp (MO_U_Shr _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   (r2, code2) <- getSomeReg e2
   Reg64 rhi rlo <- getNewReg64
   lbl1 <- newBlockId
   lbl2 <- newBlockId
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       MOV II32 (OpReg r2) (OpReg ecx),
                       SHRD II32 (OpReg ecx) (OpReg rhi) (OpReg rlo),
                       SHR II32 (OpReg ecx) (OpReg rhi),
                       TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                       JXX EQQ lbl2,
                       JXX ALWAYS lbl1,
                       NEWBLOCK lbl1,
                       MOV II32 (OpReg rhi) (OpReg rlo),
                       XOR II32 (OpReg rhi) (OpReg rhi),
                       JXX ALWAYS lbl2,
                       NEWBLOCK lbl2
                     ]
   return (RegCode64 code rhi rlo)

iselExpr64 (CmmMachOp (MO_And _) [e1,e2]) = iselExpr64ParallelBin AND e1 e2
iselExpr64 (CmmMachOp (MO_Or  _) [e1,e2]) = iselExpr64ParallelBin OR  e1 e2
iselExpr64 (CmmMachOp (MO_Xor _) [e1,e2]) = iselExpr64ParallelBin XOR e1 e2

iselExpr64 (CmmMachOp (MO_Not _) [e1]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   Reg64 rhi rlo <- getNewReg64
   let
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       NOT II32 (OpReg rlo),
                       NOT II32 (OpReg rhi)
                     ]
   return (RegCode64 code rhi rlo)

iselExpr64 (CmmRegOff r i) = iselExpr64 (mangleIndexTree r i)

iselExpr64 expr
   = do
      platform <- getPlatform
      pprPanic "iselExpr64(i386)" (pdoc platform expr $+$ text (show expr))

iselExpr64ParallelBin :: (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> CmmExpr -> NatM (RegCode64 (OrdList Instr))
iselExpr64ParallelBin op e1 e2 = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   RegCode64 code2 r2hi r2lo <- iselExpr64 e2
   Reg64 rhi rlo <- getNewReg64
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       op  II32 (OpReg r2lo) (OpReg rlo),
                       op  II32 (OpReg r2hi) (OpReg rhi)
                     ]
   return (RegCode64 code rhi rlo)
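-- iselExpr64ParallelBin works because the bitwise operations act on each bit
-- independently, so a 64-bit AND/OR/XOR on i386 can be lowered to two
-- unrelated 32-bit operations on the two halves; schematically (a sketch,
-- not code used here):
--
--   (hi1, lo1) `op64` (hi2, lo2)  ==  (hi1 `op32` hi2, lo1 `op32` lo2)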

--------------------------------------------------------------------------------

-- A helper data type that reduces code duplication in the code generation
-- of arithmetic operations. It is not targeted at any particular element
-- type such as Int8, Int32, etc.
data VectorArithInstns = VA_Add | VA_Sub | VA_Mul | VA_Div | VA_Min | VA_Max

getRegister :: HasDebugCallStack => CmmExpr -> NatM Register
getRegister e = do platform <- getPlatform
                   is32Bit <- is32BitPlatform
                   getRegister' platform is32Bit e

getRegister' :: HasDebugCallStack => Platform -> Bool -> CmmExpr -> NatM Register

getRegister' platform is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal (GlobalRegUse PicBaseReg _)
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
        _ ->
          let ty :: CmmType
              ty = cmmRegType reg
              reg_fmt :: Format
              reg_fmt = cmmTypeFormat ty
          in return $ Fixed reg_fmt (getRegisterReg platform reg) nilOL

getRegister' platform is32Bit (CmmRegOff r n)
  = getRegister' platform is32Bit $ mangleIndexTree r n

getRegister' platform is32Bit (CmmMachOp (MO_RelaxedRead w) [e])
  = getRegister' platform is32Bit (CmmLoad e (cmmBits w) NaturallyAligned)

getRegister' platform is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e])
  = addAlignmentCheck align <$> getRegister' platform is32Bit e

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  RegCode64 code rhi _rlo <- iselExpr64 x
  return $ Fixed II32 rhi code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  RegCode64 code rhi _rlo <- iselExpr64 x
  return $ Fixed II32 rhi code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
  RegCode64 code _rhi rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
  RegCode64 code _rhi rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W8) [x])
 | is32Bit = do
  RegCode64 code _rhi rlo <- iselExpr64 x
  ro <- getNewRegNat II8
  return $ Fixed II8 ro (code `appOL` toOL [ MOVZxL II8 (OpReg rlo) (OpReg ro) ])

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W16) [x])
 | is32Bit = do
  RegCode64 code _rhi rlo <- iselExpr64 x
  ro <- getNewRegNat II16
  return $ Fixed II16 ro (code `appOL` toOL [ MOVZxL II16 (OpReg rlo) (OpReg ro) ])

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _ _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _ _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _ _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _ _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal (GlobalRegUse PicBaseReg _)),
                                     CmmLit displacement])
 | not is32Bit =
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' _ _ (CmmMachOp mop []) =
  pprPanic "getRegister(x86): nullary MachOp" (text $ show mop)

getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    avx    <- avxEnabled
    case mop of
      MO_F_Neg w  -> sse2NegCode w x


      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_XX_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_XX_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x
      MO_XX_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_XX_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x

      MO_FW_Bitcast W32 -> bitcast FF32 II32 x
      MO_WF_Bitcast W32 -> bitcast II32 FF32 x
      MO_FW_Bitcast W64 -> bitcast FF64 II64 x
      MO_WF_Bitcast W64 -> bitcast II64 FF64 x
      MO_WF_Bitcast {}  -> incorrectOperands
      MO_FW_Bitcast {}  -> incorrectOperands

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. However, on 32-bit
      -- only a few registers have an 8-bit variant (as opposed to x86-64, where every register
      -- has one). So for 32-bit code, we'll just zero-extend.
      MO_XX_Conv W8  W32
          | is32Bit   -> integerExtend W8 W32 MOVZxL x
          | otherwise -> integerExtend W8 W32 MOV x
      MO_XX_Conv W8  W16
          | is32Bit   -> integerExtend W8 W16 MOVZxL x
          | otherwise -> integerExtend W8 W16 MOV x
      MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
      -- For 32-to-64 bit zero extension, amd64 uses an ordinary movl.
      -- However, we don't want the register allocator to throw it
      -- away as an unnecessary reg-to-reg move, so we keep it in
      -- the form of a movzl and print it as a movl later.
      -- This doesn't apply to MO_XX_Conv since in this case we don't care about
      -- the upper bits. So we can just use MOV.
      MO_XX_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOV x
      MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
      MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x

      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FF_Conv from to -> invalidConversion from to
      MO_UU_Conv from to -> invalidConversion from to
      MO_SS_Conv from to -> invalidConversion from to
      MO_XX_Conv from to -> invalidConversion from to

      MO_FS_Truncate from to -> coerceFP2Int from to x
      MO_SF_Round    from to -> coerceInt2FP from to x

      MO_VF_Neg l w  | avx       -> vector_float_negate_avx l w x
                     | otherwise -> vector_float_negate_sse l w x
      -- SIMD NCG TODO: add integer negation
      MO_VS_Neg {} -> needLlvm mop

      MO_VF_Broadcast l w
        | avx
        -> vector_float_broadcast_avx l w x
        | otherwise
        -> vector_float_broadcast_sse l w x
      MO_V_Broadcast l w
        -> vector_int_broadcast l w x

      -- Binary MachOps
      MO_Add {}    -> incorrectOperands
      MO_Sub {}    -> incorrectOperands
      MO_Eq {}     -> incorrectOperands
      MO_Ne {}     -> incorrectOperands
      MO_Mul {}    -> incorrectOperands
      MO_S_MulMayOflo {} -> incorrectOperands
      MO_S_Quot {} -> incorrectOperands
      MO_S_Rem {}  -> incorrectOperands
      MO_U_Quot {} -> incorrectOperands
      MO_U_Rem {}  -> incorrectOperands
      MO_S_Ge {}   -> incorrectOperands
      MO_S_Le {}   -> incorrectOperands
      MO_S_Gt {}   -> incorrectOperands
      MO_S_Lt {}   -> incorrectOperands
      MO_U_Ge {}   -> incorrectOperands
      MO_U_Le {}   -> incorrectOperands
      MO_U_Gt {}   -> incorrectOperands
      MO_U_Lt {}   -> incorrectOperands
      MO_F_Add {}  -> incorrectOperands
      MO_F_Sub {}  -> incorrectOperands
      MO_F_Mul {}  -> incorrectOperands
      MO_F_Quot {} -> incorrectOperands
      MO_F_Eq {}   -> incorrectOperands
      MO_F_Ne {}   -> incorrectOperands
      MO_F_Ge {}   -> incorrectOperands
      MO_F_Le {}   -> incorrectOperands
      MO_F_Gt {}   -> incorrectOperands
      MO_F_Lt {}   -> incorrectOperands
      MO_F_Min {}  -> incorrectOperands
      MO_F_Max {}  -> incorrectOperands
      MO_And {}    -> incorrectOperands
      MO_Or {}     -> incorrectOperands
      MO_Xor {}    -> incorrectOperands
      MO_Shl {}    -> incorrectOperands
      MO_U_Shr {}  -> incorrectOperands
      MO_S_Shr {}  -> incorrectOperands

      MO_V_Extract {}     -> incorrectOperands
      MO_V_Add {}         -> incorrectOperands
      MO_V_Sub {}         -> incorrectOperands
      MO_V_Mul {}         -> incorrectOperands
      MO_VS_Quot {}       -> incorrectOperands
      MO_VS_Rem {}        -> incorrectOperands
      MO_VU_Quot {}       -> incorrectOperands
      MO_VU_Rem {}        -> incorrectOperands
      MO_V_Shuffle {}     -> incorrectOperands
      MO_VF_Shuffle {}    -> incorrectOperands
      MO_VU_Min {}  -> incorrectOperands
      MO_VU_Max {}  -> incorrectOperands
      MO_VS_Min {}  -> incorrectOperands
      MO_VS_Max {}  -> incorrectOperands
      MO_VF_Min {}  -> incorrectOperands
      MO_VF_Max {}  -> incorrectOperands

      MO_VF_Extract {}    -> incorrectOperands
      MO_VF_Add {}        -> incorrectOperands
      MO_VF_Sub {}        -> incorrectOperands
      MO_VF_Mul {}        -> incorrectOperands
      MO_VF_Quot {}       -> incorrectOperands

      -- Ternary MachOps
      MO_FMA {}           -> incorrectOperands
      MO_VF_Insert {}     -> incorrectOperands
      MO_V_Insert {}      -> incorrectOperands

      --_other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)

        bitcast :: Format -> Format -> CmmExpr -> NatM Register
        bitcast fmt rfmt expr =
          do (src, e_code) <- getSomeReg expr
             let code = \dst -> e_code `snocOL` (MOVD fmt (OpReg src) (OpReg dst))
             return (Any rfmt code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intFormat new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg :: Width -> CmmExpr -> NatM Register
        toI16Reg = toI8Reg -- for now

        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
            = do e_code <- getRegister' platform is32Bit expr
                 return (swizzleRegisterRep e_code new_format)

        vector_float_negate_avx :: Length -> Width -> CmmExpr -> NatM Register
        vector_float_negate_avx l w expr = do
          let fmt :: Format
              mask :: CmmLit
              (fmt, mask) = case w of
                       W32 -> (VecFormat l FmtFloat , CmmInt (bit 31) w) -- TODO: these should be negative 0 floating point literals,
                       W64 -> (VecFormat l FmtDouble, CmmInt (bit 63) w) -- but we don't currently have those in Cmm.
                       _ -> panic "AVX floating-point negation: elements must be FF32 or FF64"
          (maskReg, maskCode) <- getSomeReg (CmmLit $ CmmVec $ replicate l mask)
          (reg, exp) <- getSomeReg expr
          let code dst = maskCode `appOL`
                         exp `snocOL`
                         (VMOVU fmt (OpReg reg) (OpReg dst)) `snocOL`
                         (VXOR fmt (OpReg maskReg) dst dst)
          return (Any fmt code)
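        -- The negation above (and its SSE twin below) works by XOR-ing each
        -- lane with a mask that has only the IEEE-754 sign bit set (bit 31
        -- for FF32, bit 63 for FF64).  For example, 1.0 :: Float is
        -- 0x3F800000 and flipping bit 31 gives 0xBF800000, i.e. -1.0, so the
        -- XOR negates the value without touching any other bits (and maps
        -- 0.0 to -0.0).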

        vector_float_negate_sse :: Length -> Width -> CmmExpr -> NatM Register
        vector_float_negate_sse l w expr = do
          let fmt :: Format
              mask :: CmmLit
              (fmt, mask) = case w of
                       W32 -> (VecFormat l FmtFloat , CmmInt (bit 31) w) -- Same comment as for vector_float_negate_avx,
                       W64 -> (VecFormat l FmtDouble, CmmInt (bit 63) w) -- these should be -0.0 CmmFloat values.
                       _ -> panic "SSE floating-point negation: elements must be FF32 or FF64"
          (maskReg, maskCode) <- getSomeReg (CmmLit $ CmmVec $ replicate l mask)
          (reg, exp) <- getSomeReg expr
          let code dst = maskCode `appOL`
                         exp `snocOL`
                         (MOVU fmt (OpReg reg) (OpReg dst)) `snocOL`
                         (XOR  fmt (OpReg maskReg) (OpReg dst))
          return (Any fmt code)

        -----------------------

        -- TODO: we could use VBROADCASTSS/SD when AVX2 is available.
        vector_float_broadcast_avx :: Length
                                   -> Width
                                   -> CmmExpr
                                   -> NatM Register
        vector_float_broadcast_avx len w expr = do
          (dst, exp) <- getSomeReg expr
          let fmt = VecFormat len (floatScalarFormat w)
              code = VSHUF fmt (ImmInt 0) (OpReg dst) dst dst
          return $ Fixed fmt dst (exp `snocOL` code)

        vector_float_broadcast_sse :: Length
                                   -> Width
                                   -> CmmExpr
                                   -> NatM Register
        vector_float_broadcast_sse len w expr = do
          (dst, exp) <- getSomeReg expr
          let fmt = VecFormat len (floatScalarFormat w)
              code = SHUF fmt (ImmInt 0) (OpReg dst) dst
          return $ Fixed fmt dst (exp `snocOL` code)
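        -- Both floating-point broadcasts above rely on a shuffle with
        -- immediate 0: every field of the shuffle control selects element 0
        -- of the source, so once the scalar is in the destination register
        -- the shuffle replicates lane 0 into all lanes (e.g. SHUFPS with an
        -- immediate of 0 copies float element 0 into all four positions when
        -- source and destination are the same register).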

        vector_int_broadcast :: Length
                             -> Width
                             -> CmmExpr
                             -> NatM Register
        vector_int_broadcast len W64 expr = do
          (reg, exp) <- getNonClobberedReg expr
          let fmt = VecFormat len FmtInt64
          return $ Any fmt (\dst -> exp `snocOL`
                                    (MOVD II64 (OpReg reg) (OpReg dst)) `snocOL`
                                    (PUNPCKLQDQ fmt (OpReg dst) dst)
                                    )
        vector_int_broadcast len W32 expr = do
          (reg, exp) <- getNonClobberedReg expr
          let fmt = VecFormat len FmtInt32
          return $ Any fmt (\dst -> exp `snocOL`
                                    (MOVD II32 (OpReg reg) (OpReg dst)) `snocOL`
                                    (PSHUFD fmt (ImmInt 0x00) (OpReg dst) dst)
                                    )
        vector_int_broadcast _ _ _ =
          sorry "Unsupported Integer vector broadcast operation; please use -fllvm."


getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  avx <- avxEnabled
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      -- Invert comparison condition and swap operands
      -- See Note [SSE Parity Checks]
      MO_F_Lt _ -> condFltReg is32Bit GTT  y x
      MO_F_Le _ -> condFltReg is32Bit GE   y x

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add  w -> trivialFCode_sse2 w ADD  x y
      MO_F_Sub  w -> trivialFCode_sse2 w SUB  x y
      MO_F_Quot w -> trivialFCode_sse2 w FDIV x y
      MO_F_Mul  w -> trivialFCode_sse2 w MUL  x y
      MO_F_Min  w -> trivialFCode_sse2 w (MINMAX Min FloatMinMax) x y
      MO_F_Max  w -> trivialFCode_sse2 w (MINMAX Max FloatMinMax) x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul W8  -> imulW8 x y
      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

        {- Shift ops on x86 have constraints on their source: the shift
           amount has to be either an immediate, %cl, or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
      MO_Shl Width
rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      MO_VF_Shuffle l w is
        | l * widthInBits w == 128
        -> if
            | avx
            -> vector_shuffle_float l w x y is
            | otherwise
            -> sorry "Please enable the -mavx flag"
        | otherwise
        -> sorry "Please use -fllvm for wide shuffle instructions"

      MO_VF_Extract l W32   | avx       -> vector_float_extract l W32 x y
                            | otherwise -> vector_float_extract_sse l W32 x y
      MO_VF_Extract l W64               -> vector_float_extract l W64 x y
      MO_VF_Extract {} -> incorrectOperands

      MO_V_Extract l W64                -> vector_int_extract_sse l W64 x y
      -- SIMD NCG TODO: W32, W16, W8
      MO_V_Extract {} -> needLlvm mop

      MO_VF_Add l w         | avx       -> vector_float_op_avx VA_Add l w x y
                            | otherwise -> vector_float_op_sse VA_Add l w x y

      MO_VF_Sub l w         | avx       -> vector_float_op_avx VA_Sub l w x y
                            | otherwise -> vector_float_op_sse VA_Sub l w x y

      MO_VF_Mul l w         | avx       -> vector_float_op_avx VA_Mul l w x y
                            | otherwise -> vector_float_op_sse VA_Mul l w x y

      MO_VF_Quot l w        | avx       -> vector_float_op_avx VA_Div l w x y
                            | otherwise -> vector_float_op_sse VA_Div l w x y

      MO_VF_Min l w         | avx       -> vector_float_op_avx VA_Min l w x y
                            | otherwise -> vector_float_op_sse VA_Min l w x y

      MO_VF_Max l w         | avx       -> vector_float_op_avx VA_Max l w x y
                            | otherwise -> vector_float_op_sse VA_Max l w x y

      -- SIMD NCG TODO: integer vector operations
      MO_V_Shuffle {} -> needLlvm mop
      MO_V_Add {} -> needLlvm mop
      MO_V_Sub {} -> needLlvm mop
      MO_V_Mul {} -> needLlvm mop
      MO_VS_Quot {} -> needLlvm mop
      MO_VS_Rem {} -> needLlvm mop
      MO_VU_Quot {} -> needLlvm mop
      MO_VU_Rem {} -> needLlvm mop

      MO_VU_Min {} -> needLlvm mop
      MO_VU_Max {} -> needLlvm mop
      MO_VS_Min {} -> needLlvm mop
      MO_VS_Max {} -> needLlvm mop

      -- Unary MachOps
      MO_S_Neg {} -> incorrectOperands
      MO_F_Neg {} -> incorrectOperands
      MO_Not {} -> incorrectOperands
      MO_SF_Round {} -> incorrectOperands
      MO_FS_Truncate {} -> incorrectOperands
      MO_SS_Conv {} -> incorrectOperands
      MO_XX_Conv {} -> incorrectOperands
      MO_FF_Conv {} -> incorrectOperands
      MO_UU_Conv {} -> incorrectOperands
      MO_WF_Bitcast {} -> incorrectOperands
      MO_FW_Bitcast  {} -> incorrectOperands
      MO_RelaxedRead {} -> incorrectOperands
      MO_AlignmentCheck {} -> incorrectOperands
      MO_VS_Neg {} -> incorrectOperands
      MO_VF_Neg {} -> incorrectOperands
      MO_V_Broadcast {} -> incorrectOperands
      MO_VF_Broadcast {} -> incorrectOperands

      -- Ternary MachOps
      MO_FMA {} -> incorrectOperands
      MO_V_Insert {} -> incorrectOperands
      MO_VF_Insert {} -> incorrectOperands

  where
    --------------------
    triv_op :: Width -> (Format -> Operand -> Operand -> Instr) -> NatM Register
    triv_op width instr = trivialCode width op (Just op) x y
                        where op   = instr (intFormat width)

    -- Special case for IMUL for bytes, since the result of IMULB will be in
    -- %ax, the split to %dx/%edx/%rdx and %ax/%eax/%rax happens only for wider
    -- values.
    imulW8 :: CmmExpr -> CmmExpr -> NatM Register
    imulW8 arg_a arg_b = do
        (a_reg, a_code) <- getNonClobberedReg arg_a
        b_code <- getAnyReg arg_b

        let code = a_code `appOL` b_code eax `appOL`
                   toOL [ IMUL2 format (OpReg a_reg) ]
            format = intFormat W8

        return (Fixed format eax code)
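    -- The one-operand form of IMUL with an 8-bit operand multiplies %al by
    -- the operand and leaves the full 16-bit product in %ax, which is why
    -- imulW8 forces the second argument into %eax and returns a result
    -- fixed to %eax rather than splitting it across two registers.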

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo W8 a b = do
         -- The general case (W16, W32, W64) doesn't work for W8 as its
         -- multiplication doesn't use two registers.
         --
         -- The plan is:
         -- 1. truncate and sign-extend a and b to 8bit width
         -- 2. multiply a' = a * b in 32bit width
         -- 3. copy and sign-extend 8bit from a' to c
         -- 4. compare a' and c: they are equal if there was no overflow
         (a_reg, a_code) <- getNonClobberedReg a
         (b_reg, b_code) <- getNonClobberedReg b
         let
             code = a_code `appOL` b_code `appOL`
                        toOL [
                           MOVSxL II8 (OpReg a_reg) (OpReg a_reg),
                           MOVSxL II8 (OpReg b_reg) (OpReg b_reg),
                           IMUL II32 (OpReg b_reg) (OpReg a_reg),
                           MOVSxL II8 (OpReg a_reg) (OpReg eax),
                           CMP II16 (OpReg a_reg) (OpReg eax),
                           SETCC NE (OpReg eax)
                        ]
         return (Fixed II8 eax code)
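    -- A small worked example of the check above: for a = 100 and b = 2 at
    -- W8, the 32-bit product is 200 (0xC8).  Sign-extending its low byte
    -- gives -56, which differs from 200, so SETCC NE reports a possible
    -- overflow; for a = 10 and b = 2 the product 20 sign-extends to itself
    -- and no overflow is reported.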
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt  = case rep of
                           W16 -> 15
                           W32 -> 31
                           W64 -> 63
                           w -> panic ("shift_amt: " ++ show w)

             format = intFormat rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 format (OpReg a_reg),   -- result in %edx:%eax
                           SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB format (OpReg edx) (OpReg eax)
                                -- compare against upper
                           -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed format eax code)

    --------------------
    shift_code :: Width
               -> (Format -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit)
      -- Handle the case of a shift larger than the width of the shifted value.
      -- This is necessary since x86 applies a mask of 0x1f to the shift
      -- amount, meaning that, e.g., `shr 47, $eax` will actually shift by
      -- `47 & 0x1f == 15`. See #20626.
      | CmmInt n _ <- lit
      , n >= fromIntegral (widthInBits width)
      = getRegister $ CmmLit $ CmmInt 0 width

      | otherwise = do
          x_code <- getAnyReg x
          let
               format = intFormat width
               code dst
                  = x_code dst `snocOL`
                    instr format (OpImm (litToImm lit)) (OpReg dst)
          return (Any format code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
      * in the case of C calls the use of ecx here can interfere with arguments.
        We avoid this with the hack described in Note [Evaluate C-call
        arguments before placing in destination registers]
    -}
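    -- For example, for a right shift @x >> y@ this emits, roughly:
    --   <code for x, into tmp>; <code for y, into %ecx>; SHR %cl, tmp
    -- and returns @Fixed format tmp@, so the register allocator keeps the
    -- result in tmp while %ecx stays pinned for the shift amount.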
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let format = intFormat width
        tmp <- getNewRegNat format
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr format (OpReg ecx) (OpReg tmp)
        return (Fixed format tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y
        , rep /= W8 -- LEA doesn't support byte size (#18614)
        = add_int rep x y
    add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
      where format = intFormat rep
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y)
        , rep /= W8 -- LEA doesn't support byte size (#18614)
        = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y

    -- our three-operand add instruction:
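    -- Using LEA here lets the destination differ from the source register and
    -- leaves the condition flags untouched.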
    add_int :: Width -> CmmExpr -> Integer -> NatM Register
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            format = intFormat width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA format
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        --
        return (Any format code)

    ----------------------

    -- See Note [DIV/IDIV for bytes]
    div_code :: Width -> Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
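    -- DIV/IDIV put the quotient in %eax and the remainder in %edx, so the
    -- 'quotient' flag below merely selects which of those two fixed registers
    -- holds the requested result.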
    div_code W8 signed quotient x y = do
        let widen | signed    = MO_SS_Conv W8 W16
                  | otherwise = MO_UU_Conv W8 W16
        div_code
            W16
            signed
            quotient
            (CmmMachOp widen [x])
            (CmmMachOp widen [y])

    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             format = intFormat width
             widen | signed    = CLTD format
                   | otherwise = XOR format (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr format y_op]

             result | quotient  = eax
                    | otherwise = edx

           return (Fixed format result code)

    -----------------------
    -- Vector operations---
    vector_float_op_avx :: VectorArithInstns
                        -> Length
                        -> Width
                        -> CmmExpr
                        -> CmmExpr
                        -> NatM Register
    vector_float_op_avx op l w expr1 expr2 = do
      (reg1, exp1) <- getSomeReg expr1
      (reg2, exp2) <- getSomeReg expr2
      let format   = case w of
                       W32 -> VecFormat l FmtFloat
                       W64 -> VecFormat l FmtDouble
                       _ -> pprPanic "Floating-point AVX vector operation not supported at this width"
                             (text "width:" <+> ppr w)
          code dst = case op of
            VA_Add -> arithInstr VADD
            VA_Sub -> arithInstr VSUB
            VA_Mul -> arithInstr VMUL
            VA_Div -> arithInstr VDIV
            VA_Min -> arithInstr (VMINMAX Min FloatMinMax)
            VA_Max -> arithInstr (VMINMAX Max FloatMinMax)
            where
              -- opcode src2 src1 dst <==> dst = src1 `opcode` src2
              arithInstr instr = exp1 `appOL` exp2 `snocOL`
                                 (instr format (OpReg reg2) reg1 dst)
      return (Any format code)

    vector_float_op_sse :: VectorArithInstns
                        -> Length
                        -> Width
                        -> CmmExpr
                        -> CmmExpr
                        -> NatM Register
    vector_float_op_sse op l w expr1 expr2 = do
      (reg1, exp1) <- getSomeReg expr1
      (reg2, exp2) <- getSomeReg expr2
      let format   = case w of
                       W32 -> VecFormat l FmtFloat
                       W64 -> VecFormat l FmtDouble
                       _ -> pprPanic "Floating-point SSE vector operation not supported at this width"
                             (text "width:" <+> ppr w)
          code dst = case op of
            VA_Add -> arithInstr ADD
            VA_Sub -> arithInstr SUB
            VA_Mul -> arithInstr MUL
            VA_Div -> arithInstr FDIV
            VA_Min -> arithInstr (MINMAX Min FloatMinMax)
            VA_Max -> arithInstr (MINMAX Max FloatMinMax)
            where
              -- opcode src2 src1 <==> src1 = src1 `opcode` src2
              arithInstr instr
                = exp1 `appOL` exp2 `snocOL`
                  (MOVU format (OpReg reg1) (OpReg dst)) `snocOL`
                  (instr format (OpReg reg2) (OpReg dst))
      return (Any format code)
    --------------------
    vector_float_extract :: Length
                         -> Width
                         -> CmmExpr
                         -> CmmExpr
                         -> NatM Register
    vector_float_extract l W32 expr (CmmLit lit) = do
      (r, exp) <- getSomeReg expr
      let format   = VecFormat l FmtFloat
          imm      = litToImm lit
          code dst
            = case lit of
                CmmInt 0 _ -> exp `snocOL` (MOV FF32 (OpReg r) (OpReg dst))
                CmmInt _ _ -> exp `snocOL` (VPSHUFD format imm (OpReg r) dst)
                _          -> pprPanic "Unsupported AVX floating-point vector extract offset" (ppr lit)
      return (Any FF32 code)
    vector_float_extract l W64 expr (CmmLit lit) = do
      (r, exp) <- getSomeReg expr
      let format   = VecFormat l FmtDouble
          code dst
            = case lit of
                CmmInt 0 _ -> exp `snocOL`
                              (MOV FF64 (OpReg r) (OpReg dst))
                CmmInt 1 _ -> exp `snocOL`
                              (MOVHLPS format r dst)
                _          -> pprPanic "Unsupported AVX floating-point vector extract offset" (ppr lit)
      return (Any FF64 code)
    vector_float_extract _ w c e =
      pprPanic "Unsupported AVX floating-point vector extract" (pdoc platform c $$ pdoc platform e $$ ppr w)
    -----------------------
    -----------------------

    vector_float_extract_sse :: Length
                             -> Width
                             -> CmmExpr
                             -> CmmExpr
                             -> NatM Register
    vector_float_extract_sse l W32 expr (CmmLit lit)
      = do
      (r,exp) <- getSomeReg expr
      let format   = VecFormat l FmtFloat
          imm      = litToImm lit
          code dst
            = case lit of
                CmmInt 0 _ -> exp `snocOL` (MOVU format (OpReg r) (OpReg dst))
                CmmInt _ _ -> exp `snocOL` (PSHUFD format imm (OpReg r) dst)
                _          -> pprPanic "Unsupported SSE floating-point vector extract offset" (ppr lit)
      return (Any FF32 code)
    vector_float_extract_sse _ w c e
      = pprPanic "Unsupported SSE floating-point vector extract" (pdoc platform c $$ pdoc platform e $$ ppr w)
    -----------------------

    vector_int_extract_sse :: Length
                           -> Width
                           -> CmmExpr
                           -> CmmExpr
                           -> NatM Register
    vector_int_extract_sse l@2 W64 expr (CmmLit lit)
      = do
      (r, exp) <- getSomeReg expr
      let fmt = VecFormat l FmtInt64
      tmp <- getNewRegNat fmt
      let code dst =
            case lit of
              CmmInt 0 _ -> exp `snocOL`
                            (MOVD II64 (OpReg r) (OpReg dst))
              CmmInt 1 _ -> exp `snocOL`
                            (MOVHLPS fmt r tmp) `snocOL`
                            (MOVD II64 (OpReg tmp) (OpReg dst))
              _          -> panic "Error in offset while unpacking"
      return (Any II64 code)
    vector_int_extract_sse _ w c e
      = pprPanic "Unsupported SSE integer vector extract" (pdoc platform c $$ pdoc platform e $$ ppr w)

    vector_shuffle_float :: Length -> Width -> CmmExpr -> CmmExpr -> [Int] -> NatM Register
    vector_shuffle_float l w v1 v2 is = do
      (r1, exp1) <- getSomeReg v1
      (r2, exp2) <- getSomeReg v2
      let fmt = VecFormat l (if w == W32 then FmtFloat else FmtDouble)
          code dst
            = exp1 `appOL` (exp2 `appOL` shuffleInstructions fmt r1 r2 is dst)
      return (Any fmt code)

    shuffleInstructions :: Format -> Reg -> Reg -> [Int] -> Reg -> OrdList Instr
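    -- Lower a vector shuffle to VSHUF (SHUFPS/SHUFPD), or to a plain vector
    -- move when the requested lanes already coincide with one source.  For
    -- FloatX4 the immediate packs four 2-bit lane selectors (bits 1:0, 3:2,
    -- 5:4, 7:6); indices 0..3 select lanes of the first source and 4..7 lanes
    -- of the second.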
    shuffleInstructions fmt v1 v2 is dst =
      case fmt of
        VecFormat 2 FmtDouble ->
          case is of
            [i1, i2] -> case (i1, i2) of
              (0,0) -> unitOL (VSHUF fmt (ImmInt 0b00) (OpReg v1) v1 dst)
              (1,1) -> unitOL (VSHUF fmt (ImmInt 0b11) (OpReg v1) v1 dst)
              (2,2) -> unitOL (VSHUF fmt (ImmInt 0b00) (OpReg v2) v2 dst)
              (3,3) -> unitOL (VSHUF fmt (ImmInt 0b11) (OpReg v2) v2 dst)
              (0,1) -> unitOL (VMOVU fmt (OpReg v1) (OpReg dst))
              (2,3) -> unitOL (VMOVU fmt (OpReg v2) (OpReg dst))
              (1,0) -> unitOL (VSHUF fmt (ImmInt 0b01) (OpReg v1) v1 dst)
              (3,2) -> unitOL (VSHUF fmt (ImmInt 0b01) (OpReg v2) v2 dst)
              (0,2) -> unitOL (VSHUF fmt (ImmInt 0b00) (OpReg v2) v1 dst)
              (2,0) -> unitOL (VSHUF fmt (ImmInt 0b00) (OpReg v1) v2 dst)
              (0,3) -> unitOL (VSHUF fmt (ImmInt 0b10) (OpReg v2) v1 dst)
              (3,0) -> unitOL (VSHUF fmt (ImmInt 0b01) (OpReg v1) v2 dst)
              (1,2) -> unitOL (VSHUF fmt (ImmInt 0b01) (OpReg v2) v1 dst)
              (2,1) -> unitOL (VSHUF fmt (ImmInt 0b10) (OpReg v1) v2 dst)
              (1,3) -> unitOL (VSHUF fmt (ImmInt 0b11) (OpReg v2) v1 dst)
              (3,1) -> unitOL (VSHUF fmt (ImmInt 0b11) (OpReg v1) v2 dst)
              _ -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 3" (ppr is)
            _ -> pprPanic "vector shuffle: wrong number of indices (expected 2)" (ppr is)
        VecFormat 4 FmtFloat
          -- indices 0 <= i <= 7
          | all ( (>= 0) <&&> (<= 7) ) is ->
          case is of
            [i1, i2, i3, i4]
              | all ( <= 3 ) is
              , let imm = i1 + i2 `shiftL` 2 + i3 `shiftL` 4 + i4 `shiftL` 6
              -> unitOL (VSHUF fmt (ImmInt imm) (OpReg v1) v1 dst)
              | all ( >= 4 ) is
              , let [j1, j2, j3, j4] = map ( subtract 4 ) is
                    imm = j1 + j2 `shiftL` 2 + j3 `shiftL` 4 + j4 `shiftL` 6
              -> unitOL (VSHUF fmt (ImmInt imm) (OpReg v2) v2 dst)
              | i1 <= 3, i2 <= 3
              , i3 >= 4, i4 >= 4
              , let imm = i1 + i2 `shiftL` 2 + (i3 - 4) `shiftL` 4 + (i4 - 4) `shiftL` 6
              -> unitOL (VSHUF fmt (ImmInt imm) (OpReg v2) v1 dst)
              | i1 >= 4, i2 >= 4
              , i3 <= 3, i4 <= 3
              , let imm = (i1 - 4) + (i2 - 4) `shiftL` 2 + i3 `shiftL` 4 + i4 `shiftL` 6
              -> unitOL (VSHUF fmt (ImmInt imm) (OpReg v1) v2 dst)
              | otherwise
              ->
              -- Fall-back code with 4 INSERTPS operations.
              -- SIMD NCG TODO: handle more cases with better lowering.
              let -- bits: ss_dd_zzzz
                  -- ss: pick source location
                  -- dd: pick destination location
                  -- zzzz: pick locations to be zeroed
                  insertImm src dst = shiftL   ( src `mod` 4 ) 6
                                    .|. shiftL dst 4
                  vec src = if src >= 4 then v2 else v1
              in unitOL
                (INSERTPS fmt (ImmInt $ insertImm i1 0 .|. 0b1110) (OpReg $ vec i1) dst)
                `snocOL`
                (INSERTPS fmt (ImmInt $ insertImm i2 1) (OpReg $ vec i2) dst)
                `snocOL`
                (INSERTPS fmt (ImmInt $ insertImm i3 2) (OpReg $ vec i3) dst)
                `snocOL`
                (INSERTPS fmt (ImmInt $ insertImm i4 3) (OpReg $ vec i4) dst)
            _ -> pprPanic "vector shuffle: wrong number of indices (expected 4)" (ppr is)
          | otherwise
          -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 7" (ppr is)
        _ ->
          pprPanic "vector shuffle: unsupported format" (ppr fmt)

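-- Ternary MachOps: fused multiply-add (lowered via FMA3) and vector insert
-- operations.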
getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
  avx    <- avxEnabled
  sse4_1 <- sse4_1Enabled
  case mop of
      -- Floating point fused multiply-add operations @ ± x*y ± z@
      MO_FMA var l w
        | l * widthInBits w > 256
        -> sorry "Please use -fllvm for wide vector FMA support"
        | otherwise
        -> genFMA3Code l w var x y z

      -- Ternary vector operations
      MO_VF_Insert l W32  | l == 4 -> vector_floatx4_insert_sse sse4_1 x y z
                          | otherwise ->
         sorry $ "FloatX" ++ show l ++ "# insert operations require -fllvm"
           -- SIMD NCG TODO:
           --
           --   - add support for FloatX8, FloatX16.
      MO_VF_Insert l W64  -> vector_double_insert avx l x y z
      MO_V_Insert l W64   -> vector_int_insert_sse l W64 x y z

      _other -> pprPanic "getRegister(x86) - ternary CmmMachOp (1)"
                  (pprMachOp mop)

  where
    -- SIMD NCG TODO:
    --
    --   - add support for FloatX8, FloatX16.
    vector_floatx4_insert_sse :: Bool
                              -> CmmExpr
                              -> CmmExpr
                              -> CmmExpr
                              -> NatM Register
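    -- With SSE4.1 we can use INSERTPS directly; bits 5:4 of its immediate
    -- select the destination lane, hence the (offset `shiftL` 4) below.
    -- Without SSE4.1 we fall back to MOVSS/UNPCKL/SHUF sequences.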
    vector_floatx4_insert_sse sse4_1 vecExpr valExpr (CmmLit (CmmInt offset _))
      | sse4_1 = do
        (r, exp)    <- getNonClobberedReg valExpr
        fn          <- getAnyReg vecExpr
        let fmt      = VecFormat 4 FmtFloat
            imm      = litToImm (CmmInt (offset `shiftL` 4) W32)
            code dst = exp `appOL`
                      (fn dst) `snocOL`
                      (INSERTPS fmt imm (OpReg r) dst)
         in return $ Any fmt code
      | otherwise = do -- SSE <= 3
        (r, exp)    <- getNonClobberedReg valExpr
        fn          <- getAnyReg vecExpr
        let fmt      = VecFormat 4 FmtFloat
        tmp <- getNewRegNat fmt
        let code dst
              = case offset of
                  0 -> exp `appOL`
                      (fn dst) `snocOL`
                      -- The following MOV compiles to MOVSS instruction and merges two vectors
                      (MOV fmt (OpReg r) (OpReg dst))  -- dst <- (r[0],dst[1],dst[2],dst[3])
                  1 -> exp `appOL`
                      (fn dst) `snocOL`
                      (MOVU fmt (OpReg dst) (OpReg tmp)) `snocOL`  -- tmp <- dst
                      (UNPCKL fmt (OpReg r) dst) `snocOL`          -- dst <- (dst[0],r[0],dst[1],r[1])
                      (SHUF fmt (ImmInt 0xe4) (OpReg tmp) dst)     -- dst <- (dst[0],dst[1],tmp[2],tmp[3])
                  2 -> exp `appOL`
                       (fn dst) `snocOL`
                       (MOVU fmt (OpReg dst) (OpReg tmp)) `snocOL`  -- tmp <- dst
                       (MOV fmt (OpReg r) (OpReg tmp)) `snocOL`     -- tmp <- (r[0],tmp[1],tmp[2],tmp[3]) with MOVSS
                       (SHUF fmt (ImmInt 0xc4) (OpReg tmp) dst)     -- dst <- (dst[0],dst[1],tmp[0],tmp[3])
                  3 -> exp `appOL`
                       (fn dst) `snocOL`
                       (MOVU fmt (OpReg dst) (OpReg tmp)) `snocOL`  -- tmp <- dst
                       (MOV fmt (OpReg r) (OpReg tmp)) `snocOL`     -- tmp <- (r[0],tmp[1],tmp[2],tmp[3]) with MOVSS
                       (SHUF fmt (ImmInt 0x24) (OpReg tmp) dst)     -- dst <- (dst[0],dst[1],tmp[2],tmp[0])
                  _ -> panic "MO_VF_Insert FloatX4: unsupported offset"
         in return $ Any fmt code
    vector_floatx4_insert_sse _ _ _ offset
      = pprPanic "Unsupported vector insert operation" $
          vcat
            [ text "FloatX4#"
            , text "offset:" <+> pdoc platform offset ]


    -- SIMD NCG TODO:
    --
    --   - add support for DoubleX4#, DoubleX8#.
    vector_double_insert :: Bool
                         -> Length
                         -> CmmExpr
                         -> CmmExpr
                         -> CmmExpr
                         -> NatM Register
    -- DoubleX2
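    -- Lane 0 is written with a merging MOVSD; lane 1 with SHUFPD and immediate
    -- 0b00, which keeps the low element of each source (vector in the low
    -- lane, value in the high lane).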
    vector_double_insert avx len@2 vecExpr valExpr (CmmLit offset)
      = do
        (valReg, valExp) <- getNonClobberedReg valExpr
        (vecReg, vecExp) <- getSomeReg vecExpr -- NB: vector regs never clobbered by instruction
        let movu = if avx then VMOVU else MOVU
            fmt = VecFormat len FmtDouble
            code dst
              = case offset of
                  CmmInt 0 _ -> valExp `appOL`
                                vecExp `snocOL`
                                (movu (VecFormat 2 FmtDouble) (OpReg vecReg) (OpReg dst)) `snocOL`
                                -- The following MOV compiles to MOVSD instruction and merges two vectors
                                (MOV (VecFormat 2 FmtDouble) (OpReg valReg) (OpReg dst))
                  CmmInt 1 _ -> valExp `appOL`
                                vecExp `snocOL`
                                (movu (VecFormat 2 FmtDouble) (OpReg vecReg) (OpReg dst)) `snocOL`
                                (SHUF fmt (ImmInt 0b00) (OpReg valReg) dst)
                  _ -> pprPanic "MO_VF_Insert DoubleX2: unsupported offset" (ppr offset)
         in return $ Any fmt code
    vector_double_insert _ _ _ _ _ =
      sorry "Unsupported floating-point vector insert operation; please use -fllvm"
    -- For DoubleX4: use VSHUFPD.
    -- For DoubleX8: use something like vinsertf64x2 followed by vpblendd?

    -- SIMD NCG TODO:
    --
    --   - only supports Int64X2, add support for everything else:
    --     (Int32X{4,2}, Int16X{8,4,2}, Int8X{16,8,4,2})
    vector_int_insert_sse :: HasCallStack => Length
                          -> Width
                          -> CmmExpr
                          -> CmmExpr
                          -> CmmExpr
                          -> NatM Register
    -- Int64X2
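    -- Both cases finish with PUNPCKLQDQ, which interleaves the low quadwords
    -- of its operands (the destination's low lane stays put, the operand's low
    -- lane lands in the high lane), after arranging the kept vector lane and
    -- the new value into the low quadwords of dst and tmp.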
    vector_int_insert_sse len@2 W64 vecExpr valExpr (CmmLit offset)
      = do
        (valReg, valExp) <- getNonClobberedReg valExpr
        (vecReg, vecExp) <- getSomeReg vecExpr -- NB: vector regs never clobbered by instruction
        let fmt = VecFormat len FmtInt64
        tmp <- getNewRegNat fmt
        let code dst
              = case offset of
                  CmmInt 0 _ -> valExp `appOL`
                                vecExp `snocOL`
                                (MOVHLPS fmt vecReg tmp) `snocOL`
                                (MOVD II64 (OpReg valReg) (OpReg dst)) `snocOL`
                                (PUNPCKLQDQ fmt (OpReg tmp) dst)
                  CmmInt 1 _ -> valExp `appOL`
                                vecExp `snocOL`
                                (MOV II64 (OpReg vecReg) (OpReg dst)) `snocOL`
                                (MOVD II64 (OpReg valReg) (OpReg tmp)) `snocOL`
                                (PUNPCKLQDQ fmt (OpReg tmp) dst)
                  _ -> pprPanic "MO_V_Insert Int64X2: unsupported offset" (ppr offset)
         in return $ Any fmt code
    vector_int_insert_sse _ _ _ _ _ =
      sorry "Unsupported integer vector insert operation; please use -fllvm"

getRegister' _ _ (CmmMachOp mop (_:_:_:_:_)) =
  pprPanic "getRegister(x86): MachOp with >= 4 arguments" (text $ show mop)

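-- Memory loads: vector loads go through 'movInstr' (which picks a suitable
-- SSE/AVX move for the format), float loads through 'loadAmode', and integer
-- loads use MOV (or MOVZxL for 8-bit loads on 32-bit targets).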
getRegister' platform is32Bit load@(CmmLoad mem ty _)
  | isVecType ty
  = do
    config <- getConfig
    Amode addr mem_code <- getAmode mem
    let code dst =
          mem_code `snocOL`
            movInstr config format (OpAddr addr) (OpReg dst)
    return (Any format code)
  | isFloatType ty
  = do
    Amode addr mem_code <- getAmode mem
    loadAmode (floatFormat width) addr mem_code

  | is32Bit && not (isWord64 ty)
  = do
    let
      instr = case width of
                W8     -> MOVZxL II8
                  -- We always zero-extend 8-bit loads, if we
                  -- can't think of anything better.  This is because
                  -- we can't guarantee access to an 8-bit variant of every register
                  -- (esi and edi don't have 8-bit variants), so to make things
                  -- simpler we do our 8-bit arithmetic with full 32-bit registers.
                _other -> MOV format
    code <- intLoadCode instr mem
    return (Any format code)

  | not is32Bit
  -- Simpler memory load code on x86_64
  = do
    code <- intLoadCode (MOV format) mem
    return (Any format code)

  | otherwise
  = pprPanic "getRegister(x86) CmmLoad" (pdoc platform load)
  where
    format = cmmTypeFormat ty
    width = typeWidth ty

-- Handle symbol references with LEA and %rip-relative addressing.
-- See Note [%rip-relative addressing on x86-64].
getRegister' platform is32Bit (CmmLit lit)
  | is_label lit
  , not is32Bit
  = do let format = cmmTypeFormat (cmmLitType platform lit)
           imm = litToImm lit
           op = OpAddr (AddrBaseIndex EABaseRip EAIndexNone imm)
           code dst = unitOL (LEA format op (OpReg dst))
       return (Any format code)
  where
    is_label (CmmLabel {})        = True
    is_label (CmmLabelOff {})     = True
    is_label (CmmLabelDiffOff {}) = True
    is_label _                    = False

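-- Remaining literals: zero becomes a register self-XOR (XOR/PXOR/VXOR/VPXOR),
-- a constant vector with identical elements becomes a broadcast, small
-- word-sized integers use a 32-bit MOV (implicitly zero-extended on x86_64),
-- other integers are loaded as immediates, and everything else comes from a
-- constant placed in the data section.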
getRegister' platform is32Bit (CmmLit lit) = do
  avx <- avxEnabled

  -- NB: it is important that the code produced here (to load a literal into
  -- a register) doesn't clobber any registers other than the destination
  -- register; the code for generating C calls relies on this property.
  --
  -- In particular, we have:
  --
  -- > loadIntoRegMightClobberOtherReg (CmmLit _) = False
  --
  -- which means that we assume that loading a literal into a register
  -- will not clobber any other registers.

  -- TODO: this function mishandles floating-point negative zero,
  -- because -0.0 == 0.0 returns True and because we represent CmmFloat as
  -- Rational, which can't properly represent negative zero.

  if
    -- Zero: use XOR.
    | isZeroLit lit
    -> let code dst
             | isIntFormat fmt
             = let fmt'
                     | is32Bit
                     = fmt
                     | otherwise
                     -- x86_64: 32-bit xor is one byte shorter,
                     -- and zero-extends to 64 bits
                     = case fmt of
                         II64 -> II32
                         _ -> fmt
               in unitOL (XOR fmt' (OpReg dst) (OpReg dst))
             | avx
             = if float_or_floatvec
               then unitOL (VXOR fmt (OpReg dst) dst dst)
               else unitOL (VPXOR fmt dst dst dst)
             | otherwise
             = if float_or_floatvec
               then unitOL (XOR fmt (OpReg dst) (OpReg dst))
               else unitOL (PXOR fmt (OpReg dst) dst)
       in return $ Any fmt code

    -- Constant vector: use broadcast.
    | VecFormat l sFmt <- fmt
    , CmmVec (f:fs) <- lit
    , all (== f) fs
    -> do let w = scalarWidth sFmt
              broadcast = if isFloatScalarFormat sFmt
                          then MO_VF_Broadcast l w
                          else MO_V_Broadcast l w
          valCode <- getAnyReg (CmmMachOp broadcast [CmmLit f])
          return $ Any fmt valCode

    -- Optimisation for loading small literals on x86_64: take advantage
    -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
    -- instruction forms are shorter.
    | not is32Bit, isWord64 cmmTy, not (isBigLit lit)
    -> let
          imm = litToImm lit
          code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
      in
          return (Any II64 code)

    -- Scalar integer: use an immediate.
    | isIntFormat fmt
    -> let imm = litToImm lit
           code dst = unitOL (MOV fmt (OpImm imm) (OpReg dst))
       in return (Any fmt code)

    -- General case: load literal from data address.
    | otherwise
    -> do let w = formatToWidth fmt
          Amode addr addr_code <- memConstant (mkAlignment $ widthInBytes w) lit
          loadAmode fmt addr addr_code

    where
      cmmTy = cmmLitType platform lit
      fmt = cmmTypeFormat cmmTy
      float_or_floatvec = isFloatOrFloatVecFormat fmt
      isZeroLit (CmmInt i _) = i == 0
      isZeroLit (CmmFloat f _) = f == 0 -- TODO: mishandles negative zero
      isZeroLit (CmmVec fs) = all isZeroLit fs
      isZeroLit _ = False

      isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
      isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model. See Note [%rip-relative addressing on x86-64].

getRegister' platform _ slot@(CmmStackSlot {}) =
  pprPanic "getRegister(x86) CmmStackSlot" (pdoc platform slot)

intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
   -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: HasDebugCallStack => CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: HasDebugCallStack => Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = do
  config <- getConfig
  return (\dst -> fcode `snocOL` mkRegRegMoveInstr config rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: HasDebugCallStack => CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  config <- getConfig
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg, code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` mkRegRegMoveInstr config rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: HasDebugCallStack => CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  r <- getRegister expr
  config <- getConfig
  let platform = ncgPlatform config
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only certain regs can be clobbered
        | reg `elem` instrClobberedRegs platform
        -> do
                tmp <- getNewRegNat rep
                return (tmp, code `snocOL` mkRegRegMoveInstr config rep reg tmp)
        | otherwise ->
                return (reg, code)

--------------------------------------------------------------------------------

-- | Convert a 'CmmExpr' representing a memory address into an 'Amode'.
--
-- An 'Amode' is a datatype representing a valid address form for the target
-- (e.g. "Base + Index + disp" or immediate) and the code to compute it.
getAmode :: CmmExpr -> NatM Amode
getAmode e = do
   platform <- getPlatform
   let is32Bit = target32Bit platform

   case e of
      CmmRegOff r n
         -> getAmode $ mangleIndexTree r n

      CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal (GlobalRegUse PicBaseReg _)), CmmLit displacement]
         | not is32Bit
         -> return $ Amode (ripRel (litToImm displacement)) nilOL

      -- This is all just ridiculous, since it carefully undoes
      -- what mangleIndexTree has just done.
      CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)]
         | is32BitLit platform lit
         -- assert (rep == II32)???
         -> do
            (x_reg, x_code) <- getSomeReg x
            let off = ImmInt (-(fromInteger i))
            return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

      CmmMachOp (MO_Add _rep) [x, CmmLit lit]
         | is32BitLit platform lit
         -- assert (rep == II32)???
         -> do
            (x_reg, x_code) <- getSomeReg x
            let off = litToImm lit
            return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

      -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
      -- recognised by the next rule.
      CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _), b@(CmmLit _)]
         -> getAmode (CmmMachOp (MO_Add rep) [b,a])

      -- Matches: (x + offset) + (y << shift)
      CmmMachOp (MO_Add _) [CmmRegOff x offset, CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)]]
         | shift == 0 || shift == 1 || shift == 2 || shift == 3
         -> x86_complex_amode (CmmReg x) y shift (fromIntegral offset)

      CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)]]
         | shift == 0 || shift == 1 || shift == 2 || shift == 3
         -> x86_complex_amode x y shift 0

      CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Add _) [CmmMachOp (MO_Shl _)
                                                    [y, CmmLit (CmmInt shift _)], CmmLit (CmmInt offset _)]]
         | shift == 0 || shift == 1 || shift == 2 || shift == 3
         && is32BitInteger offset
         -> x86_complex_amode x y shift offset

      CmmMachOp (MO_Add _) [x,y]
         | not (isLit y) -- we already handle valid literals above.
         -> x86_complex_amode x y 0 0

      CmmLit lit@(CmmFloat {})
        -> pprPanic "X86 CodeGen: attempt to use floating-point value as a memory address"
             (ppr lit)

      -- Handle labels with %rip-relative addressing since in general the image
      -- may be loaded anywhere in the 64-bit address space (e.g. on Windows
      -- with high-entropy ASLR). See Note [%rip-relative addressing on x86-64].
      CmmLit lit
         | not is32Bit
         , is_label lit
         -> return (Amode (AddrBaseIndex EABaseRip EAIndexNone (litToImm lit)) nilOL)

      CmmLit lit
         | is32BitLit platform lit
         -> return (Amode (ImmAddr (litToImm lit) 0) nilOL)

      -- Literal with offsets too big (> 32 bits) fails during the linking phase
      -- (#15570). We already handled valid literals above so we don't have to
      -- test anything here.
      CmmLit (CmmLabelOff l off)
         -> getAmode (CmmMachOp (MO_Add W64) [ CmmLit (CmmLabel l)
                                             , CmmLit (CmmInt (fromIntegral off) W64)
                                             ])
      CmmLit (CmmLabelDiffOff l1 l2 off w)
         -> getAmode (CmmMachOp (MO_Add W64) [ CmmLit (CmmLabelDiffOff l1 l2 0 w)
                                             , CmmLit (CmmInt (fromIntegral off) W64)
                                             ])

      -- in case we can't do something better, we just compute the expression
      -- and put the result in a register
      _ -> do
        (reg,code) <- getSomeReg e
        return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
  where
    is_label (CmmLabel{})        = True
    is_label (CmmLabelOff{})     = True
    is_label (CmmLabelDiffOff{}) = True
    is_label _                   = False


-- | Like 'getAmode', but on 32-bit use simple register addressing
-- (i.e. no index register). This stops us from running out of
-- registers on x86 when using instructions such as cmpxchg, which can
-- use up to three virtual registers and one fixed register.
getSimpleAmode :: CmmExpr -> NatM Amode
getSimpleAmode addr = is32BitPlatform >>= \case
  False -> getAmode addr
  True  -> do
    addr_code <- getAnyReg addr
    config <- getConfig
    addr_r <- getNewRegNat (intFormat (ncgWordWidth config))
    let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
    return $! Amode amode (addr_code addr_r)

x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
       let
           code = x_code `appOL` y_code
           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
                                n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
               code)




-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.

-- getNonClobberedOperand: the value of the operand will remain valid across
-- the computation of an arbitrary expression, unless the expression
-- is computed directly into a register which the operand refers to
-- (see trivialCode where this function is used for an example).
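--
-- Concretely (a sketch of the usage pattern, not new behaviour): for a
-- two-operand operation such as @x + y@, the operand for @y@ can be fetched
-- with getNonClobberedOperand, the code for @x@ can then be generated even
-- though it may clobber arbitrary registers, and the saved operand is still
-- usable afterwards because it is an immediate, a memory address whose
-- registers won't be clobbered, or a fresh temporary register.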

getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) =
  if isSuitableFloatingPointLit lit
  then do
    let CmmFloat _ w = lit
    Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
    return (OpAddr addr, code)
  else do
    platform <- getPlatform
    if is32BitLit platform lit && isIntFormat (cmmTypeFormat (cmmLitType platform lit))
    then return (OpImm (litToImm lit), nilOL)
    else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem ty _) = do
  is32Bit <- is32BitPlatform
  -- this logic could be simplified
  -- TODO FIXME
  if   (if is32Bit then not (isWord64 ty) else True)
      -- i.e. on 32-bit we can handle anything except a 64-bit word here
      -- (floats, doubles and SIMD values are fine); on 64-bit we can handle
      -- anything.
    then do
      platform <- ncgPlatform <$> getConfig
      Amode src mem_code <- getAmode mem
      (src',save_code) <-
        if (amodeCouldBeClobbered platform src)
                then do
                   tmp <- getNewRegNat (archWordFormat is32Bit)
                   return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                           unitOL (LEA (archWordFormat is32Bit)
                                       (OpAddr src)
                                       (OpReg tmp)))
                else
                   return (src, nilOL)
      return (OpAddr src', mem_code `appOL` save_code)
    else
      -- otherwise (a 64-bit value on a 32-bit target) use the generic case
      getNonClobberedOperand_generic (CmmLoad mem ty NaturallyAligned)

getNonClobberedOperand e = getNonClobberedOperand_generic e

getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic e = do
  (reg, code) <- getNonClobberedReg e
  return (OpReg reg, code)

amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)

regClobbered :: Platform -> Reg -> Bool
regClobbered platform (RegReal (RealRegSingle rr)) = freeReg platform rr
regClobbered _ _ = False

-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)

getOperand (CmmLit lit) = do
  if isSuitableFloatingPointLit lit
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
      return (OpAddr addr, code)
    else do

  platform <- getPlatform
  if is32BitLit platform lit && (isIntFormat $ cmmTypeFormat (cmmLitType platform lit))
    then return (OpImm (litToImm lit), nilOL)
    else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem ty _) = do
  is32Bit <- is32BitPlatform
  if isIntFormat (cmmTypeFormat ty) && (if is32Bit then not (isWord64 ty) else True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else
       getOperand_generic (CmmLoad mem ty NaturallyAligned)

getOperand e = getOperand_generic e

getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)

isOperand :: Platform -> CmmExpr -> Bool
isOperand _ (CmmLoad _ _ _) = True
isOperand platform (CmmLit lit)
                          = is32BitLit platform lit
                          || isSuitableFloatingPointLit lit
isOperand _ _            = False

-- | Given a 'Register', produce a new 'Register' with an instruction block
-- which will check the value for alignment. Used for @-falignment-sanitisation@.
addAlignmentCheck :: Int -> Register -> Register
addAlignmentCheck align reg =
    case reg of
      Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt reg)
      Any fmt f          -> Any fmt (\reg -> f reg `appOL` check fmt reg)
  where
    check :: Format -> Reg -> InstrBlock
    check fmt reg =
        assert (isIntFormat fmt) $
        toOL [ TEST fmt (OpImm $ ImmInt $ align-1) (OpReg reg)
             , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel
             ]

memConstant :: Alignment -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  let rosection = Section ReadOnlyData lbl
  config <- getConfig
  platform <- getPlatform
  (addr, addr_code) <- if target32Bit platform
                       then do dynRef <- cmmMakeDynamicReference
                                             config
                                             DataReference
                                             lbl
                               Amode addr addr_code <- getAmode dynRef
                               return (addr, addr_code)
                       else return (ripRel (ImmCLbl lbl), nilOL)
  let code =
        LDATA rosection (align, CmmStaticsRaw lbl [CmmStaticLit lit])
        `consOL` addr_code
  return (Amode addr code)

-- | Load the value at the given address into any register.
loadAmode :: Format -> AddrMode -> InstrBlock -> NatM Register
loadAmode fmt addr addr_code = do
  config <- getConfig
  let load dst = movInstr config fmt (OpAddr addr) (OpReg dst)
  return $ Any fmt (\ dst -> addr_code `snocOL` load dst)

-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
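-- For example, a literal such as @CmmFloat 2.5 W64@ is placed in a read-only
-- data section via 'memConstant' and used as a memory operand, whereas
-- @CmmFloat 0 W64@ is rejected here so that getRegister' can materialise it
-- with its XOR-style zeroing idiom instead.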
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False

getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem ty _) = do
  is32Bit <- is32BitPlatform
  if isIntFormat (cmmTypeFormat ty) && (if is32Bit then not (isWord64 ty) else True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else do
       (reg, code) <- getNonClobberedReg e
       return (OpReg reg, code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

is32BitLit :: Platform -> CmmLit -> Bool
is32BitLit platform _lit
   | target32Bit platform = True
is32BitLit platform lit =
   case lit of
      CmmInt i W64              -> is32BitInteger i
      -- Except on Windows, assume that labels are in the range 0-2^31-1: this
      -- assumes the small memory model. See Note [%rip-relative addressing on
      -- x86-64].
      CmmLabel _                -> low_image
      -- however we can't assume that label offsets are in this range
      -- (see #15570)
      CmmLabelOff _ off         -> low_image && is32BitInteger (fromIntegral off)
      CmmLabelDiffOff _ _ off _ -> low_image && is32BitInteger (fromIntegral off)
      _                         -> True
  where
    -- Is the executable image certain to be located below 4GB? As noted in
    -- Note [%rip-relative addressing on x86-64], this is not true on Windows.
    low_image =
      case platformOS platform of
        OSMinGW32 -> False   -- See Note [%rip-relative addressing on x86-64]
        _         -> True


-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- yes, they really do seem to want exactly the same!

getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      -- Invert comparison condition and swap operands
      -- See Note [SSE Parity Checks]
      MO_F_Lt W32 -> condFltCode GTT  y x
      MO_F_Le W32 -> condFltCode GE   y x

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode GTT y x
      MO_F_Le W64 -> condFltCode GE  y x

      _ -> condIntCode (machOpToCond mop) x y

getCondCode other = do
   platform <- getPlatform
   pprPanic "getCondCode(2)(x86,x86_64)" (pdoc platform other)

machOpToCond :: MachOp -> Cond
machOpToCond mo = case mo of
  MO_Eq _   -> EQQ
  MO_Ne _   -> NE
  MO_S_Gt _ -> GTT
  MO_S_Ge _ -> GE
  MO_S_Lt _ -> LTT
  MO_S_Le _ -> LE
  MO_U_Gt _ -> GU
  MO_U_Ge _ -> GEU
  MO_U_Lt _ -> LU
  MO_U_Le _ -> LEU
  _other -> pprPanic "machOpToCond" (pprMachOp mo)

{-  Note [64-bit integer comparisons on 32-bit]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    On a 32-bit target a 64-bit comparison is split over the two 32-bit
    halves of each operand. There are two kinds of comparison to handle:

    * Comparison for equality (or lack thereof):

      We XOR the corresponding high and low halves and OR the two results;
      the combined result is zero exactly when the operands are equal.

    * Ordered comparisons:

      We first compare the low halves, then use a subtraction with borrow
      to compare the high halves.

      For signed numbers the result is determined by whether the sign and
      overflow flags agree; for unsigned numbers it is the carry flag.
-}
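
{- A reference-semantics sketch (comment only; eq64 and ult64 are hypothetical
   names, not part of the code generator) of what the sequences below compute
   on (hi, lo) pairs of 32-bit words:

   > eq64  (h1, l1) (h2, l2) = ((h1 `xor` h2) .|. (l1 `xor` l2)) == 0
   > ult64 (h1, l1) (h2, l2) = h1 < h2 || (h1 == h2 && l1 < l2)

   eq64 mirrors the XOR/XOR/OR sequence of cmpExact; ult64 is the borrow
   produced by the CMP/SBB pairs in cmpGE and cmpLE.
-}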

-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode cond x y = do platform <- getPlatform
                          condIntCode' platform cond x y

condIntCode' :: Platform -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- 64-bit integer comparisons on 32-bit
-- See Note [64-bit integer comparisons on 32-bit]
condIntCode' platform cond x y
  | target32Bit platform && isWord64 (cmmExprType platform x) = do

  RegCode64 code1 r1hi r1lo <- iselExpr64 x
  RegCode64 code2 r2hi r2lo <- iselExpr64 y

  -- we mustn't clobber r1/r2 so we use temporaries
  tmp1 <- getNewRegNat II32
  tmp2 <- getNewRegNat II32

  let (cond', cmpCode) = intComparison cond r1hi r1lo r2hi r2lo tmp1 tmp2
  return $ CondCode False cond' (code1 `appOL` code2 `appOL` cmpCode)

  where
    intComparison cond r1_hi r1_lo r2_hi r2_lo tmp1 tmp2 =
      case cond of
        -- These don't occur as argument of condIntCode'
        ALWAYS  -> panic "impossible"
        NEG     -> panic "impossible"
        POS     -> panic "impossible"
        CARRY   -> panic "impossible"
        OFLO    -> panic "impossible"
        PARITY  -> panic "impossible"
        NOTPARITY -> panic "impossible"
        -- Special case #1 x == y and x != y
        EQQ -> (EQQ, cmpExact)
        NE  -> (NE, cmpExact)
        -- [x >= y]
        GE  -> (GE, cmpGE)
        GEU -> (GEU, cmpGE)
        -- [x >  y]
        GTT -> (LTT, cmpLE)
        GU  -> (LU, cmpLE)
        -- [x <= y]
        LE  -> (GE, cmpLE)
        LEU -> (GEU, cmpLE)
        -- [x <  y]
        LTT -> (LTT, cmpGE)
        LU  -> (LU, cmpGE)
      where
        cmpExact :: OrdList Instr
        cmpExact =
          toOL
            [ MOV II32 (OpReg r1_hi) (OpReg tmp1)
            , MOV II32 (OpReg r1_lo) (OpReg tmp2)
            , XOR II32 (OpReg r2_hi) (OpReg tmp1)
            , XOR II32 (OpReg r2_lo) (OpReg tmp2)
            , OR  II32 (OpReg tmp1)  (OpReg tmp2)
            ]
        cmpGE = toOL
            [ MOV II32 (OpReg r1_hi) (OpReg tmp1)
            , CMP II32 (OpReg r2_lo) (OpReg r1_lo)
            , SBB II32 (OpReg r2_hi) (OpReg tmp1)
            ]
        cmpLE = toOL
            [ MOV II32 (OpReg r2_hi) (OpReg tmp1)
            , CMP II32 (OpReg r1_lo) (OpReg r2_lo)
            , SBB II32 (OpReg r1_hi) (OpReg tmp1)
            ]

-- memory vs immediate
condIntCode' platform cond (CmmLoad x ty _) (CmmLit lit)
 | is32BitLit platform lit = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
                  CMP (cmmTypeFormat ty) (OpImm imm) (OpAddr x_addr)
    --
    return (CondCode False cond code)

-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!
condIntCode' platform cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 ty))
    | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit platform lit
    = do
      (x_reg, x_code) <- getSomeReg x
      let
         code = x_code `snocOL`
                TEST (intFormat ty) (OpImm (ImmInteger mask)) (OpReg x_reg)
      --
      return (CondCode False cond code)

-- anything vs zero
condIntCode' _ cond x (CmmLit (CmmInt 0 ty)) = do
    (x_reg, x_code) <- getSomeReg x
    let
        code = x_code `snocOL`
                  TEST (intFormat ty) (OpReg x_reg) (OpReg x_reg)
    --
    return (CondCode False cond code)

-- anything vs operand
condIntCode' platform cond x y
 | isOperand platform y = do
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
        code = x_code `appOL` y_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType platform x)) y_op (OpReg x_reg)
    return (CondCode False cond code)
-- operand vs. anything: invert the comparison so that we can use a
-- single comparison instruction.
 | isOperand platform x
 , Just revcond <- maybeFlipCond cond = do
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op,  x_code) <- getOperand x
    let
        code = y_code `appOL` x_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType platform x)) x_op (OpReg y_reg)
    return (CondCode False revcond code)

-- anything vs anything
condIntCode' platform cond x y = do
  (y_reg, y_code) <- getNonClobberedReg y
  (x_op, x_code) <- getRegOrMem x
  let
        code = y_code `appOL`
               x_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType platform x)) (OpReg y_reg) x_op
  return (CondCode False cond code)



--------------------------------------------------------------------------------
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

condFltCode cond x y
  =  condFltCode_sse2
  where

  -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
  -- an operand, but the right must be a reg.  We can probably do better
  -- than this general case...
  condFltCode_sse2 = do
    platform <- getPlatform
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op, y_code) <- getOperand y
    let
        code = x_code `appOL`
               y_code `snocOL`
                  CMP (floatFormat $ cmmExprWidth platform x) y_op (OpReg x_reg)
        -- NB(1): we need to use the unsigned comparison operators on the
        -- result of this comparison.
    return (CondCode True (condToUnsigned cond) code)

-- -----------------------------------------------------------------------------
-- Generating assignments

-- Assignments are really at the heart of the whole code generation
-- business.  Almost all top-level nodes of any real importance are
-- assignments, which correspond to loads, stores, or register
-- transfers.  If we're really lucky, some of the register transfers
-- will go away, because we can use the destination register to
-- complete the code generation for the right hand side.  This only
-- fails when the right hand side is forced into a fixed register
-- (e.g. the result of a call).
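--
-- For instance (a sketch of the mechanism, not additional behaviour): when the
-- right-hand side yields an @Any fmt code@ 'Register', assignReg_IntCode just
-- applies @code@ to the destination register, so no separate reg-to-reg move
-- is emitted; a @Fixed@ result (e.g. from a call) goes through 'anyReg' and
-- costs an extra move.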

assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode ::           CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode ::           CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_VecCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_VecCode ::           CmmReg  -> CmmExpr -> NatM InstrBlock

-- integer assignment to memory

-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
-- address.
assignMem_IntCode ty addr (CmmMachOp op [CmmLoad addr2 _ _,
                                                 CmmLit (CmmInt i _)])
   | addr == addr2, ty /= II64 || is32BitInteger i,
     Just instr <- check op
   = do Amode amode code_addr <- getAmode addr
        let code = code_addr `snocOL`
                   instr ty (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
        return code
   where
        check (MO_Add _) = Just ADD
        check (MO_Sub _) = Just SUB
        check _ = Nothing
        -- ToDo: more?

-- general case
assignMem_IntCode ty addr src = do
    platform <- getPlatform
    Amode addr code_addr <- getAmode addr
    (code_src, op_src)   <- get_op_RI platform src
    let
        code = code_src `appOL`
               code_addr `snocOL`
                  MOV ty op_src (OpAddr addr)
        -- NOTE: op_src is stable, so it will still be valid
        -- after code_addr.  This may involve the introduction
        -- of an extra MOV to a temporary register, but we hope
        -- the register allocator will get rid of it.
    --
    return code
  where
    get_op_RI :: Platform -> CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
    get_op_RI platform (CmmLit lit) | is32BitLit platform lit
      = return (nilOL, OpImm (litToImm lit))
    get_op_RI _ op
      = do (reg,code) <- getNonClobberedReg op
           return (code, OpReg reg)


-- Assign; dst is a reg, rhs is mem
assignReg_IntCode reg (CmmLoad src _ _) = do
  let ty = cmmTypeFormat $ cmmRegType reg
  load_code <- intLoadCode (MOV ty) src
  platform <- ncgPlatform <$> getConfig
  return (load_code (getRegisterReg platform reg))

-- dst is a reg, but src could be anything
assignReg_IntCode reg src = do
  platform <- ncgPlatform <$> getConfig
  code <- getAnyReg src
  return (code (getRegisterReg platform reg))


-- Floating point assignment to memory
assignMem_FltCode ty addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  let
        code = src_code `appOL`
               addr_code `snocOL`
               MOV ty (OpReg src_reg) (OpAddr addr)

  return code

-- Floating point assignment to a register/temporary
assignReg_FltCode reg src = do
  src_code <- getAnyReg src
  platform <- ncgPlatform <$> getConfig
  return (src_code (getRegisterReg platform reg))

-- Vector assignment to memory
assignMem_VecCode ty addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  config <- getConfig
  let
    code = src_code `appOL`
           addr_code `snocOL`
           movInstr config ty (OpReg src_reg) (OpAddr addr)
  return code

-- Vector assignment to a register/temporary
assignReg_VecCode reg src = do
  platform <- ncgPlatform <$> getConfig
  src_code <- getAnyReg src
  return (src_code (getRegisterReg platform reg))

genJump :: CmmExpr{-the branch target-} -> [RegWithFormat] -> NatM InstrBlock

genJump (CmmLoad mem _ _) regs = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target) regs)

genJump (CmmLit lit) regs =
  return (unitOL (JMP (OpImm (litToImm lit)) regs))

genJump expr regs = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg) regs)


-- -----------------------------------------------------------------------------
--  Unconditional branches

genBranch :: BlockId -> InstrBlock
genBranch = toOL . mkJumpInstr



-- -----------------------------------------------------------------------------
--  Conditional jumps/branches

{-
Conditional jumps are always to local labels, so we can use branch
instructions.  We peek at the arguments to decide what kind of
comparison to do.

I386: First, we have to ensure that the condition
codes are set according to the supplied comparison operation.
-}
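
-- As an illustration (an informal description of the code below, not new
-- behaviour): a branch on @MO_S_Lt@ goes through condIntCode and ends up as a
-- CMP followed by a JXX with the LTT condition, while a floating-point
-- comparison marks its CondCode as is_float and additionally gets the
-- parity-check jumps described in Note [SSE Parity Checks].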

genCondBranch
    :: BlockId      -- the source of the jump
    -> BlockId      -- the true branch target
    -> BlockId      -- the false branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock -- Instructions

genCondBranch :: Label -> Label -> Label -> CmmExpr -> NatM InstrBlock
genCondBranch Label
bid Label
id Label
false CmmExpr
expr = do
  is32Bit <- NatM Bool
is32BitPlatform
  genCondBranch' is32Bit bid id false expr

-- | We return the instructions generated.
genCondBranch' :: Bool -> BlockId -> BlockId -> BlockId -> CmmExpr
               -> NatM InstrBlock

genCondBranch' _ bid id false bool = do
  CondCode is_float cond cond_code <- getCondCode bool
  if not is_float
    then
        return (cond_code `snocOL` JXX cond id `appOL` genBranch false)
    else do
        -- See Note [SSE Parity Checks]
        let jmpFalse = genBranch false
            code
                = case cond of
                  NE  -> or_unordered
                  GU  -> plain_test
                  GEU -> plain_test
                  -- Use ASSERT so we don't break releases if
                  -- LTT/LE creep in somehow.
                  LTT ->
                    assertPpr False (text "Should have been turned into >")
                    and_ordered
                  LE  ->
                    assertPpr False (text "Should have been turned into >=")
                    and_ordered
                  _   -> and_ordered

            plain_test = unitOL (
                  JXX cond id
                ) `appOL` jmpFalse
            or_unordered = toOL [
                  JXX cond id,
                  JXX PARITY id
                ] `appOL` jmpFalse
            and_ordered = toOL [
                  JXX PARITY false,
                  JXX cond id,
                  JXX ALWAYS false
                ]
        updateCfgNat (\cfg -> adjustEdgeWeight cfg (+3) bid false)
        return (cond_code `appOL` code)
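
-- An illustrative sketch (not emitted verbatim) of the two floating-point
-- branch shapes built above, for a comparison targeting blocks T (true)
-- and F (false):
--
--   or_unordered (used for NE):          and_ordered (the default):
--       jne  T                               jp   F      -- unordered => false
--       jp   T      -- NaN => true           j<cond> T
--       jmp  F                               jmp  F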

{-  Note [Introducing cfg edges inside basic blocks]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    During instruction selection a statement `s`
    in a block B with control of the sort: B -> C
    will sometimes result in control
    flow of the sort:

            ┌ < ┐
            v   ^
      B ->  B1  ┴ -> C

    as is the case for some atomic operations.

    Now to keep the CFG in sync when introducing B1 we clearly
    want to insert it between B and C. However there is
    a catch when we have to deal with self loops.

    We might start with code and a CFG of these forms:

    loop:
        stmt1               ┌ < ┐
        ....                v   ^
        stmtX              loop ┘
        stmtY
        ....
        goto loop:

    Now we introduce B1:
                            ┌ ─ ─ ─ ─ ─┐
        loop:               │   ┌ <  ┐ │
        instrs              v   │    │ ^
        ....               loop ┴ B1 ┴ ┘
        instrsFromX
        stmtY
        goto loop:

    This is simple, all outgoing edges from loop now simply
    start from B1 instead and the code generator knows which
    new edges it introduced for the self loop of B1.

    Disaster strikes if the statement Y follows the same pattern.
    If we apply the same rule that all outgoing edges change then
    we end up with:

        loop ─> B1 ─> B2 ┬─┐
          │      │    └─<┤ │
          │      └───<───┘ │
          └───────<────────┘

    This is problematic. The edge B1->B1 is modified as expected.
    However the modification is wrong!

    The assembly in this case looked like this:

    _loop:
        <instrs>
    _B1:
        ...
        cmpxchgq ...
        jne _B1
        <instrs>
        <end _B1>
    _B2:
        ...
        cmpxchgq ...
        jne _B2
        <instrs>
        jmp loop

    There is no edge _B2 -> _B1 here. It's still a self loop onto _B1.

    The problem here is that really B1 should be two basic blocks.
    Otherwise we have control flow in the *middle* of a basic block.
    A contradiction!

    So to account for this we add yet another basic block marker:

    _B:
        <instrs>
    _B1:
        ...
        cmpxchgq ...
        jne _B1
        jmp _B1'
    _B1':
        <instrs>
        <end _B1>
    _B2:
        ...

    Now when inserting B2 we will only look at the outgoing edges of B1' and
    everything will work out nicely.

    You might also wonder why we don't insert jumps at the end of _B1'. There is
    no way another block ends up jumping to the labels _B1 or _B2 since they are
    essentially invisible to other blocks. View them as control flow labels local
    to the basic block if you'd like.

    Not doing this ultimately caused (part 2 of) #17334.
-}


-- -----------------------------------------------------------------------------
--  Generating C calls

-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
-- @get_arg@, which moves the arguments to the correct registers/stack
-- locations.  Apart from that, the code is easy.
--
-- (If applicable) Do not fill the delay slots here; you will confuse the
-- register allocator.
--
-- See Note [Keeping track of the current block] for information why we need
-- to take/return a block id.

genForeignCall
    :: ForeignTarget -- ^ function to call
    -> [CmmFormal]   -- ^ where to put the result
    -> [CmmActual]   -- ^ arguments (of mixed type)
    -> BlockId       -- ^ The block we are in
    -> NatM (InstrBlock, Maybe BlockId)

genForeignCall target dst args bid = do
  case target of
    PrimTarget prim         -> genPrim bid prim dst args
    ForeignTarget addr conv -> (,Nothing) <$> genCCall bid addr conv dst args

genPrim
    :: BlockId       -- ^ The block we are in
    -> CallishMachOp -- ^ MachOp
    -> [CmmFormal]   -- ^ where to put the result
    -> [CmmActual]   -- ^ arguments (of mixed type)
    -> NatM (InstrBlock, Maybe BlockId)

-- First we deal with cases which might introduce new blocks in the stream.
genPrim bid (MO_AtomicRMW width amop) [dst] [addr, n]
  = genAtomicRMW bid width amop dst addr n
genPrim bid (MO_Ctz width) [dst] [src]
  = genCtz bid width dst src

-- Then we deal with cases which don't introduce new blocks in the stream.
genPrim bid prim dst args
  = (,Nothing) <$> genSimplePrim bid prim dst args
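
-- For example (purely illustrative): a call such as
--
--     genPrim bid (MO_AtomicRMW W64 AMO_Add) [dst] [addr, n]
--
-- is routed through 'genAtomicRMW' and may hand back a new current block,
-- whereas
--
--     genPrim bid (MO_PopCnt W64) [dst] [src]
--
-- falls through to 'genSimplePrim' and leaves the current block unchanged
-- (the 'Maybe BlockId' result is 'Nothing').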

genSimplePrim
    :: BlockId       -- ^ the block we are in
    -> CallishMachOp -- ^ MachOp
    -> [CmmFormal]   -- ^ where to put the result
    -> [CmmActual]   -- ^ arguments (of mixed type)
    -> NatM InstrBlock
genSimplePrim bid (MO_Memcpy align)    []      [dst,src,n]    = genMemCpy  bid align dst src n
genSimplePrim bid (MO_Memmove align)   []      [dst,src,n]    = genMemMove bid align dst src n
genSimplePrim bid (MO_Memcmp align)    [res]   [dst,src,n]    = genMemCmp  bid align res dst src n
genSimplePrim bid (MO_Memset align)    []      [dst,c,n]      = genMemSet  bid align dst c n
genSimplePrim _   MO_AcquireFence      []      []             = return nilOL -- barriers compile to no code on x86/x86-64;
genSimplePrim _   MO_ReleaseFence      []      []             = return nilOL -- we keep it this long in order to prevent earlier optimisations.
genSimplePrim _   MO_SeqCstFence       []      []             = return $ unitOL MFENCE
genSimplePrim _   MO_Touch             []      [_]            = return nilOL
genSimplePrim _   (MO_Prefetch_Data n) []      [src]          = genPrefetchData n src
genSimplePrim _   (MO_BSwap width)     [dst]   [src]          = genByteSwap width dst src
genSimplePrim bid (MO_BRev width)      [dst]   [src]          = genBitRev bid width dst src
genSimplePrim bid (MO_PopCnt width)    [dst]   [src]          = genPopCnt bid width dst src
genSimplePrim bid (MO_Pdep width)      [dst]   [src,mask]     = genPdep bid width dst src mask
genSimplePrim bid (MO_Pext width)      [dst]   [src,mask]     = genPext bid width dst src mask
genSimplePrim bid (MO_Clz width)       [dst]   [src]          = genClz bid width dst src
genSimplePrim bid (MO_UF_Conv width)   [dst]   [src]          = genWordToFloat bid width dst src
genSimplePrim _   (MO_AtomicRead w mo)  [dst]  [addr]         = genAtomicRead w mo dst addr
genSimplePrim _   (MO_AtomicWrite w mo) []     [addr,val]     = genAtomicWrite w mo addr val
genSimplePrim bid (MO_Cmpxchg width)   [dst]   [addr,old,new] = genCmpXchg bid width dst addr old new
genSimplePrim _   (MO_Xchg width)      [dst]   [addr, value]  = genXchg width dst addr value
genSimplePrim _   (MO_AddWordC w)      [r,c]   [x,y]          = genAddSubRetCarry w ADD_CC (const Nothing) CARRY r c x y
genSimplePrim _   (MO_SubWordC w)      [r,c]   [x,y]          = genAddSubRetCarry w SUB_CC (const Nothing) CARRY r c x y
genSimplePrim _   (MO_AddIntC w)       [r,c]   [x,y]          = genAddSubRetCarry w ADD_CC (Just . ADD_CC) OFLO  r c x y
genSimplePrim _   (MO_SubIntC w)       [r,c]   [x,y]          = genAddSubRetCarry w SUB_CC (const Nothing) OFLO  r c x y
genSimplePrim _   (MO_Add2 w)          [h,l]   [x,y]          = genAddWithCarry w h l x y
genSimplePrim _   (MO_U_Mul2 w)        [h,l]   [x,y]          = genUnsignedLargeMul w h l x y
genSimplePrim _   (MO_S_Mul2 w)        [c,h,l] [x,y]          = genSignedLargeMul w c h l x y
genSimplePrim _   (MO_S_QuotRem w)     [q,r]   [x,y]          = genQuotRem w True  q r Nothing   x  y
genSimplePrim _   (MO_U_QuotRem w)     [q,r]   [x,y]          = genQuotRem w False q r Nothing   x  y
genSimplePrim _   (MO_U_QuotRem2 w)    [q,r]   [hx,lx,y]      = genQuotRem w False q r (Just hx) lx y
genSimplePrim _   MO_F32_Fabs          [dst]   [src]          = genFloatAbs W32 dst src
genSimplePrim _   MO_F64_Fabs          [dst]   [src]          = genFloatAbs W64 dst src
genSimplePrim _   MO_F32_Sqrt          [dst]   [src]          = genFloatSqrt FF32 dst src
genSimplePrim _   MO_F64_Sqrt          [dst]   [src]          = genFloatSqrt FF64 dst src
genSimplePrim bid MO_F32_Sin           [dst]   [src]          = genLibCCall bid (fsLit "sinf") [dst] [src]
genSimplePrim bid MO_F32_Cos           [dst]   [src]          = genLibCCall bid (fsLit "cosf") [dst] [src]
genSimplePrim bid MO_F32_Tan           [dst]   [src]          = genLibCCall bid (fsLit "tanf") [dst] [src]
genSimplePrim bid MO_F32_Exp           [dst]   [src]          = genLibCCall bid (fsLit "expf") [dst] [src]
genSimplePrim bid MO_F32_ExpM1         [dst]   [src]          = genLibCCall bid (fsLit "expm1f") [dst] [src]
genSimplePrim bid MO_F32_Log           [dst]   [src]          = genLibCCall bid (fsLit "logf") [dst] [src]
genSimplePrim bid MO_F32_Log1P         [dst]   [src]          = genLibCCall bid (fsLit "log1pf") [dst] [src]
genSimplePrim bid MO_F32_Asin          [dst]   [src]          = genLibCCall bid (fsLit "asinf") [dst] [src]
genSimplePrim bid MO_F32_Acos          [dst]   [src]          = genLibCCall bid (fsLit "acosf") [dst] [src]
genSimplePrim bid MO_F32_Atan          [dst]   [src]          = genLibCCall bid (fsLit "atanf") [dst] [src]
genSimplePrim bid MO_F32_Sinh          [dst]   [src]          = genLibCCall bid (fsLit "sinhf") [dst] [src]
genSimplePrim bid MO_F32_Cosh          [dst]   [src]          = genLibCCall bid (fsLit "coshf") [dst] [src]
genSimplePrim bid MO_F32_Tanh          [dst]   [src]          = genLibCCall bid (fsLit "tanhf") [dst] [src]
genSimplePrim bid MO_F32_Pwr           [dst]   [x,y]          = genLibCCall bid (fsLit "powf")  [dst] [x,y]
genSimplePrim bid MO_F32_Asinh         [dst]   [src]          = genLibCCall bid (fsLit "asinhf") [dst] [src]
genSimplePrim bid MO_F32_Acosh         [dst]   [src]          = genLibCCall bid (fsLit "acoshf") [dst] [src]
genSimplePrim bid MO_F32_Atanh         [dst]   [src]          = genLibCCall bid (fsLit "atanhf") [dst] [src]
genSimplePrim bid MO_F64_Sin           [dst]   [src]          = genLibCCall bid (fsLit "sin") [dst] [src]
genSimplePrim bid MO_F64_Cos           [dst]   [src]          = genLibCCall bid (fsLit "cos") [dst] [src]
genSimplePrim bid MO_F64_Tan           [dst]   [src]          = genLibCCall bid (fsLit "tan") [dst] [src]
genSimplePrim bid MO_F64_Exp           [dst]   [src]          = genLibCCall bid (fsLit "exp") [dst] [src]
genSimplePrim bid MO_F64_ExpM1         [dst]   [src]          = genLibCCall bid (fsLit "expm1") [dst] [src]
genSimplePrim bid MO_F64_Log           [dst]   [src]          = genLibCCall bid (fsLit "log") [dst] [src]
genSimplePrim bid MO_F64_Log1P         [dst]   [src]          = genLibCCall bid (fsLit "log1p") [dst] [src]
genSimplePrim bid MO_F64_Asin          [dst]   [src]          = genLibCCall bid (fsLit "asin") [dst] [src]
genSimplePrim bid MO_F64_Acos          [dst]   [src]          = genLibCCall bid (fsLit "acos") [dst] [src]
genSimplePrim bid MO_F64_Atan          [dst]   [src]          = genLibCCall bid (fsLit "atan") [dst] [src]
genSimplePrim bid MO_F64_Sinh          [dst]   [src]          = genLibCCall bid (fsLit "sinh") [dst] [src]
genSimplePrim bid MO_F64_Cosh          [dst]   [src]          = genLibCCall bid (fsLit "cosh") [dst] [src]
genSimplePrim bid MO_F64_Tanh          [dst]   [src]          = genLibCCall bid (fsLit "tanh") [dst] [src]
genSimplePrim bid MO_F64_Pwr           [dst]   [x,y]          = genLibCCall bid (fsLit "pow")  [dst] [x,y]
genSimplePrim bid MO_F64_Asinh         [dst]   [src]          = genLibCCall bid (fsLit "asinh") [dst] [src]
genSimplePrim bid MO_F64_Acosh         [dst]   [src]          = genLibCCall bid (fsLit "acosh") [dst] [src]
genSimplePrim bid MO_F64_Atanh         [dst]   [src]          = genLibCCall bid (fsLit "atanh") [dst] [src]
genSimplePrim bid MO_SuspendThread     [tok]   [rs,i]         = genRTSCCall bid (fsLit "suspendThread") [tok] [rs,i]
genSimplePrim bid MO_ResumeThread      [rs]    [tok]          = genRTSCCall bid (fsLit "resumeThread") [rs] [tok]
genSimplePrim bid MO_I64_Quot          [dst]   [x,y]          = genPrimCCall bid (fsLit "hs_quotInt64") [dst] [x,y]
genSimplePrim bid MO_I64_Rem           [dst]   [x,y]          = genPrimCCall bid (fsLit "hs_remInt64") [dst] [x,y]
genSimplePrim bid MO_W64_Quot          [dst]   [x,y]          = genPrimCCall bid (fsLit "hs_quotWord64") [dst] [x,y]
genSimplePrim bid MO_W64_Rem           [dst]   [x,y]          = genPrimCCall bid (fsLit "hs_remWord64") [dst] [x,y]
genSimplePrim _   op                   dst     args           = do
  platform <- ncgPlatform <$> getConfig
  pprPanic "genSimplePrim: unhandled primop" (ppr (pprCallishMachOp op, dst, fmap (pdoc platform) args))

{- Note [Evaluate C-call arguments before placing in destination registers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When producing code for C calls we must take care when placing arguments
in their final registers. Specifically, we must ensure that temporary register
usage due to evaluation of one argument does not clobber a register in which we
already placed a previous argument (e.g. as the code generation logic for
MO_Shl can clobber %rcx due to x86 instruction limitations).

This is precisely what happened in #18527. Consider this C--:

    (result::I64) = call "ccall" doSomething(_s2hp::I64, 2244, _s2hq::I64, _s2hw::I64 | (1 << _s2hz::I64));

Here we are calling the C function `doSomething` with three arguments, the last
involving a non-trivial expression involving MO_Shl. In this case the NCG could
naively generate the following assembly (where $tmp denotes some temporary
register and $argN denotes the register for argument N, as dictated by the
platform's calling convention):

    mov _s2hp, $arg1   # place first argument
    mov _s2hq, $arg2   # place second argument

    # Compute 1 << _s2hz
    mov _s2hz, %rcx
    shl %cl, $tmp

    # Compute (_s2hw | (1 << _s2hz))
    mov _s2hw, $arg3
    or $tmp, $arg3

    # Perform the call
    call func

This code is outright broken on Windows which assigns $arg1 to %rcx. This means
that the evaluation of the last argument clobbers the first argument.

To avoid this we use a rather awful hack: when producing code for a C call with
at least one non-trivial argument, we first evaluate all of the arguments into
local registers before moving them into their final calling-convention-defined
homes.  This is performed by 'evalArgs'. Here we define "non-trivial" to be an
expression which might contain a MachOp since these are the only cases which
might clobber registers. Furthermore, we use a conservative approximation of
this condition (only looking at the top-level of CmmExprs) to avoid spending
too much effort trying to decide whether we want to take the fast path.

Note that this hack *also* applies to calls to out-of-line PrimTargets (which
are lowered via a C call), which will ultimately end up in
genForeignCall{32,64}.
-}
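
-- A sketch of the rewrite performed by 'evalArgs' below (the names are
-- illustrative only): given the actuals
--
--     [ CmmReg (CmmLocal x), CmmMachOp (MO_Shl W64) [one, n] ]
--
-- the second argument is non-trivial, so every actual is first assigned to a
-- fresh local register and the call then sees
--
--     [ CmmReg (CmmLocal t1), CmmReg (CmmLocal t2) ]
--
-- with the assignments t1 := x and t2 := one << n emitted beforehand.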

-- | See Note [Evaluate C-call arguments before placing in destination registers]
evalArgs :: BlockId -> [CmmActual] -> NatM (InstrBlock, [CmmActual])
evalArgs bid actuals
  | any loadIntoRegMightClobberOtherReg actuals = do
      regs_blks <- mapM evalArg actuals
      return (concatOL $ map fst regs_blks, map snd regs_blks)
  | otherwise = return (nilOL, actuals)
  where

    evalArg :: CmmActual -> NatM (InstrBlock, CmmExpr)
    evalArg actual = do
        platform <- getPlatform
        lreg <- newLocalReg $ cmmExprType platform actual
        (instrs, bid1) <- stmtToInstrs bid $ CmmAssign (CmmLocal lreg) actual
        -- The above assignment shouldn't change the current block
        massert (isNothing bid1)
        return (instrs, CmmReg $ CmmLocal lreg)

    newLocalReg :: CmmType -> NatM LocalReg
    newLocalReg ty = LocalReg <$> getUniqueM <*> pure ty

-- | Might the code to put this expression into a register
-- clobber any other registers?
loadIntoRegMightClobberOtherReg :: CmmExpr -> Bool
loadIntoRegMightClobberOtherReg (CmmReg _)      = False
loadIntoRegMightClobberOtherReg (CmmRegOff _ _) = False
loadIntoRegMightClobberOtherReg (CmmLit _)      = False
  -- NB: this last 'False' is slightly risky, because the code for loading
  -- a literal into a register is not entirely trivial.
loadIntoRegMightClobberOtherReg _               = True

-- Note [DIV/IDIV for bytes]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
-- IDIV reminder:
--   Size    Dividend   Divisor   Quotient    Remainder
--   byte    %ax         r/m8      %al          %ah
--   word    %dx:%ax     r/m16     %ax          %dx
--   dword   %edx:%eax   r/m32     %eax         %edx
--   qword   %rdx:%rax   r/m64     %rax         %rdx
--
-- We do a special case for the byte division because the current
-- codegen doesn't deal well with accessing %ah register (also,
-- accessing %ah in 64-bit mode is complicated because it cannot be an
-- operand of many instructions). So we just widen operands to 16 bits
-- and get the results from %al, %dl. This is not optimal, but a few
-- register moves are probably not a huge deal when doing division.
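
-- Illustrative example: for an 8-bit '100 `quot` 7' we widen both operands to
-- 16 bits and issue a 16-bit division; the quotient 14 is then read back from
-- %al and the remainder 2 from %dl, so %ah is never touched.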


-- | Generate C call to the given function in ghc-prim
genPrimCCall
  :: BlockId
  -> FastString
  -> [CmmFormal]
  -> [CmmActual]
  -> NatM InstrBlock
genPrimCCall bid lbl_txt dsts args = do
  config <- getConfig
  -- FIXME: we should use mkForeignLabel instead of mkCmmCodeLabel
  let lbl = mkCmmCodeLabel primUnitId lbl_txt
  addr <- cmmMakeDynamicReference config CallReference lbl
  let conv = ForeignConvention CCallConv [] [] CmmMayReturn
  genCCall bid addr conv dsts args

-- | Generate C call to the given function in libc
genLibCCall
  :: BlockId
  -> FastString
  -> [CmmFormal]
  -> [CmmActual]
  -> NatM InstrBlock
genLibCCall bid lbl_txt dsts args = do
  config <- getConfig
  -- Assume we can call these functions directly, and that they're not in a dynamic library.
  -- TODO: Why is this ok? Under linux this code will be in libm.so
  --       Is it because they're really implemented as a primitive instruction by the assembler??  -- BL 2009/12/31
  let lbl = mkForeignLabel lbl_txt ForeignLabelInThisPackage IsFunction
  addr <- cmmMakeDynamicReference config CallReference lbl
  let conv = ForeignConvention CCallConv [] [] CmmMayReturn
  genCCall bid addr conv dsts args
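
-- For instance, 'genSimplePrim' above lowers 'MO_F32_Sin' as
--
--     genLibCCall bid (fsLit "sinf") [dst] [src]
--
-- i.e. an ordinary C call to the correspondingly named libm routine.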

-- | Generate C call to the given function in the RTS
genRTSCCall
  :: BlockId
  -> FastString
  -> [CmmFormal]
  -> [CmmActual]
  -> NatM InstrBlock
genRTSCCall bid lbl_txt dsts args = do
  config <- getConfig
  -- Assume we can call these functions directly, and that they're not in a dynamic library.
  let lbl = mkForeignLabel lbl_txt ForeignLabelInThisPackage IsFunction
  addr <- cmmMakeDynamicReference config CallReference lbl
  let conv = ForeignConvention CCallConv [] [] CmmMayReturn
  genCCall bid addr conv dsts args

-- | Generate a real C call to the given address with the given convention
genCCall
  :: BlockId
  -> CmmExpr
  -> ForeignConvention
  -> [CmmFormal]
  -> [CmmActual]
  -> NatM InstrBlock
genCCall bid addr conv@(ForeignConvention _ argHints _ _) dest_regs args = do
  platform <- getPlatform
  is32Bit <- is32BitPlatform
  let args_hints = zip args (argHints ++ repeat NoHint)
      prom_args  = map (maybePromoteCArgToW32 platform) args_hints
  (instrs0, args') <- evalArgs bid prom_args
  instrs1 <- if is32Bit
    then genCCall32 addr conv dest_regs args'
    else genCCall64 addr conv dest_regs args'
  return (instrs0 `appOL` instrs1)

maybePromoteCArgToW32 :: Platform -> (CmmExpr, ForeignHint) -> CmmExpr
maybePromoteCArgToW32 platform (arg, hint)
 | wfrom < wto =
    -- As wto=W32, we only need to handle integer conversions,
    -- never Float -> Double.
    case hint of
      SignedHint -> CmmMachOp (MO_SS_Conv wfrom wto) [arg]
      _          -> CmmMachOp (MO_UU_Conv wfrom wto) [arg]
 | otherwise   = arg
 where
   ty    = cmmExprType platform arg
   wfrom = typeWidth ty
   wto   = W32
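
-- For example, an 8-bit argument passed with 'SignedHint' is rewritten to
--
--     CmmMachOp (MO_SS_Conv W8 W32) [arg]
--
-- while the same argument without a hint is zero-extended via 'MO_UU_Conv'.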

genCCall32 :: CmmExpr           -- ^ address of the function to call
           -> ForeignConvention -- ^ calling convention
           -> [CmmFormal]       -- ^ where to put the result
           -> [CmmActual]       -- ^ arguments (of mixed type)
           -> NatM InstrBlock
genCCall32 addr _conv dest_regs args = do
        config <- getConfig
        let platform = ncgPlatform config

            -- If the size is smaller than the word, we widen things (see maybePromoteCArg)
            arg_size_bytes :: CmmType -> Int
            arg_size_bytes ty = max (widthInBytes (typeWidth ty)) (widthInBytes (wordWidth platform))

            roundTo a x | x `mod` a == 0 = x
                        | otherwise = x + a - (x `mod` a)

            push_arg :: CmmActual {-current argument-}
                            -> NatM InstrBlock  -- code

            push_arg  arg -- we don't need the hints on x86
              | isWord64 arg_ty = do
                RegCode64 code r_hi r_lo <- iselExpr64 arg
                delta <- getDeltaNat
                setDeltaNat (delta - 8)
                return (code `appOL`
                        toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
                              PUSH II32 (OpReg r_lo), DELTA (delta - 8),
                              DELTA (delta-8)])

              | isFloatType arg_ty || isVecType arg_ty = do
                (reg, code) <- getSomeReg arg
                delta <- getDeltaNat
                setDeltaNat (delta-size)
                let amode  = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt 0)
                    format = cmmTypeFormat arg_ty
                return (code `appOL`
                        toOL [ SUB II32 (OpImm (ImmInt size)) (OpReg esp)
                             , DELTA (delta-size)
                             , movInstr config format (OpReg reg) (OpAddr amode)
                             ])

              | otherwise = do
                -- Arguments can be smaller than 32-bit, but we still use @PUSH
                -- II32@ - the usual calling conventions expect integers to be
                -- 4-byte aligned.
                massert ((typeWidth arg_ty) <= W32)
                (operand, code) <- getOperand arg
                delta <- getDeltaNat
                setDeltaNat (delta-size)
                return (code `snocOL`
                        PUSH II32 operand `snocOL`
                        DELTA (delta-size))

              where
                 arg_ty = cmmExprType platform arg
                 size   = arg_size_bytes arg_ty -- Byte size

        let
            -- Align stack to 16n for calls, assuming a starting stack
            -- alignment of 16n - word_size on procedure entry. Which we
            -- maintain. See Note [Stack Alignment on X86] in rts/StgCRun.c.
            sizes               = map (arg_size_bytes . cmmExprType platform) (reverse args)
            raw_arg_size        = sum sizes + platformWordSizeInBytes platform
            arg_pad_size        = (roundTo 16 $ raw_arg_size) - raw_arg_size
            tot_arg_size        = raw_arg_size + arg_pad_size - platformWordSizeInBytes platform

        delta0 <- getDeltaNat
        setDeltaNat (delta0 - arg_pad_size)

        push_codes <- mapM push_arg (reverse args)
        delta <- getDeltaNat
        massert (delta == delta0 - tot_arg_size)

        -- deal with static vs dynamic call targets
        callinsns <-
          case addr of
            CmmLit (CmmLabel lbl)
               -> return $ unitOL (CALL (Left fn_imm) [])
               where fn_imm = ImmCLbl lbl
            _
               -> do { (dyn_r, dyn_c) <- getSomeReg addr
                     ; massert (isWord32 (cmmExprType platform addr))
                     ; return $ dyn_c `snocOL` CALL (Right dyn_r) [] }

        let push_code
                | arg_pad_size /= 0
                = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
                        DELTA (delta0 - arg_pad_size)]
                  `appOL` concatOL push_codes
                | otherwise
                = concatOL push_codes

            call = callinsns `appOL`
                   toOL (
                      (if tot_arg_size == 0 then [] else
                       [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
                      ++
                      [DELTA delta0]
                   )
        setDeltaNat delta0

        let
            -- assign the results, if necessary
            assign_code []     = nilOL
            assign_code [dest]
              | isVecType ty
              = unitOL (mkRegRegMoveInstr config (cmmTypeFormat ty) xmm0 r_dest)
              | isFloatType ty =
                  -- we assume SSE2
                  let tmp_amode = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt 0)
                      fmt       = floatFormat w
                  in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
                            DELTA (delta0 - b),
                            X87Store fmt tmp_amode,
                            -- X87Store only supported for the CDECL ABI
                            -- NB: This code will need to be
                            -- revisited once GHC does more work around
                            -- SIGFPE f
                            MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
                            ADD II32 (OpImm (ImmInt b)) (OpReg esp),
                            DELTA delta0]
              | isWord64 ty    = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
                                       MOV II32 (OpReg edx) (OpReg r_dest_hi)]
              | otherwise      = unitOL (MOV (intFormat w)
                                             (OpReg eax)
                                             (OpReg r_dest))
              where
                    ty = localRegType dest
                    w  = typeWidth ty
                    b  = widthInBytes w
                    r_dest_hi = getHiVRegFromLo r_dest
                    r_dest    = getLocalRegReg dest
            assign_code many = pprPanic "genForeignCall.assign_code - too many return values:" (ppr many)

        return (push_code `appOL`
                call `appOL`
                assign_code dest_regs)
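
-- A worked example (illustrative) of the stack-alignment arithmetic in
-- 'genCCall32': with two word-sized (4-byte) arguments we get
-- raw_arg_size = 2*4 + 4 = 12 (the extra word accounts for the assumed
-- 16n - word_size alignment on entry), roundTo 16 12 = 16, hence
-- arg_pad_size = 4 and tot_arg_size = 12 + 4 - 4 = 12 bytes to deallocate
-- from %esp after the call.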

genCCall64 :: CmmExpr           -- ^ address of function to call
           -> ForeignConvention -- ^ calling convention
           -> [CmmFormal]       -- ^ where to put the result
           -> [CmmActual]       -- ^ arguments (of mixed type)
           -> NatM InstrBlock
genCCall64 addr conv dest_regs args = do
    config <- getConfig
    let platform  = ncgPlatform config
        word_size = platformWordSizeInBytes platform
        wordFmt   = archWordFormat (target32Bit platform)

    -- Compute the code for loading arguments into registers,
    -- returning the leftover arguments that will need to be passed on the stack.
    --
    -- NB: the code for loading references to data into registers is computed
    -- later (in 'pushArgs'), because we don't yet know where the data will be
    -- placed (due to alignment requirements).
    LoadArgs
      { stackArgs       = proper_stack_args
      , stackDataArgs   = stack_data_args
      , usedRegs        = arg_regs_used
      , assignArgsCode  = assign_args_code
      }
      <- loadArgs config args

    let
    -- Pad all arguments and data passed on stack to align them properly.
        (stk_args_with_padding, args_aligned_16) =
          padStackArgs platform (proper_stack_args, stack_data_args)

    -- Align stack to 16n for calls, assuming a starting stack
    -- alignment of 16n - word_size on procedure entry. Which we
    -- maintain. See Note [Stack Alignment on X86] in rts/StgCRun.c
        need_realign_call = args_aligned_16
    align_call_code <-
      if need_realign_call
      then addStackPadding word_size
      else return nilOL

    -- Compute the code that pushes data to the stack, and also
    -- the code that loads references to that data into registers,
    -- when the data is passed by reference in a register.
    (load_data_refs, push_code) <-
      pushArgs config proper_stack_args stk_args_with_padding

    -- On Windows, leave stack space for the arguments that we are passing
    -- in registers (the so-called shadow space).
    let shadow_space =
          if platformOS platform == OSMinGW32
          then 8 * length (allArgRegs platform)
            -- NB: the shadow store is always 8 * 4 = 32 bytes large,
            -- i.e. the cumulative size of rcx, rdx, r8, r9 (see 'allArgRegs').
          else 0
    shadow_space_code <- addStackPadding shadow_space

    let total_args_size
          = shadow_space
          + sum (map (stackArgSpace platform) stk_args_with_padding)
        real_size =
          total_args_size + if need_realign_call then word_size else 0

    -- End of argument passing.
    --
    -- Next step: emit the appropriate call instruction.
    delta <- getDeltaNat

    let -- The System V AMD64 ABI requires us to set %al to the number of SSE2
        -- registers that contain arguments, if the called routine
        -- is a varargs function.  We don't know whether it's a
        -- varargs function or not, so we have to assume it is.
        --
        -- It's not safe to omit this assignment, even if the number
        -- of SSE2 regs in use is zero.  If %al is larger than 8
        -- on entry to a varargs function, seg faults ensue.
        nb_sse_regs_used = count (isFloatFormat . regWithFormat_format) arg_regs_used
        assign_eax_sse_regs
          = unitOL (MOV II32 (OpImm (ImmInt nb_sse_regs_used)) (OpReg eax))
          -- Note: we do this on Windows as well. It's not entirely clear why
          -- it's needed (the Windows X86_64 calling convention does not
          -- dictate it), but we get segfaults without it.
          --
          -- One test case exhibiting the issue is T20030_test1j;
          -- if you change this, make sure to run it in a loop for a while
          -- with at least -j8 to check.

        -- Live registers we are annotating the call instruction with
        arg_regs = [RegWithFormat eax wordFmt] ++ arg_regs_used

    -- deal with static vs dynamic call targets
    (callinsns,_cconv) <- case addr of
      CmmLit (CmmLabel lbl) ->
        return (unitOL (CALL (Left (ImmCLbl lbl)) arg_regs), conv)
      _ -> do
        (dyn_r, dyn_c) <- getSomeReg addr
        return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)

    let call = callinsns `appOL`
               toOL (
                    -- Deallocate parameters after call for ccall
                  (if real_size == 0 then [] else
                   [ADD (intFormat (platformWordWidth platform)) (OpImm (ImmInt real_size)) (OpReg esp)])
                  ++
                  [DELTA (delta + real_size)]
               )
    setDeltaNat (delta + real_size)

    let
        -- assign the results, if necessary
        assign_code []     = nilOL
        assign_code [dest] =
            unitOL (mkRegRegMoveInstr config fmt reg r_dest)
          where
            reg = if isIntFormat fmt then rax else xmm0
            fmt = cmmTypeFormat rep
            rep = localRegType dest
            r_dest = getRegisterReg platform (CmmLocal dest)
        assign_code _many = panic "genForeignCall.assign_code many"

    return (align_call_code     `appOL`
            push_code           `appOL`
            assign_args_code    `appOL`
            load_data_refs      `appOL`
            shadow_space_code   `appOL`
            assign_eax_sse_regs `appOL`
            call                `appOL`
            assign_code dest_regs)

-- -----------------------------------------------------------------------------
-- Loading arguments into registers for 64-bit C calls.

-- | Information needed to know how to pass arguments in a C call,
-- and in particular how to load them into registers.
data LoadArgs
  = LoadArgs
  -- | Arguments that should be passed on the stack
  { stackArgs     :: [RawStackArg]
  -- | Additional values to store onto the stack.
  , stackDataArgs :: [CmmExpr]
  -- | Which registers are we using for argument passing?
  , usedRegs      :: [RegWithFormat]
  -- | The code to assign arguments to registers used for argument passing.
  , assignArgsCode :: InstrBlock
  }
instance Semigroup LoadArgs where
  LoadArgs a1 d1 r1 j1 <> LoadArgs a2 d2 r2 j2
    = LoadArgs (a1 ++ a2) (d1 ++ d2) (r1 ++ r2) (j1 S.<> j2)
instance Monoid LoadArgs where
  mempty = LoadArgs [] [] [] nilOL
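
-- As a small illustration (not from the original source; a1, d2, r1, c1 and c2
-- are hypothetical values), the Semigroup instance combines the per-argument
-- results componentwise:
--
--   LoadArgs [a1] [] [r1] c1 <> LoadArgs [] [d2] [] c2
--     == LoadArgs [a1] [d2] [r1] (c1 S.<> c2)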

-- | An argument passed on the stack, either directly or by reference.
--
-- The padding information hasn't yet been computed (see 'StackArg').
data RawStackArg
  -- | Pass the argument on the stack directly.
  = RawStackArg { stackArgExpr :: CmmExpr }
  -- | Pass the argument by reference.
  | RawStackArgRef
    { stackRef :: StackRef
       -- ^ is the reference passed in a register, or on the stack?
    , stackRefArgSize :: Int
        -- ^ the size of the data pointed to
    }
  deriving ( Show )

-- | An argument passed on the stack, either directly or by reference,
-- with additional padding information.
data StackArg
  -- | Pass the argument on the stack directly.
  = StackArg
      { stackArgExpr :: CmmExpr
      , stackArgPadding :: Int
        -- ^ padding required (in bytes)
      }
  -- | Pass the argument by reference.
  | StackArgRef
     { stackRef :: StackRef
        -- ^ where the reference is passed
     , stackRefArgSize :: Int
        -- ^ the size of the data pointed to
     , stackRefArgPadding :: Int
       -- ^ padding of the data pointed to
       -- (the reference itself never requires padding)
     }
  deriving ( Show )

-- | Where is a reference to data on the stack passed?
data StackRef
  -- | In a register.
  = InReg Reg
  -- | On the stack.
  | OnStack
  deriving ( Eq, Ord, Show )

newtype Padding = Padding { paddingBytes :: Int }
  deriving ( Show, Eq, Ord )

-- | How much space does this 'StackArg' take up on the stack?
--
-- Only counts the "reference" part for references, not the data it points to.
stackArgSpace :: Platform -> StackArg -> Int
stackArgSpace platform = \case
  StackArg arg padding ->
    argSize platform arg + padding
  StackArgRef { stackRef = ref } ->
    case ref of
      InReg   {} -> 0
      OnStack {} -> 8
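
-- As an illustration (hypothetical values, assuming @argSize platform arg == 8@):
--
--   stackArgSpace platform (StackArg arg 8)               == 16  -- 8 bytes of data + 8 of padding
--   stackArgSpace platform (StackArgRef (InReg rcx) 16 0) == 0   -- reference lives in a register
--   stackArgSpace platform (StackArgRef OnStack     16 0) == 8   -- reference itself on the stack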

-- | Pad arguments, assuming we start aligned to a 16-byte boundary.
--
-- Returns padded arguments, together with whether we end up aligned
-- to a 16-byte boundary.
padStackArgs :: Platform
             -> ([RawStackArg], [CmmExpr])
             -> ([StackArg], Bool)
padStackArgs platform (args0, data_args0) =
  let
    -- Pad the direct args
    (args, align_16_mid) = pad_args True args0

    -- Pad the data section
    (data_args, align_16_end) = pad_args align_16_mid (map RawStackArg data_args0)

    -- Now figure out where the data is placed relative to the direct arguments,
    -- in order to resolve references.
    resolve_args :: [(RawStackArg, Padding)] -> [Padding] -> [StackArg]
    resolve_args [] _ = []
    resolve_args ((stk_arg, Padding pad):rest) pads =
      let (this_arg, pads') =
            case stk_arg of
              RawStackArg arg -> (StackArg arg pad, pads)
              RawStackArgRef ref size ->
                let (Padding arg_pad : rest_pads) = pads
                    arg =
                      StackArgRef
                        { stackRef = ref
                        , stackRefArgSize = size
                        , stackRefArgPadding = arg_pad }
                in (arg, rest_pads)
      in this_arg : resolve_args rest pads'

  in
    ( resolve_args args (fmap snd data_args) ++
        [ case data_arg of
            RawStackArg arg -> StackArg arg pad
            RawStackArgRef {} -> panic "padStackArgs: reference in data section"
        | (data_arg, Padding pad) <- data_args
        ]
    , align_16_end )

  where
    pad_args :: Bool -> [RawStackArg] -> ([(RawStackArg, Padding)], Bool)
    pad_args aligned_16 [] = ([], aligned_16)
    pad_args aligned_16 (arg:args)
      | needed_alignment > 16
      -- We don't know if the stack is aligned to 8 (mod 32) or 24 (mod 32).
      -- This makes aligning the stack to a 32 or 64 byte boundary more
      -- complicated, in particular with DELTA.
      = sorry $ unlines
        [ "X86_64 C call: unsupported argument."
        , "  Alignment requirement: " ++ show needed_alignment ++ " bytes."
        , if platformOS platform == OSMinGW32
          then "  The X86_64 NCG does not (yet) support Windows C calls with 256/512 bit vectors."
          else "  The X86_64 NCG cannot (yet) pass 256/512 bit vectors on the stack for C calls."
        , "  Please use the LLVM backend (-fllvm)." ]
      | otherwise
      = let ( rest, final_align_16 ) = pad_args next_aligned_16 args
        in  ( (arg, Padding padding) : rest, final_align_16 )

      where
        needed_alignment = case arg of
          RawStackArg arg   -> argSize platform arg
          RawStackArgRef {} -> platformWordSizeInBytes platform
        padding
          | needed_alignment < 16 || aligned_16
          = 0
          | otherwise
          = 8
        next_aligned_16 = not ( aligned_16 && needed_alignment < 16 )
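
-- A small worked example of the padding logic (w1, w2 are hypothetical 8-byte
-- arguments, v a hypothetical 16-byte vector argument; we start off 16-byte
-- aligned):
--
--   pad_args True [RawStackArg w1, RawStackArg v, RawStackArg w2]
--     == ( [ (RawStackArg w1, Padding 0)  -- an 8-byte argument flips the alignment
--          , (RawStackArg v,  Padding 8)  -- 16-byte argument while unaligned: pad by 8
--          , (RawStackArg w2, Padding 0)
--          ]
--        , False )                        -- not 16-byte aligned at the end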

-- | Load arguments into available registers.
loadArgs :: NCGConfig -> [CmmExpr] -> NatM LoadArgs
loadArgs config args
  | platformOS platform == OSMinGW32
  = evalStateT (loadArgsWin config args) (allArgRegs platform)
  | otherwise
  = evalStateT (loadArgsSysV config args) (allIntArgRegs platform
                                          ,allFPArgRegs  platform)
  where
    platform = ncgPlatform config

-- | Load arguments into available registers (System V AMD64 ABI).
loadArgsSysV :: NCGConfig
             -> [CmmExpr]
             -> StateT ([Reg], [Reg]) NatM LoadArgs
loadArgsSysV _ [] = return mempty
loadArgsSysV config (arg:rest) = do
  (iregs, fregs) <- get
  -- No available registers: pass everything on the stack (shortcut).
  if null iregs && null fregs
  then return $
          LoadArgs
            { stackArgs       = map RawStackArg (arg:rest)
            , stackDataArgs   = []
            , assignArgsCode  = nilOL
            , usedRegs        = []
            }
  else do
    mbReg <-
      if
        | isIntFormat arg_fmt
        , ireg:iregs' <- iregs
        -> do put (iregs', fregs)
              return $ Just ireg
        | isFloatFormat arg_fmt || isVecFormat arg_fmt
        , freg:fregs' <- fregs
        -> do put (iregs, fregs')
              return $ Just freg
        | otherwise
        -> return Nothing
    this_arg <-
      case mbReg of
        Just reg -> do
          assign_code <- lift $ loadArgIntoReg arg reg
          return $
            LoadArgs
                { stackArgs       = [] -- passed in register
                , stackDataArgs   = []
                , assignArgsCode  = assign_code
                , usedRegs        = [RegWithFormat reg arg_fmt]
                }
        Nothing -> do
          return $
            -- No available register for this argument: pass it on the stack.
            LoadArgs
                { stackArgs       = [RawStackArg arg]
                , stackDataArgs   = []
                , assignArgsCode  = nilOL
                , usedRegs        = []
                }
    others <- loadArgsSysV config rest
    return $ this_arg S.<> others

  where
    platform = ncgPlatform config
    arg_fmt = cmmTypeFormat (cmmExprType platform arg)

-- | Compute all things that will need to be pushed to the stack.
--
-- On Windows, an argument passed by reference will require two pieces of data:
--
--  - the reference (returned in the first position)
--  - the actual data (returned in the second position)
computeWinPushArgs :: Platform -> [CmmExpr] -> ([RawStackArg], [CmmExpr])
computeWinPushArgs platform = go
  where
    go :: [CmmExpr] -> ([RawStackArg], [CmmExpr])
    go [] = ([], [])
    go (arg:args) =
      let
        arg_size = argSize platform arg
        (this_arg, add_this_arg)
          | arg_size > 8
          = ( RawStackArgRef OnStack arg_size, (arg :) )
          | otherwise
          = ( RawStackArg arg, id )
        (stk_args, stk_data) = go args
      in
        (this_arg:stk_args, add_this_arg stk_data)

-- | Load arguments into available registers (Windows C X64 calling convention).
loadArgsWin :: NCGConfig -> [CmmExpr] -> StateT [(Reg,Reg)] NatM LoadArgs
loadArgsWin _ [] = return mempty
loadArgsWin config (arg:rest) = do
  regs <- get
  case regs of
    reg:regs' -> do
      put regs'
      this_arg <- lift $ load_arg_win reg
      rest <- loadArgsWin config rest
      return $ this_arg S.<> rest
    [] -> do
      -- No more registers available: pass all (remaining) arguments on the stack.
      let (stk_args, data_args) = computeWinPushArgs platform (arg:rest)
      return $
        LoadArgs
          { stackArgs       = stk_args
          , stackDataArgs   = data_args
          , assignArgsCode  = nilOL
          , usedRegs        = []
          }
  where
    platform = ncgPlatform config
    arg_fmt = cmmTypeFormat $ cmmExprType platform arg
    load_arg_win :: (Reg, Reg) -> NatM LoadArgs
    load_arg_win (ireg, freg)
      | isVecFormat arg_fmt
       -- Vectors are passed by reference.
       -- See Note [The Windows X64 C calling convention].
      = do return $
             LoadArgs
                -- Pass the reference in a register,
                -- and the argument data on the stack.
                { stackArgs       = [RawStackArgRef (InReg ireg) (argSize platform arg)]
                , stackDataArgs   = [arg] -- we don't yet know where the data will reside,
                , assignArgsCode  = nilOL -- so we defer computing the reference and storing it
                                          -- in the register until later
                , usedRegs        = [RegWithFormat ireg II64]
                }
      | otherwise
      = do let arg_reg
                  | isFloatFormat arg_fmt
                  = freg
                  | otherwise
                  = ireg
           assign_code <- loadArgIntoReg arg arg_reg
           -- Recall that, for varargs, we must pass floating-point
           -- arguments in both fp and integer registers.
           let (assign_code', regs')
                | isFloatFormat arg_fmt =
                    ( assign_code `snocOL` MOVD FF64 (OpReg freg) (OpReg ireg),
                      [ RegWithFormat freg FF64
                      , RegWithFormat ireg II64 ])
                | otherwise = (assign_code, [RegWithFormat ireg II64])
           return $
             LoadArgs
               { stackArgs       = [] -- passed in register
               , stackDataArgs   = []
               , assignArgsCode = assign_code'
               , usedRegs = regs'
               }

-- | Load an argument into a register.
--
-- Assumes that the expression does not contain any MachOps,
-- as per Note [Evaluate C-call arguments before placing in destination registers].
loadArgIntoReg :: CmmExpr -> Reg -> NatM InstrBlock
loadArgIntoReg arg reg = do
  when (debugIsOn && loadIntoRegMightClobberOtherReg arg) $ do
    platform <- getPlatform
    massertPpr False $
      vcat [ text "loadArgIntoReg: arg might contain MachOp"
           , text "arg:" <+> pdoc platform arg ]
  arg_code <- getAnyReg arg
  return $ arg_code reg

-- -----------------------------------------------------------------------------
-- Pushing arguments onto the stack for 64-bit C calls.

-- | The size of an argument (in bytes).
--
-- Never smaller than the platform word width.
argSize :: Platform -> CmmExpr -> Int
argSize platform arg =
  max (platformWordSizeInBytes platform) $
    widthInBytes (typeWidth $ cmmExprType platform arg)
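
-- For example, on X86_64 (hypothetical local registers):
--
--   argSize platform (CmmReg (CmmLocal a_w32_reg))   == 8   -- sub-word arguments count as a full word
--   argSize platform (CmmReg (CmmLocal a_v4f64_reg)) == 32  -- a 256-bit vector counts as 32 bytes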

-- | Add the given amount of padding on the stack.
addStackPadding :: Int -- ^ padding (in bytes)
                -> NatM InstrBlock
addStackPadding pad_bytes
  | pad_bytes == 0
  = return nilOL
  | otherwise
  = do delta <- getDeltaNat
       setDeltaNat (delta - pad_bytes)
       return $
         toOL [ SUB II64 (OpImm (ImmInt pad_bytes)) (OpReg rsp)
              , DELTA (delta - pad_bytes)
              ]

-- | Push one argument directly to the stack (by value).
--
-- Assumes the current stack pointer fulfills any necessary alignment requirements.
pushArgByValue :: NCGConfig -> CmmExpr -> NatM InstrBlock
pushArgByValue config arg
   -- For 64-bit integer arguments, use PUSH II64.
   --
   -- Note: we *must not* do this for smaller arguments.
   -- For example, if we tried to push an argument such as @CmmLoad addr W32 aln@,
   -- we could end up reading unmapped memory and segfaulting.
   | isIntFormat fmt
   , formatInBytes fmt == 8
   = do
     (arg_op, arg_code) <- getOperand arg
     delta <- getDeltaNat
     setDeltaNat (delta-arg_size)
     return $
       arg_code `appOL` toOL
       [ PUSH II64 arg_op
       , DELTA (delta-arg_size) ]

   | otherwise
   = do
     (arg_reg, arg_code) <- getSomeReg arg
     delta <- getDeltaNat
     setDeltaNat (delta-arg_size)
     return $ arg_code `appOL` toOL
        [ SUB (intFormat (wordWidth platform)) (OpImm (ImmInt arg_size)) (OpReg rsp)
        , DELTA (delta-arg_size)
        , movInstr config fmt (OpReg arg_reg) (OpAddr (spRel platform 0)) ]

    where
      platform = ncgPlatform config
      arg_size = argSize platform arg
      arg_rep = cmmExprType platform arg
      fmt = cmmTypeFormat arg_rep

-- | Load an argument into a register or push it to the stack.
loadOrPushArg :: NCGConfig -> (StackArg, Maybe Int) -> NatM (InstrBlock, InstrBlock)
loadOrPushArg config (stk_arg, mb_off) =
  case stk_arg of
    StackArg arg pad -> do
      push_code <- pushArgByValue config arg
      pad_code  <- addStackPadding pad
      return (nilOL, push_code `appOL` pad_code)
    StackArgRef { stackRef = ref } ->
      case ref of
        -- Pass the reference in a register
        InReg ireg ->
          return (unitOL $ LEA II64 (OpAddr (spRel platform off)) (OpReg ireg), nilOL)
        -- Pass the reference on the stack
        OnStack {} -> do
          tmp <- getNewRegNat II64
          delta <- getDeltaNat
          setDeltaNat (delta-arg_ref_size)
          let push_code = toOL
                [ SUB (intFormat (wordWidth platform)) (OpImm (ImmInt arg_ref_size)) (OpReg rsp)
                , DELTA (delta-arg_ref_size)
                , LEA II64 (OpAddr (spRel platform off)) (OpReg tmp)
                , MOV II64 (OpReg tmp) (OpAddr (spRel platform 0)) ]
          return (nilOL, push_code)
      where off = expectJust "push_arg_win offset" mb_off
    where
      arg_ref_size = 8 -- passing a reference to the argument
      platform = ncgPlatform config

-- | Push arguments to the stack, right to left.
--
-- On Windows, some arguments may need to be passed by reference,
-- which requires separately passing the data and the reference.
-- See Note [The Windows X64 C calling convention].
pushArgs :: NCGConfig
         -> [RawStackArg]
            -- ^ arguments proper (i.e. don't include the data for arguments passed by reference)
         -> [StackArg]
            -- ^ arguments we are passing on the stack
         -> NatM (InstrBlock, InstrBlock)
pushArgs config proper_args all_stk_args
  = do { let
            vec_offs :: [Maybe Int]
            vec_offs
              | platformOS platform == OSMinGW32
              = go stack_arg_size all_stk_args
              | otherwise
              = repeat Nothing

    ---------------------
    -- Windows-only code

            -- Size of the arguments we are passing on the stack, counting only
            -- the reference part for arguments passed by reference.
            stack_arg_size = 8 * count not_in_reg proper_args
            not_in_reg (RawStackArg {}) = True
            not_in_reg (RawStackArgRef { stackRef = ref }) =
              case ref of
                InReg {} -> False
                OnStack {} -> True

            -- Check an offset is valid (8-byte aligned), for assertions.
            ok off = off `rem` 8 == 0

            -- Tricky code: compute the stack offset to the vector data
            -- for this argument.
            --
            -- If you're confused, Note [The Windows X64 C calling convention]
            -- contains a helpful diagram.
            go :: Int -> [StackArg] -> [Maybe Int]
            go _ [] = []
            go off (stk_arg:args) =
              assertPpr (ok off) (text "unaligned offset:" <+> ppr off) $
              case stk_arg of
                StackArg {} ->
                  -- Only account for the stack pointer movement.
                  let off' = off - stackArgSpace platform stk_arg
                  in Nothing : go off' args
                StackArgRef
                  { stackRefArgSize    = data_size
                  , stackRefArgPadding = data_pad } ->
                  assertPpr (ok data_size) (text "unaligned data size:" <+> ppr data_size) $
                  assertPpr (ok data_pad) (text "unaligned data padding:" <+> ppr data_pad) $
                  let off' = off
                        -- Next piece of data is after the data for this reference
                           + data_size + data_pad
                        -- ... and account for the stack pointer movement.
                           - stackArgSpace platform stk_arg
                  in Just (data_pad + off) : go off' args

    -- end of Windows-only code
    ----------------------------

         -- Push the stack arguments (right to left),
         -- including both the reference and the data for arguments passed by reference.
       ; (load_regs, push_args) <- foldMapM (loadOrPushArg config) (reverse $ zip all_stk_args vec_offs)
       ; return (load_regs, push_args) }
  where
    platform = ncgPlatform config

{- Note [The Windows X64 C calling convention]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Here are a few facts about the Windows X64 C calling convention that
are important:

  - any argument larger than 8 bytes must be passed by reference,
    and arguments smaller than 8 bytes are padded to 8 bytes.

  - the first four arguments are passed in registers:
      - floating-point scalar arguments are passed in %xmm0, %xmm1, %xmm2, %xmm3
      - other arguments are passed in %rcx, %rdx, %r8, %r9
        (this includes vector arguments, passed by reference)

    For variadic functions, it is additionally expected that floating point
    scalar arguments are copied to the corresponding integer register, e.g.
    the data in xmm2 should also be copied to r8.

    There is no requirement about setting %al like there is for the
    System V AMD64 ABI.

  - subsequent arguments are passed on the stack.

There are also alignment requirements:

  - the data for vectors must be aligned to the size of the vector,
    e.g. a 32 byte vector must be aligned on a 32 byte boundary,

  - the stack pointer must be 16-byte aligned when executing the call
    instruction.
  (This differs from the System V AMD64 ABI, which requires the stack pointer
  to be 32-byte aligned at the call site if any 32 byte vectors are passed
  on the stack.)

This motivates our handling of vector values. Suppose we have a function call
with many arguments, several of them being vectors. We proceed as follows:

  - Add some padding, if necessary, to ensure the stack, when executing the call
    instruction, is 16-byte aligned. Whether this padding is necessary depends
    on what happens next. (Recall also that we start off at 8 (mod 16) alignment,
    as per Note [Stack Alignment on X86] in rts/StgCRun.c)
  - Push all the vectors to the stack first, adding padding after each one
    if necessary.
  - Then push the arguments:
      - for non-vectors, proceed as usual,
      - for vectors, push the address of the vector data we pushed above.
  - Then assign the registers:
      - for non-vectors, proceed as usual,
      - for vectors, store the address in a general-purpose register, as opposed
        to storing the data in an xmm register.

For a concrete example, suppose we have a call of the form:

  f x1 x2 x3 x4 x5 x6 x7

in which:

  - x2, x3, x5 and x7 are 16 byte vectors
  - the other arguments are all 8 byte wide

Now, x1, x2, x3, x4 will get passed in registers, except that we pass
x2 and x3 by reference, because they are vectors. We proceed as follows:

  - push the vectors to the stack: x7, x5, x3, x2 (in that order)
  - push the stack arguments in order: addr(x7), x6, addr(x5)
  - load the remaining arguments into registers: x4, addr(x3), addr(x2), x1

The tricky part is to get the right offsets for the addresses of the vector
data. The following visualisation will hopefully clear things up:

                                  ┌──┐
                                  │▓▓│ ─── padding to align the call instruction
                      ╭─╴         ╞══╡     (ensures Sp, below, is 16-byte aligned)
                      │           │  │
                      │  x7  ───╴ │  │
                      │           ├──┤
                      │           │  │
                      │  x5  ───╴ │  │
                      │           ├──┤
     vector data  ────┤           │  │
(individually padded) │  x3  ───╴ │  │
                      │           ├──┤
                      │           │  │
                      │  x2  ───╴ │  │
                      │           ├┄┄┤
                      │           │▓▓│ ─── padding to align x2 to 16 bytes
               ╭─╴    ╰─╴         ╞══╡
               │    addr(x7) ───╴ │  │    ╭─ from here: x7 is +64
               │                  ├──┤ ╾──╯    = 64 (position of x5)
     stack  ───┤         x6  ───╴ │  │         + 16 (size of x5) + 0 (padding of x7)
   arguments   │                  ├──┤         - 2 * 8 (x7 is 2 arguments higher than x5)
               │    addr(x5) ───╴ │  │
               ╰─╴            ╭─╴ ╞══╡ ╾─── from here:
                              │   │  │       - x2 is +32 = 24 (stack_arg_size) + 8 (padding of x2)
                   shadow  ───┤   │  │       - x3 is +48 = 32 (position of x2) + 16 (size of x2) + 0 (padding of x3)
                    space     │   │  │       - x5 is +64 = 48 (position of x3) + 16 (size of x3) + 0 (padding of x5)
                              │   │  │
                              ╰─╴ └──┘ ╾─── Sp

This is all tested in the simd013 test.
-}

-- -----------------------------------------------------------------------------
-- Generating a table-branch

{-
Note [Sub-word subtlety during jump-table indexing]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Offset the index by the start index of the jump table.
It's important that we do this *before* the widening below. To see
why, consider a switch with a sub-word, signed discriminant such as:

    switch [-5...+2] x::I16 {
        case -5: ...
        ...
        case +2: ...
    }

Consider what happens if we offset *after* widening in the case that
x=-4:

                                         // x == -4 == 0xfffc::I16
    indexWidened = UU_Conv(x);           // == 0xfffc::I64
    indexExpr    = indexWidened - (-5);  // == 0x10000::I64

This index is clearly nonsense given that the jump table only has
eight entries.

By contrast, if we widen *after* we offset then we get the correct
index (1),

                                         // x == -4 == 0xfffc::I16
    indexOffset  = x - (-5);             // == 1::I16
    indexExpr    = UU_Conv(indexOffset); // == 1::I64

See #21186.
-}

genSwitch :: CmmExpr -> SwitchTargets -> NatM InstrBlock

genSwitch expr targets = do
  config <- getConfig
  let platform = ncgPlatform config
      expr_w = cmmExprWidth platform expr
      indexExpr0 = cmmOffset platform expr offset
      -- We widen to a native-width register because we cannot use arbitrary sizes
      -- in x86 addressing modes.
      -- See Note [Sub-word subtlety during jump-table indexing].
      indexExpr = CmmMachOp
        (MO_UU_Conv expr_w (platformWordWidth platform))
        [indexExpr0]
  if ncgPIC config
  then do
        (reg,e_code) <- getNonClobberedReg indexExpr
           -- getNonClobberedReg because it needs to survive across t_code
        lbl <- getNewLabelNat
        let is32bit = target32Bit platform
            os = platformOS platform
            -- Might want to use .rodata.<function we're in> instead, but as
            -- long as it's something unique it'll work out since the
            -- references to the jump table are in the appropriate section.
            rosection = case os of
              -- on Mac OS X/x86_64, put the jump table in the text section to
              -- work around a limitation of the linker.
              -- ld64 is unable to handle the relocations for
              --     .quad L1 - L0
              -- if L0 is not preceded by a non-anonymous label in its section.
              OSDarwin | not is32bit -> Section Text lbl
              _ -> Section ReadOnlyData lbl
        dynRef <- cmmMakeDynamicReference config DataReference lbl
        (tableReg,t_code) <- getSomeReg $ dynRef
        let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
                                       (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0))

        return $ e_code `appOL` t_code `appOL` toOL [
                                ADD (intFormat (platformWordWidth platform)) op (OpReg tableReg),
                                JMP_TBL (OpReg tableReg) ids rosection lbl
                       ]
  else do
        (reg,e_code) <- getSomeReg indexExpr
        lbl <- getNewLabelNat
        let is32bit = target32Bit platform
        if is32bit
          then let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (platformWordSizeInBytes platform)) (ImmCLbl lbl))
                   jmp_code = JMP_TBL op ids (Section ReadOnlyData lbl) lbl
               in return $ e_code `appOL` unitOL jmp_code
          else do
            -- See Note [%rip-relative addressing on x86-64].
            tableReg <- getNewRegNat (intFormat (platformWordWidth platform))
            targetReg <- getNewRegNat (intFormat (platformWordWidth platform))
            let op = OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0))
                fmt = archWordFormat is32bit
                code = e_code `appOL` toOL
                    [ LEA fmt (OpAddr (AddrBaseIndex EABaseRip EAIndexNone (ImmCLbl lbl))) (OpReg tableReg)
                    , MOV fmt op (OpReg targetReg)
                    , JMP_TBL (OpReg targetReg) ids (Section ReadOnlyData lbl) lbl
                    ]
            return code
  where
    (offset, blockIds) = switchTargetsToTable targets
    ids = map (fmap DestBlockId) blockIds

generateJumpTableForInstr :: NCGConfig -> Instr -> Maybe (NatCmmDecl (Alignment, RawCmmStatics) Instr)
generateJumpTableForInstr config (JMP_TBL _ ids section lbl)
    = let getBlockId (DestBlockId id) = id
          getBlockId _ = panic "Non-Label target in Jump Table"
          blockIds = map (fmap getBlockId) ids
      in Just (createJumpTable config blockIds section lbl)
generateJumpTableForInstr _ _ = Nothing

createJumpTable :: NCGConfig -> [Maybe BlockId] -> Section -> CLabel
                -> GenCmmDecl (Alignment, RawCmmStatics) h g
createJumpTable config ids section lbl
    = let jumpTable
            | ncgPIC config =
                  let ww = ncgWordWidth config
                      jumpTableEntryRel Nothing
                          = CmmStaticLit (CmmInt 0 ww)
                      jumpTableEntryRel (Just blockid)
                          = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 ww)
                          where blockLabel = blockLbl blockid
                  in map jumpTableEntryRel ids
            | otherwise = map (jumpTableEntry config) ids
      in CmmData section (mkAlignment 1, CmmStaticsRaw lbl jumpTable)
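
-- For example, with ids == [Just b1, Nothing] (hypothetical block ids), the
-- position-independent table is, schematically,
--
--   [ CmmStaticLit (CmmLabelDiffOff (blockLbl b1) lbl 0 ww)  -- entry stored as an offset from lbl
--   , CmmStaticLit (CmmInt 0 ww)                             -- absent entry
--   ]
--
-- whereas the non-PIC table stores absolute entries via 'jumpTableEntry'.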

extractUnwindPoints :: [Instr] -> [UnwindPoint]
extractUnwindPoints instrs =
    [ UnwindPoint lbl unwinds | UNWIND lbl unwinds <- instrs]

-- -----------------------------------------------------------------------------
-- 'condIntReg' and 'condFltReg': condition codes into registers

-- Turn those condition codes into integers now (when they appear on
-- the right hand side of an assignment).
--
-- (If applicable) Do not fill the delay slots here; you will confuse the
-- register allocator.

condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register

condIntReg cond x y = do
  CondCode _ cond cond_code <- condIntCode cond x y
  tmp <- getNewRegNat II8
  let
        code dst = cond_code `appOL` toOL [
                    SETCC cond (OpReg tmp),
                    MOVZxL II8 (OpReg tmp) (OpReg dst)
                  ]
  return (Any II32 code)


-- Note [SSE Parity Checks]
-- ~~~~~~~~~~~~~~~~~~~~~~~~
-- We have to worry about unordered operands (eg. comparisons
-- against NaN).  If the operands are unordered, the comparison
-- sets the parity flag, carry flag and zero flag.
-- All comparisons are supposed to return false for unordered
-- operands except for !=, which returns true.
--
-- Optimisation: we don't have to test the parity flag if we
-- know the test has already excluded the unordered case: eg >
-- and >= test for a zero carry flag, which can only occur for
-- ordered operands.
--
-- By reversing comparisons we can avoid testing the parity
-- for < and <= as well. If either of the arguments is a NaN we
-- return false either way. If both arguments are valid then
-- x <= y  <->  y >= x  holds. So it's safe to swap these.
--
-- We invert the condition inside getRegister' and getCondCode,
-- which should cover all invertible cases.
-- All other functions translating FP comparisons to assembly
-- use these two to generate the comparison code.
--
-- As an example consider a simple check:
--
-- func :: Float -> Float -> Int
-- func x y = if x < y then 1 else 0
--
-- Which in Cmm gives the floating point comparison:
--
--  if (%MO_F_Lt_W32(F1, F2)) goto c2gg; else goto c2gf;
--
-- We used to compile this to an assembly code block like this:
-- _c2gh:
--  ucomiss %xmm2,%xmm1
--  jp _c2gf
--  jb _c2gg
--  jmp _c2gf
--
-- where we have to introduce an explicit
-- check for unordered results (the jp instruction, jump-on-parity).
--
-- We can avoid this by exchanging the arguments and inverting the direction
-- of the comparison. This results in the following sequence:
--
--  ucomiss %xmm1,%xmm2
--  ja _c2g2
--  jmp _c2g1
--
-- Removing the jump reduces the pressure on the branch prediction system
-- and plays better with the uOP cache.

condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg is32Bit cond x y = condFltReg_sse2
 where


  condFltReg_sse2 = do
    CondCode _ cond cond_code <- condFltCode cond x y
    tmp1 <- getNewRegNat (archWordFormat is32Bit)
    tmp2 <- getNewRegNat (archWordFormat is32Bit)
    let -- See Note [SSE Parity Checks]
        code dst =
           cond_code `appOL`
             (case cond of
                NE  -> or_unordered dst
                GU  -> plain_test   dst
                GEU -> plain_test   dst
                -- Use ASSERT so we don't break releases if these creep in.
                LTT -> assertPpr False (text "Should have been turned into >") $
                       and_ordered  dst
                LE  -> assertPpr False (text "Should have been turned into >=") $
                       and_ordered  dst
                _   -> and_ordered  dst)

        plain_test dst = toOL [
                    SETCC cond (OpReg tmp1),
                    MOVZxL II8 (OpReg tmp1) (OpReg dst)
                 ]
        or_unordered dst = toOL [
                    SETCC cond (OpReg tmp1),
                    SETCC PARITY (OpReg tmp2),
                    OR II8 (OpReg tmp1) (OpReg tmp2),
                    MOVZxL II8 (OpReg tmp2) (OpReg dst)
                  ]
        and_ordered dst = toOL [
                    SETCC cond (OpReg tmp1),
                    SETCC NOTPARITY (OpReg tmp2),
                    AND II8 (OpReg tmp1) (OpReg tmp2),
                    MOVZxL II8 (OpReg tmp2) (OpReg dst)
                  ]
    return (Any II32 code)


-- -----------------------------------------------------------------------------
-- 'trivial*Code': deal with trivial instructions

-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
-- Only look for constants on the right hand side, because that's
-- where the generic optimizer will have put them.

-- Similarly, for unary instructions, we don't have to worry about
-- matching an StInt as the argument, because genericOpt will already
-- have handled the constant-folding.


{-
The Rules of the Game are:

* You cannot assume anything about the destination register dst;
  it may be anything, including a fixed reg.

* You may compute an operand into a fixed reg, but you may not
  subsequently change the contents of that fixed reg.  If you
  want to do so, first copy the value either to a temporary
  or into dst.  You are free to modify dst even if it happens
  to be a fixed reg -- that's not your problem.

* You cannot assume that a fixed reg will stay live over an
  arbitrary computation.  The same applies to the dst reg.

* Temporary regs obtained from getNewRegNat are distinct from
  each other and from all other regs, and stay live over
  arbitrary computations.

--------------------

SDM's version of The Rules:

* If getRegister returns Any, that means it can generate correct
  code which places the result in any register, period.  Even if that
  register happens to be read during the computation.

  Corollary #1: this means that if you are generating code for an
  operation with two arbitrary operands, you cannot assign the result
  of the first operand into the destination register before computing
  the second operand.  The second operand might require the old value
  of the destination register.

  Corollary #2: A function might be able to generate more efficient
  code if it knows the destination register is a new temporary (and
  therefore not read by any of the sub-computations).

* If getRegister returns Any, then the code it generates may modify only:
        (a) fresh temporaries
        (b) the destination register
        (c) known registers (eg. %ecx is used by shifts)
  In particular, it may *not* modify global registers, unless the global
  register happens to be the destination register.
-}
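
{-
An illustrative (non-normative) sketch of Corollary #1: for a two-operand
operation such as @x + y@, a code generator shaped like

    code dst = x_code dst `appOL` y_code ...   -- WRONG if 'y' reads dst

would be incorrect whenever computing 'y' reads 'dst', because 'x' has
already overwritten it.  The second operand must instead be computed into an
operand or temporary known not to clash with 'dst'; 'genTrivialCode' below
is written exactly this way.
-}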

trivialCode :: Width -> (Operand -> Operand -> Instr)
            -> Maybe (Operand -> Operand -> Instr)
            -> CmmExpr -> CmmExpr -> NatM Register
trivialCode width instr m a b
    = do platform <- getPlatform
         trivialCode' platform width instr m a b

trivialCode' :: Platform -> Width -> (Operand -> Operand -> Instr)
             -> Maybe (Operand -> Operand -> Instr)
             -> CmmExpr -> CmmExpr -> NatM Register
trivialCode' platform width _ (Just revinstr) (CmmLit lit_a) b
  | is32BitLit platform lit_a = do
  b_code <- getAnyReg b
  let
       code dst
         = b_code dst `snocOL`
           revinstr (OpImm (litToImm lit_a)) (OpReg dst)
  return (Any (intFormat width) code)

trivialCode' _ width instr _ a b
  = genTrivialCode (intFormat width) instr a b
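
-- A small illustrative example (an assumption about a typical caller, not a
-- spec): for a commutative operation with a 32-bit literal on the left,
-- such as @5 + x@ handled with @Just (ADD format)@ as the reversed
-- instruction, the first clause computes 'x' straight into the destination
-- and then emits roughly
--
--   ADD $5, dst
--
-- instead of first materialising the literal in a register.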

-- This is re-used for floating pt instructions too.
genTrivialCode :: Format -> (Operand -> Operand -> Instr)
               -> CmmExpr -> CmmExpr -> NatM Register
genTrivialCode rep instr a b = do
  (b_op, b_code) <- getNonClobberedOperand b
  a_code <- getAnyReg a
  tmp <- getNewRegNat rep
  let
     -- We want the value of 'b' to stay alive across the computation of 'a'.
     -- But, we want to calculate 'a' straight into the destination register,
     -- because the instruction only has two operands (dst := dst `op` src).
     -- The troublesome case is when the result of 'b' is in the same register
     -- as the destination 'dst'.  In this case, we have to save 'b' in a
     -- new temporary across the computation of 'a'.
     code dst
        | dst `regClashesWithOp` b_op =
                b_code `appOL`
                unitOL (MOV rep b_op (OpReg tmp)) `appOL`
                a_code dst `snocOL`
                instr (OpReg tmp) (OpReg dst)
        | otherwise =
                b_code `appOL`
                a_code dst `snocOL`
                instr b_op (OpReg dst)
  return (Any rep code)

regClashesWithOp :: Reg -> Operand -> Bool
reg `regClashesWithOp` OpReg reg2   = reg == reg2
reg `regClashesWithOp` OpAddr amode = any (== reg) (addrModeRegs amode)
_   `regClashesWithOp` _            = False
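
-- For illustration: a destination register clashes with a memory operand
-- whenever it appears as a base or index register of the address, e.g.
--
--   eax `regClashesWithOp` OpAddr (AddrBaseIndex (EABaseReg eax) EAIndexNone (ImmInt 0))  ==  True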

-- | Generate code for a fused multiply-add operation, of the form @± x * y ± z@,
-- with 3 operands (FMA3 instruction set).
genFMA3Code :: Length
            -> Width
            -> FMASign
            -> CmmExpr -> CmmExpr -> CmmExpr -> NatM Register
genFMA3Code l w signs x y z = do
  config <- getConfig
  -- For the FMA instruction, we want to compute x * y + z
  --
  -- There are three possible instructions we could emit:
  --
  --   - fmadd213 z y x, result in x, z can be a memory address
  --   - fmadd132 x z y, result in y, x can be a memory address
  --   - fmadd231 y x z, result in z, y can be a memory address
  --
  -- This suggests two possible optimisations:
  --
  --   - OPTIMISATION 1
  --     If one argument is an address, use the instruction that allows
  --     a memory address in that position.
  --
  --   - OPTIMISATION 2
  --     If one argument is in a fixed register, use the instruction that puts
  --     the result in that same register.
  --
  -- Currently we follow neither of these optimisations,
  -- opting to always use fmadd213 for simplicity.
  --
  -- We would like to compute the result directly into the requested register.
  -- To do so we must first compute `x` into the destination register. This is
  -- only possible if the other arguments don't use the destination register.
  -- We check for this and if there is a conflict we move the result only after
  -- the computation. See #24496 for how this went wrong in the past.
  let rep
        | l == 1
        = floatFormat w
        | otherwise
        = vecFormat (cmmVec l $ cmmFloat w)
  (y_reg, y_code) <- getNonClobberedReg y
  (z_op, z_code) <- getNonClobberedOperand z
  x_code <- getAnyReg x
  x_tmp <- getNewRegNat rep
  let
     fma213 = FMA3 rep signs FMA213

     code, code_direct, code_mov :: Reg -> InstrBlock
     -- Ideal: Compute the result directly into dst
     code_direct dst = x_code dst `snocOL`
                       fma213 z_op y_reg dst
     -- Fallback: Compute the result into a tmp reg and then move it.
     code_mov dst    = x_code x_tmp `snocOL`
                       fma213 z_op y_reg x_tmp `snocOL`
                       mkRegRegMoveInstr config rep x_tmp dst

     code dst =
        y_code `appOL`
        z_code `appOL`
        ( if arg_regs_conflict then code_mov dst else code_direct dst )

      where

        arg_regs_conflict =
          y_reg == dst ||
          case z_op of
            OpReg z_reg  -> z_reg == dst
            OpAddr amode -> dst `elem` addrModeRegs amode
            OpImm {}     -> False

  -- NB: Computing the result into a desired register using Any can be tricky.
  -- So for now, we keep it simple. (See #24496).
  return (Any rep code)
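
-- To spell out the operand order used above (illustration only): 'x' is
-- first computed into the destination (or into x_tmp in the fallback), so
-- the single
--
--   fma213 z_op y_reg dst
--
-- instruction computes  dst := y_reg * dst + z_op  =  x * y + z,
-- which is why only the 213 permutation is needed here.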

-----------

trivialUCode :: Format -> (Operand -> Instr)
             -> CmmExpr -> NatM Register
trivialUCode rep instr x = do
  x_code <- getAnyReg x
  let
     code dst =
        x_code dst `snocOL`
        instr (OpReg dst)
  return (Any rep code)

-----------


trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialFCode_sse2 ty instr x y
    = genTrivialCode format (instr format) x y
    where format = floatFormat ty


--------------------------------------------------------------------------------
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
coerceInt2FP from to x = coerce_sse2
 where

   coerce_sse2 = do
     (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
     let
           opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
                             n -> panic $ "coerceInt2FP.sse: unhandled width ("
                                          ++ show n ++ ")"
           code dst = x_code `snocOL` opc (intFormat from) x_op dst
     return (Any (floatFormat to) code)
        -- works even if the destination rep is <II32

--------------------------------------------------------------------------------
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int from to x = coerceFP2Int_sse2
 where
   coerceFP2Int_sse2 = do
     (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
     let
           opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
                               n -> panic $ "coerceFP2Int.sse: unhandled width ("
                                            ++ show n ++ ")"
           code dst = x_code `snocOL` opc (intFormat to) x_op dst
     return (Any (intFormat to) code)
         -- works even if the destination rep is <II32


--------------------------------------------------------------------------------
coerceFP2FP :: Width -> CmmExpr -> NatM Register
coerceFP2FP to x = do
  (x_reg, x_code) <- getSomeReg x
  let
        opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
                          n -> panic $ "coerceFP2FP: unhandled width ("
                                       ++ show n ++ ")"
        code dst = x_code `snocOL` opc x_reg dst
  return (Any (floatFormat to) code)

--------------------------------------------------------------------------------

sse2NegCode :: Width -> CmmExpr -> NatM Register
sse2NegCode w x = do
  let fmt = floatFormat w
  x_code <- getAnyReg x
  -- This is how gcc does it, so it can't be that bad:
  let
    const = case fmt of
      FF32 -> CmmInt 0x80000000 W32
      FF64 -> CmmInt 0x8000000000000000 W64
      x@II8  -> wrongFmt x
      x@II16 -> wrongFmt x
      x@II32 -> wrongFmt x
      x@II64 -> wrongFmt x
      x@(VecFormat {}) -> wrongFmt x

      where
        wrongFmt x = panic $ "sse2NegCode: " ++ show x
  Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
  tmp <- getNewRegNat fmt
  let
    code dst = x_code dst `appOL` amode_code `appOL` toOL [
        MOV fmt (OpAddr amode) (OpReg tmp),
        XOR fmt (OpReg tmp) (OpReg dst)
        ]
  --
  return (Any fmt code)

needLlvm :: MachOp -> NatM a
needLlvm mop =
  sorry $ unlines [ "Unsupported vector instruction for the native code generator:"
                  , show mop
                  , "Please use -fllvm." ]

incorrectOperands :: NatM a
incorrectOperands = sorry "Incorrect number of operands"

invalidConversion :: Width -> Width -> NatM a
invalidConversion from to =
  sorry $ "Invalid conversion operation from " ++ show from ++ " to " ++ show to

-- | This works on the invariant that all jumps in the given blocks are required.
--   Starting from there we try to make a few more jumps redundant by reordering
--   them.
--   We depend on the information in the CFG to do so, so without a given CFG
--   we do nothing.
invertCondBranches :: Maybe CFG  -- ^ CFG if present
                   -> LabelMap a -- ^ Blocks with info tables
                   -> [NatBasicBlock Instr] -- ^ List of basic blocks
                   -> [NatBasicBlock Instr]
invertCondBranches Nothing _       bs = bs
invertCondBranches (Just cfg) keep bs =
    invert bs
  where
    invert :: [NatBasicBlock Instr] -> [NatBasicBlock Instr]
    invert (BasicBlock lbl1 ins:b2@(BasicBlock lbl2 _):bs)
      | --pprTrace "Block" (ppr lbl1) True,
        Just (jmp1,jmp2) <- last2 ins
      , JXX cond1 target1 <- jmp1
      , target1 == lbl2
      --, pprTrace "CutChance" (ppr b1) True
      , JXX ALWAYS target2 <- jmp2
      -- We have enough information to check if we can perform the inversion
      -- TODO: We could also check for the last asm instruction which sets
      -- status flags instead, which I suspect is worse for compiler
      -- performance, but might be applicable to more cases
      , Just edgeInfo1 <- getEdgeInfo lbl1 target1 cfg
      , Just edgeInfo2 <- getEdgeInfo lbl1 target2 cfg
      -- Both jumps come from the same cmm statement
      , transitionSource edgeInfo1 == transitionSource edgeInfo2
      , CmmSource {trans_cmmNode = cmmCondBranch} <- transitionSource edgeInfo1

      -- Int comparisons are invertible
      , CmmCondBranch (CmmMachOp op _args) _ _ _ <- cmmCondBranch
      , Just _ <- maybeIntComparison op
      , Just invCond <- maybeInvertCond cond1

      -- Swap the last two jumps, invert the conditional jump's condition.
      = let jumps =
              case () of
                -- We are free to eliminate the jmp. So we do so.
                _ | not (mapMember target1 keep)
                    -> [JXX invCond target2]
                -- If the conditional target is unlikely we put the other
                -- target at the front.
                  | edgeWeight edgeInfo2 > edgeWeight edgeInfo1
                    -> [JXX invCond target2, JXX ALWAYS target1]
                -- Keep things as-is otherwise
                  | otherwise
                    -> [jmp1, jmp2]
        in --pprTrace "Cutable" (ppr [jmp1,jmp2] <+> text "=>" <+> ppr jumps) $
           (BasicBlock lbl1
            (dropTail 2 ins ++ jumps))
            : invert (b2:bs)
    invert (b:bs) = b : invert bs
    invert [] = []
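
-- An illustrative example (hypothetical labels): for a block ending in
--
--   ... ; JXX EQQ lbl2 ; JXX ALWAYS lbl3
--
-- where lbl2 is the next block, the branch comes from an integer comparison
-- and lbl2 needs no info table, the rewrite above yields
--
--   ... ; JXX NE lbl3
--
-- and execution falls through into lbl2, eliminating the unconditional jump.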

genAtomicRMW
  :: BlockId
  -> Width
  -> AtomicMachOp
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> NatM (InstrBlock, Maybe BlockId)
genAtomicRMW bid width amop dst addr n = do
    Amode amode addr_code <-
        if amop `elem` [AMO_Add, AMO_Sub]
        then getAmode addr
        else getSimpleAmode addr  -- See genForeignCall for MO_Cmpxchg
    arg <- getNewRegNat format
    arg_code <- getAnyReg n
    platform <- ncgPlatform <$> getConfig

    let dst_r = getRegisterReg platform (CmmLocal dst)
    (code, lbl) <- op_code dst_r arg amode
    return (addr_code `appOL` arg_code arg `appOL` code, Just lbl)
  where
    -- Code for the operation
    op_code :: Reg       -- Destination reg
            -> Reg       -- Register containing argument
            -> AddrMode  -- Address of location to mutate
            -> NatM (OrdList Instr, BlockId) -- TODO: Return Maybe BlockId
    op_code dst_r arg amode = do
        case amop of
          -- In the common case where dst_r is a virtual register the
          -- final move should go away, because it's the last use of arg
          -- and the first use of dst_r.
          AMO_Add  -> return $ (toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
                                     , MOV format (OpReg arg) (OpReg dst_r)
                                     ], bid)
          AMO_Sub  -> return $ (toOL [ NEGI format (OpReg arg)
                                     , LOCK (XADD format (OpReg arg) (OpAddr amode))
                                     , MOV format (OpReg arg) (OpReg dst_r)
                                     ], bid)
          -- In these cases we need a new block id, and have to return it so
          -- that later instruction selection can reference it.
          AMO_And  -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst)
          AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst
                                                      , NOT format dst
                                                      ])
          AMO_Or   -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst)
          AMO_Xor  -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst)
      where
        -- Simulate operation that lacks a dedicated instruction using
        -- cmpxchg.
        cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
                     -> NatM (OrdList Instr, BlockId)
        cmpxchg_code instrs = do
            lbl1 <- getBlockIdNat
            lbl2 <- getBlockIdNat
            tmp <- getNewRegNat format

            -- Record inserted blocks
            --   We turn A -> B into A -> A' -> A'' -> B
            --   with a self loop on A'.
            addImmediateSuccessorNat bid lbl1
            addImmediateSuccessorNat lbl1 lbl2
            updateCfgNat (addWeightEdge lbl1 lbl1 0)

            return $ (toOL
                [ MOV format (OpAddr amode) (OpReg eax)
                , JXX ALWAYS lbl1
                , NEWBLOCK lbl1
                  -- Keep old value so we can return it:
                , MOV format (OpReg eax) (OpReg dst_r)
                , MOV format (OpReg eax) (OpReg tmp)
                ]
                `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
                [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
                , JXX NE lbl1
                -- See Note [Introducing cfg edges inside basic blocks]
                -- for why this basic block is required.
                , JXX ALWAYS lbl2
                , NEWBLOCK lbl2
                ],
                lbl2)
    format = intFormat width
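
-- Schematically, the cmpxchg-based cases above generate the following retry
-- loop (an informal sketch, not literal output):
--
--     load [amode] into %eax
--   lbl1:
--     copy %eax into dst_r and tmp
--     apply the operation to tmp        (e.g. AND, or AND+NOT for nand)
--     lock cmpxchg: store tmp to [amode] only if [amode] still equals %eax
--     jne lbl1      -- on failure %eax now holds the current value; retry
--   lbl2:
--     dst_r holds the value observed before the successful update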

-- | Count trailing zeroes
genCtz :: BlockId -> Width -> LocalReg -> CmmExpr -> NatM (InstrBlock, Maybe BlockId)
genCtz bid width dst src = do
  is32Bit <- is32BitPlatform
  if is32Bit && width == W64
    then genCtz64_32 bid dst src
    else (,Nothing) <$> genCtzGeneric width dst src

-- | Count trailing zeroes
--
-- 64-bit width on 32-bit architecture
genCtz64_32
  :: BlockId
  -> LocalReg
  -> CmmExpr
  -> NatM (InstrBlock, Maybe BlockId)
genCtz64_32 bid dst src = do
  RegCode64 vcode rhi rlo <- iselExpr64 src
  let dst_r = getLocalRegReg dst
  lbl1 <- getBlockIdNat
  lbl2 <- getBlockIdNat
  tmp_r <- getNewRegNat II64

  -- New CFG Edges:
  --  bid -> lbl2
  --  bid -> lbl1 -> lbl2
  --  We also change edges originating at bid to start at lbl2 instead.
  weights <- getCfgWeights
  updateCfgNat (addWeightEdge bid lbl1 110 .
                addWeightEdge lbl1 lbl2 110 .
                addImmediateSuccessor weights bid lbl2)

  -- The following instruction sequence corresponds to the pseudo-code
  --
  --  if (src) {
  --    dst = src.lo32 ? BSF(src.lo32) : (BSF(src.hi32) + 32);
  --  } else {
  --    dst = 64;
  --  }
  let instrs = vcode `appOL` toOL
           ([ MOV      II32 (OpReg rhi)         (OpReg tmp_r)
            , OR       II32 (OpReg rlo)         (OpReg tmp_r)
            , MOV      II32 (OpImm (ImmInt 64)) (OpReg dst_r)
            , JXX EQQ    lbl2
            , JXX ALWAYS lbl1

            , NEWBLOCK   lbl1
            , BSF     II32 (OpReg rhi)         dst_r
            , ADD     II32 (OpImm (ImmInt 32)) (OpReg dst_r)
            , BSF     II32 (OpReg rlo)         tmp_r
            , CMOV NE II32 (OpReg tmp_r)       dst_r
            , JXX ALWAYS lbl2

            , NEWBLOCK   lbl2
            ])
  return (instrs, Just lbl2)

-- | Count trailing zeroes
--
-- Generic case (width <= word size)
genCtzGeneric :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genCtzGeneric width dst src = do
  code_src <- getAnyReg src
  config <- getConfig
  let bw = widthInBits width
  let dst_r = getLocalRegReg dst
  if ncgBmiVersion config >= Just BMI2
  then do
      src_r <- getNewRegNat (intFormat width)
      let instrs = appOL (code_src src_r) $ case width of
              W8 -> toOL
                  [ OR    II32 (OpImm (ImmInteger 0xFFFFFF00)) (OpReg src_r)
                  , TZCNT II32 (OpReg src_r) dst_r
                  ]
              W16 -> toOL
                  [ TZCNT  II16 (OpReg src_r) dst_r
                  , MOVZxL II16 (OpReg dst_r) (OpReg dst_r)
                  ]
              _ -> unitOL $ TZCNT (intFormat width) (OpReg src_r) dst_r
      return instrs
  else do
      -- The following insn sequence makes sure 'ctz 0' has a defined value.
      -- starting with Haswell, one could use the TZCNT insn instead.
      let format = if width == W8 then II16 else intFormat width
      src_r <- getNewRegNat format
      tmp_r <- getNewRegNat format
      let instrs = code_src src_r `appOL` toOL
               ([ MOVZxL  II8    (OpReg src_r)       (OpReg src_r) | width == W8 ] ++
                [ BSF     format (OpReg src_r)       tmp_r
                , MOV     II32   (OpImm (ImmInt bw)) (OpReg dst_r)
                , CMOV NE format (OpReg tmp_r)       dst_r
                ]) -- NB: We don't need to zero-extend the result for the
                   -- W8/W16 cases because the 'MOV' insn already
                   -- took care of implicitly clearing the upper bits
      return instrs
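
-- A note on the W8 case above: OR-ing the source with 0xFFFFFF00 guarantees
-- that bit 8 is set, so a zero input byte gives TZCNT == 8 (the defined
-- result for a W8 ctz of 0), while a non-zero byte is unaffected because all
-- of its trailing zeroes lie below bit 8.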



-- | Copy memory
--
-- Unroll memcpy calls if the number of bytes to copy isn't too large (cf
-- ncgInlineThresholdMemcpy).  Otherwise, call C's memcpy.
genMemCpy
  :: BlockId
  -> Int
  -> CmmExpr
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genMemCpy bid align dst src arg_n = do

  let libc_memcpy = genLibCCall bid (fsLit "memcpy") [] [dst,src,arg_n]

  case arg_n of
    CmmLit (CmmInt n _) -> do
      -- try to inline it
      mcode <- genMemCpyInlineMaybe align dst src n
      -- if it didn't inline, call the C function
      case mcode of
        Nothing -> libc_memcpy
        Just c  -> pure c

    -- not a literal size argument: call the C function
    _ -> libc_memcpy



genMemCpyInlineMaybe
  :: Int
  -> CmmExpr
  -> CmmExpr
  -> Integer
  -> NatM (Maybe InstrBlock)
genMemCpyInlineMaybe align dst src n = do
  config <- getConfig
  let
    platform     = ncgPlatform config
    maxAlignment = wordAlignment platform
                   -- only machine word wide MOVs are supported
    effectiveAlignment = min (alignmentOf align) maxAlignment
    format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment


  -- The size of each move, in bytes.
  let sizeBytes :: Integer
      sizeBytes = fromIntegral (formatInBytes format)

  -- The number of instructions we will generate (approx). We need 2
  -- instructions per move.
  let insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)

      go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
      go dst src tmp i
          | i >= sizeBytes =
              unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
              unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
              go dst src tmp (i - sizeBytes)
          -- Deal with remaining bytes.
          | i >= 4 =  -- Will never happen on 32-bit
              unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
              unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
              go dst src tmp (i - 4)
          | i >= 2 =
              unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
              unitOL (MOV    II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
              go dst src tmp (i - 2)
          | i >= 1 =
              unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
              unitOL (MOV    II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
              go dst src tmp (i - 1)
          | otherwise = nilOL
        where
          src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
                       (ImmInteger (n - i))

          dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
                       (ImmInteger (n - i))

  if insns > fromIntegral (ncgInlineThresholdMemcpy config)
    then pure Nothing
    else do
      code_dst <- getAnyReg dst
      dst_r <- getNewRegNat format
      code_src <- getAnyReg src
      src_r <- getNewRegNat format
      tmp_r <- getNewRegNat format
      pure $ Just $ code_dst dst_r `appOL` code_src src_r `appOL`
                      go dst_r src_r tmp_r (fromInteger n)
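
-- A worked example (illustrative): on a 64-bit platform with an 8-byte
-- alignment hint and n = 10, 'format' is II64, so 'go' emits one 8-byte
-- load/store pair for bytes 0..7 and one 2-byte pair for bytes 8..9: four
-- move instructions in total, matching the 2 * ceil(10/8) = 4 estimate
-- compared against ncgInlineThresholdMemcpy.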

-- | Set memory to the given byte
--
-- Unroll memset calls if the number of bytes to set isn't too large (cf
-- ncgInlineThresholdMemset).  Otherwise, call C's memset.
genMemSet
  :: BlockId
  -> Int
  -> CmmExpr
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genMemSet bid align dst arg_c arg_n = do

  let libc_memset = genLibCCall bid (fsLit "memset") [] [dst,arg_c,arg_n]

  case (arg_c,arg_n) of
    (CmmLit (CmmInt c _), CmmLit (CmmInt n _)) -> do
      -- try to inline it
      mcode <- genMemSetInlineMaybe align dst c n
      -- if it didn't inline, call the C function
      case mcode of
        Nothing -> libc_memset
        Just c  -> pure c

    -- not literal size arguments: call the C function
    _ -> libc_memset

genMemSetInlineMaybe
  :: Int
  -> CmmExpr
  -> Integer
  -> Integer
  -> NatM (Maybe InstrBlock)
genMemSetInlineMaybe align dst c n = do
  config <- getConfig
  let
    platform = ncgPlatform config
    maxAlignment = wordAlignment platform -- only machine word wide MOVs are supported
    effectiveAlignment = min (alignmentOf align) maxAlignment
    format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment
    c2 = c `shiftL` 8 .|. c
    c4 = c2 `shiftL` 16 .|. c2
    c8 = c4 `shiftL` 32 .|. c4

    -- The number of instructions we will generate (approx). We need 1
    -- instruction per move.
    insns = (n + sizeBytes - 1) `div` sizeBytes

    -- The size of each move, in bytes.
    sizeBytes :: Integer
    sizeBytes = fromIntegral (formatInBytes format)

    -- Depending on size returns the widest MOV instruction and its
    -- width.
    gen4 :: AddrMode -> Integer -> (InstrBlock, Integer)
    gen4 addr size
        | size >= 4 =
            (unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr addr)), 4)
        | size >= 2 =
            (unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr addr)), 2)
        | size >= 1 =
            (unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr addr)), 1)
        | otherwise = (nilOL, 0)

    -- Generates a 64-bit wide MOV instruction from REG to MEM.
    gen8 :: AddrMode -> Reg -> InstrBlock
    gen8 addr reg8byte =
      unitOL (MOV format (OpReg reg8byte) (OpAddr addr))

    -- Unrolls memset when the widest MOV is <= 4 bytes.
    go4 :: Reg -> Integer -> InstrBlock
    go4 dst left =
      if left <= 0 then nilOL
      else curMov `appOL` go4 dst (left - curWidth)
      where
        possibleWidth = minimum [left, sizeBytes]
        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
        (curMov, curWidth) = gen4 dst_addr possibleWidth

    -- Unrolls memset when the widest MOV is 8 bytes (thus another Reg
    -- argument). Falls back to go4 when all 8 byte moves are
    -- exhausted.
    go8 :: Reg -> Reg -> Integer -> InstrBlock
    go8 dst reg8byte left =
      if possibleWidth >= 8 then
        let curMov = gen8 dst_addr reg8byte
        in  curMov `appOL` go8 dst reg8byte (left - 8)
      else go4 dst left
      where
        possibleWidth = minimum [left, sizeBytes]
        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))

  if fromInteger insns > ncgInlineThresholdMemset config
    then pure Nothing
    else do
        code_dst <- getAnyReg dst
        dst_r <- getNewRegNat format
        if format == II64 && n >= 8
          then do
            code_imm8byte <- getAnyReg (CmmLit (CmmInt c8 W64))
            imm8byte_r <- getNewRegNat II64
            return $ Just $ code_dst dst_r `appOL`
                              code_imm8byte imm8byte_r `appOL`
                              go8 dst_r imm8byte_r (fromInteger n)
          else
            return $ Just $ code_dst dst_r `appOL`
                              go4 dst_r (fromInteger n)
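
-- A worked example of the byte replication above: for c = 0xAB the widened
-- constants are c2 = 0xABAB, c4 = 0xABABABAB and c8 = 0xABABABABABABABAB,
-- so one wide MOV stores the fill byte into 2, 4 or 8 bytes at once.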


genMemMove :: BlockId -> p -> CmmActual -> CmmActual -> CmmActual -> NatM InstrBlock
genMemMove bid _align dst src n = do
  -- TODO: generate inline assembly when under a given threshold (similarly to
  -- memcpy and memset)
  genLibCCall bid (fsLit "memmove") [] [dst,src,n]

genMemCmp :: BlockId -> p -> CmmFormal -> CmmActual -> CmmActual -> CmmActual -> NatM InstrBlock
genMemCmp bid _align res dst src n = do
  -- TODO: generate inline assembly when under a given threshold (similarly to
  -- memcpy and memset)
  genLibCCall bid (fsLit "memcmp") [res] [dst,src,n]

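-- Lower a data-prefetch hint to the x86 PREFETCH family of instructions; the
-- locality argument selects the variant as described by the comment in the body.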
genPrefetchData :: Int -> CmmExpr -> NatM (OrdList Instr)
genPrefetchData n src = do
  is32Bit <- is32BitPlatform
  let
    format = archWordFormat is32Bit
    -- we need to know the register width used for pointers!
    genPrefetch inRegSrc prefetchCTor = do
      code_src <- getAnyReg inRegSrc
      src_r <- getNewRegNat format
      return $ code_src src_r `appOL`
        (unitOL (prefetchCTor (OpAddr
                    ((AddrBaseIndex (EABaseReg src_r) EAIndexNone (ImmInt 0))))))
        -- prefetch always takes an address

  -- the C / LLVM prefetch convention is 0, 1, 2, and 3;
  -- the corresponding x86 hints are NTA, T2, T1, and T0
  case n of
      0 -> genPrefetch src $ PREFETCH NTA  format
      1 -> genPrefetch src $ PREFETCH Lvl2 format
      2 -> genPrefetch src $ PREFETCH Lvl1 format
      3 -> genPrefetch src $ PREFETCH Lvl0 format
      l -> pprPanic "genPrefetchData: unexpected prefetch level" (ppr l)

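-- Reverse the byte order of 'src' into 'dst'. Note that W64 on 32-bit
-- platforms is handled as two 32-bit BSWAPs with the halves exchanged, and W16
-- uses a 32-bit BSWAP followed by a 16-bit right shift, since BSWAP has no
-- 16-bit form.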
genByteSwap :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genByteSwap width dst src = do
  is32Bit <- is32BitPlatform
  let format = intFormat width
  case width of
      W64 | is32Bit -> do
        let Reg64 dst_hi dst_lo = localReg64 dst
        RegCode64 vcode rhi rlo <- iselExpr64 src
        return $ vcode `appOL`
                 toOL [ MOV II32 (OpReg rlo) (OpReg dst_hi),
                        MOV II32 (OpReg rhi) (OpReg dst_lo),
                        BSWAP II32 dst_hi,
                        BSWAP II32 dst_lo ]
      W16 -> do
        let dst_r = getLocalRegReg dst
        code_src <- getAnyReg src
        return $ code_src dst_r `appOL`
                 unitOL (BSWAP II32 dst_r) `appOL`
                 unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
      _   -> do
        let dst_r = getLocalRegReg dst
        code_src <- getAnyReg src
        return $ code_src dst_r `appOL` unitOL (BSWAP format dst_r)

genBitRev :: BlockId -> Width -> CmmFormal -> CmmActual -> NatM InstrBlock
genBitRev bid width dst src = do
  -- Here the C implementation (hs_bitrevN) is used as there is no x86
  -- instruction to reverse a word's bit order.
  genPrimCCall bid (bRevLabel width) [dst] [src]

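-- Population count: uses the POPCNT instruction when SSE4.2 is available,
-- otherwise falls back to a C call to hs_popcntN in ghc-prim.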
genPopCnt :: BlockId -> Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genPopCnt bid width dst src = do
  config <- getConfig
  let
    platform = ncgPlatform config
    format = intFormat width

  sse4_2Enabled >>= \case

    True -> do
      code_src <- getAnyReg src
      src_r <- getNewRegNat format
      let dst_r = getRegisterReg platform (CmmLocal dst)
      return $ code_src src_r `appOL`
          (if width == W8 then
               -- The POPCNT instruction doesn't take an r/m8
               unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
               unitOL (POPCNT II16 (OpReg src_r) dst_r)
           else
               unitOL (POPCNT format (OpReg src_r) dst_r)) `appOL`
          (if width == W8 || width == W16 then
               -- We used a 16-bit destination register above,
               -- so zero-extend
               unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
           else nilOL)

    False ->
      -- generate C call to hs_popcntN in ghc-prim
      -- TODO: we could directly generate the assembly to index popcount_tab
      -- here instead of doing it by calling a C function
      genPrimCCall bid (popCntLabel width) [dst] [src]


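-- Parallel bit deposit: uses the BMI2 PDEP instruction when available,
-- otherwise calls hs_pdepN in ghc-prim.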
genPdep :: BlockId -> Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genPdep bid width dst src mask = do
  config <- getConfig
  let
    platform = ncgPlatform config
    format = intFormat width

  if ncgBmiVersion config >= Just BMI2
    then do
      code_src  <- getAnyReg src
      code_mask <- getAnyReg mask
      src_r     <- getNewRegNat format
      mask_r    <- getNewRegNat format
      let dst_r = getRegisterReg platform (CmmLocal dst)
      return $ code_src src_r `appOL` code_mask mask_r `appOL`
          -- PDEP only supports 32-bit and 64-bit operands
          ( if width == W8 || width == W16 then
              toOL
                [ MOVZxL format (OpReg src_r ) (OpReg src_r )
                , MOVZxL format (OpReg mask_r) (OpReg mask_r)
                , PDEP   II32 (OpReg mask_r) (OpReg src_r ) dst_r
                , MOVZxL format (OpReg dst_r) (OpReg dst_r) -- Truncate to op width
                ]
            else
              unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r)
          )
    else
      -- generate C call to hs_pdepN in ghc-prim
      genPrimCCall bid (pdepLabel width) [dst] [src,mask]


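-- Parallel bit extract: uses the BMI2 PEXT instruction when available,
-- otherwise calls hs_pextN in ghc-prim.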
genPext :: BlockId -> Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genPext bid width dst src mask = do
  config <- getConfig
  if ncgBmiVersion config >= Just BMI2
    then do
      let format   = intFormat width
      let dst_r    = getLocalRegReg dst
      code_src  <- getAnyReg src
      code_mask <- getAnyReg mask
      src_r     <- getNewRegNat format
      mask_r    <- getNewRegNat format
      return $ code_src src_r `appOL` code_mask mask_r `appOL`
          (if width == W8 || width == W16 then
               -- The PEXT instruction doesn't take an r/m8 or r/m16 operand
              toOL
                [ MOVZxL format (OpReg src_r ) (OpReg src_r )
                , MOVZxL format (OpReg mask_r) (OpReg mask_r)
                , PEXT   II32   (OpReg mask_r) (OpReg src_r ) dst_r
                , MOVZxL format (OpReg dst_r)  (OpReg dst_r) -- Truncate to op width
                ]
            else
              unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)
          )
    else
      -- generate C call to hs_pextN in ghc-prim
      genPrimCCall bid (pextLabel width) [dst] [src,mask]

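-- Count leading zeros: LZCNT when BMI2 is enabled, otherwise a BSR/CMOV
-- sequence; 64-bit counts on 32-bit platforms go through hs_clz64.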
genClz :: BlockId -> Width -> CmmFormal -> CmmActual -> NatM InstrBlock
genClz bid width dst src = do
  is32Bit <- is32BitPlatform
  config <- getConfig
  if is32Bit && width == W64

    then
      -- Fallback to `hs_clz64` on i386
      genPrimCCall bid (clzLabel width) [dst] [src]

    else do
      code_src <- getAnyReg src
      let dst_r = getLocalRegReg dst
      if ncgBmiVersion config >= Just BMI2
        then do
          src_r <- getNewRegNat (intFormat width)
          return $ appOL (code_src src_r) $ case width of
            W8 -> toOL
                [ MOVZxL II8  (OpReg src_r)       (OpReg src_r) -- zero-extend to 32 bit
                , LZCNT  II32 (OpReg src_r)       dst_r         -- lzcnt with extra 24 zeros
                , SUB    II32 (OpImm (ImmInt 24)) (OpReg dst_r) -- compensate for extra zeros
                ]
            W16 -> toOL
                [ LZCNT  II16 (OpReg src_r) dst_r
                , MOVZxL II16 (OpReg dst_r) (OpReg dst_r) -- zero-extend from 16 bit
                ]
            _ -> unitOL (LZCNT (intFormat width) (OpReg src_r) dst_r)
        else do
          let format = if width == W8 then II16 else intFormat width
          let bw = widthInBits width
          src_r <- getNewRegNat format
          tmp_r <- getNewRegNat format
          return $ code_src src_r `appOL` toOL
                   ([ MOVZxL   II8    (OpReg src_r) (OpReg src_r) | width == W8 ] ++
                    [ BSR      format (OpReg src_r) tmp_r
                    , MOV      II32   (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
                    , CMOV NE  format (OpReg tmp_r) dst_r
                    , XOR      format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
                    ]) -- NB: We don't need to zero-extend the result for the
                       -- W8/W16 cases because the 'MOV' insn already
                       -- took care of implicitly clearing the upper bits

genWordToFloat :: BlockId -> Width -> CmmFormal -> CmmActual -> NatM InstrBlock
genWordToFloat bid width dst src =
  -- TODO: generate assembly instead
  genPrimCCall bid (word2FloatLabel width) [dst] [src]

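-- Note: an aligned, register-sized load is atomic on x86 and (under the TSO
-- model) already behaves like an acquire load, so no fence is emitted here
-- regardless of the requested ordering.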
genAtomicRead :: Width -> MemoryOrdering -> LocalReg -> CmmExpr -> NatM InstrBlock
genAtomicRead width _mord dst addr = do
  let fmt = intFormat width
  load_code <- intLoadCode (MOV fmt) addr
  return (load_code (getLocalRegReg dst))

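-- Note: a plain store already has release semantics on x86, so only a
-- sequentially-consistent store needs the trailing MFENCE emitted below.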
genAtomicWrite :: Width -> MemoryOrdering -> CmmExpr -> CmmExpr -> NatM InstrBlock
genAtomicWrite width mord addr val = do
  code <- assignMem_IntCode (intFormat width) addr val
  let needs_fence = case mord of
        MemOrderSeqCst  -> True
        MemOrderRelease -> False
        MemOrderAcquire -> pprPanic "genAtomicWrite: acquire ordering on write" empty
        MemOrderRelaxed -> False
  return $ if needs_fence then code `snocOL` MFENCE else code

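-- Atomic compare-and-exchange via LOCK CMPXCHG: the expected value goes in
-- eax and the value found in memory is returned in 'dst'. 64-bit operations
-- on 32-bit platforms fall back to hs_cmpxchgN.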
genCmpXchg
  :: BlockId
  -> Width
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genCmpXchg bid width dst addr old new = do
  is32Bit <- is32BitPlatform
  -- On x86 we don't have enough registers to use cmpxchg with a
  -- complicated addressing mode, so on that architecture we
  -- pre-compute the address first.
  if not (is32Bit && width == W64)
    then do
      let format = intFormat width
      Amode amode addr_code <- getSimpleAmode addr
      newval <- getNewRegNat format
      newval_code <- getAnyReg new
      oldval <- getNewRegNat format
      oldval_code <- getAnyReg old
      platform <- getPlatform
      let dst_r    = getRegisterReg platform (CmmLocal dst)
          code     = toOL
                     [ MOV format (OpReg oldval) (OpReg eax)
                     , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
                     , MOV format (OpReg eax) (OpReg dst_r)
                     ]
      return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
          `appOL` code
    else
      -- generate C call to hs_cmpxchgN in ghc-prim
      genPrimCCall bid (cmpxchgLabel width) [dst] [addr,old,new]
      -- TODO: implement cmpxchg8b instruction

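-- Atomic exchange of a memory location with a register via XCHG, which is
-- implicitly locked when given a memory operand.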
genXchg :: Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genXchg width dst addr value = do
  is32Bit <- is32BitPlatform

  when (is32Bit && width == W64) $
    panic "genXchg: 64bit atomic exchange not supported on 32bit platforms"

  Amode amode addr_code <- getSimpleAmode addr
  (newval, newval_code) <- getSomeReg value
  let format   = intFormat width
  let dst_r    = getLocalRegReg dst
  -- Copy the value into the target register, perform the exchange.
  let code     = toOL
                 [ MOV format (OpReg newval) (OpReg dst_r)
                  -- On X86 xchg implies a lock prefix if we use a memory argument,
                  -- so this is atomic.
                 , XCHG format (OpAddr amode) dst_r
                 ]
  return $ addr_code `appOL` newval_code `appOL` code


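-- Floating-point absolute value: AND with a mask that clears only the sign
-- bit of the given float format.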
genFloatAbs :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genFloatAbs width dst src = do
  let
    format = floatFormat width
    const = case width of
      W32 -> CmmInt 0x7fffffff W32
      W64 -> CmmInt 0x7fffffffffffffff W64
      _   -> pprPanic "genFloatAbs: invalid width" (ppr width)
  src_code <- getAnyReg src
  Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes width) const
  tmp <- getNewRegNat format
  let dst_r = getLocalRegReg dst
  pure $ src_code dst_r `appOL` amode_code `appOL` toOL
           [ MOV format (OpAddr amode) (OpReg tmp)
           , AND format (OpReg tmp) (OpReg dst_r)
           ]


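-- Floating-point square root, computed directly into the destination register
-- with the scalar SQRT instruction.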
genFloatSqrt :: Format -> LocalReg -> CmmExpr -> NatM InstrBlock
genFloatSqrt format dst src = do
  let dst_r = getLocalRegReg dst
  src_code <- getAnyReg src
  pure $ src_code dst_r `snocOL` SQRT format (OpReg dst_r) dst_r


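-- Add or subtract two words, additionally returning the carry/borrow as a
-- 0-or-1 word: the flag selected by 'cond' is captured with SETcc and
-- zero-extended into 'res_c'.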
genAddSubRetCarry
  :: Width
  -> (Format -> Operand -> Operand -> Instr)
  -> (Format -> Maybe (Operand -> Operand -> Instr))
  -> Cond
  -> LocalReg
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genAddSubRetCarry width instr mrevinstr cond res_r res_c arg_x arg_y = do
  platform <- ncgPlatform <$> getConfig
  let format = intFormat width
  rCode <- anyReg =<< trivialCode width (instr format)
                        (mrevinstr format) arg_x arg_y
  reg_tmp <- getNewRegNat II8
  let reg_c = getRegisterReg platform (CmmLocal res_c)
      reg_r = getRegisterReg platform (CmmLocal res_r)
      code = rCode reg_r `snocOL`
             SETCC cond (OpReg reg_tmp) `snocOL`
             MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
  return code


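-- Add two words producing a (carry, low word) pair: the low word comes from a
-- flag-setting add and the carry is accumulated into the (initially zero)
-- high result with ADC 0.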
genAddWithCarry
  :: Width
  -> LocalReg
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genAddWithCarry width res_h res_l arg_x arg_y = do
  hCode <- getAnyReg (CmmLit (CmmInt 0 width))
  let format = intFormat width
  lCode <- anyReg =<< trivialCode width (ADD_CC format)
                        (Just (ADD_CC format)) arg_x arg_y
  let reg_l = getLocalRegReg res_l
      reg_h = getLocalRegReg res_h
      code = hCode reg_h `appOL`
             lCode reg_l `snocOL`
             ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
  return code


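-- Signed widening multiply via one-operand IMUL: the double-width product is
-- delivered in rdx:rax, and the carry flag (signalling that the high half is
-- significant) is copied into 'res_c'.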
genSignedLargeMul
  :: Width
  -> LocalReg
  -> LocalReg
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> NatM (OrdList Instr)
genSignedLargeMul width res_c res_h res_l arg_x arg_y = do
  (y_reg, y_code) <- getRegOrMem arg_y
  x_code <- getAnyReg arg_x
  reg_tmp <- getNewRegNat II8
  let format = intFormat width
      reg_h = getLocalRegReg res_h
      reg_l = getLocalRegReg res_l
      reg_c = getLocalRegReg res_c
      code = y_code `appOL`
             x_code rax `appOL`
             toOL [ IMUL2 format y_reg
                  , MOV format (OpReg rdx) (OpReg reg_h)
                  , MOV format (OpReg rax) (OpReg reg_l)
                  , SETCC CARRY (OpReg reg_tmp)
                  , MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
                  ]
  return code

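-- Unsigned widening multiply via one-operand MUL: high word in rdx, low word
-- in rax.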
genUnsignedLargeMul
  :: Width
  -> LocalReg
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> NatM (OrdList Instr)
genUnsignedLargeMul width res_h res_l arg_x arg_y = do
  (y_reg, y_code) <- getRegOrMem arg_y
  x_code <- getAnyReg arg_x
  let format = intFormat width
      reg_h = getLocalRegReg res_h
      reg_l = getLocalRegReg res_l
      code = y_code `appOL`
             x_code rax `appOL`
             toOL [MUL2 format y_reg,
                   MOV format (OpReg rdx) (OpReg reg_h),
                   MOV format (OpReg rax) (OpReg reg_l)]
  return code


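-- Quotient and remainder in one go using DIV/IDIV, which divide rdx:rax by the
-- operand, leaving the quotient in rax and the remainder in rdx. W8 is widened
-- to W16 first (see Note [DIV/IDIV for bytes]); when no high word is supplied
-- it is zeroed (XOR) or sign-extended (CLTD).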
genQuotRem
  :: Width
  -> Bool
  -> LocalReg
  -> LocalReg
  -> Maybe CmmExpr
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genQuotRem width signed res_q res_r m_arg_x_high arg_x_low arg_y = do
  case width of
    W8 -> do
      -- See Note [DIV/IDIV for bytes]
      let widen | signed = MO_SS_Conv W8 W16
                | otherwise = MO_UU_Conv W8 W16
          arg_x_low_16 = CmmMachOp widen [arg_x_low]
          arg_y_16 = CmmMachOp widen [arg_y]
          m_arg_x_high_16 = (\p -> CmmMachOp widen [p]) <$> m_arg_x_high
      genQuotRem W16 signed res_q res_r m_arg_x_high_16 arg_x_low_16 arg_y_16

    _ -> do
      let format = intFormat width
          reg_q = getLocalRegReg res_q
          reg_r = getLocalRegReg res_r
          widen | signed    = CLTD format
                | otherwise = XOR format (OpReg rdx) (OpReg rdx)
          instr | signed    = IDIV
                | otherwise = DIV
      (y_reg, y_code) <- getRegOrMem arg_y
      x_low_code <- getAnyReg arg_x_low
      x_high_code <- case m_arg_x_high of
                     Just arg_x_high ->
                         getAnyReg arg_x_high
                     Nothing ->
                         return $ const $ unitOL widen
      return $ y_code `appOL`
               x_low_code rax `appOL`
               x_high_code rdx `appOL`
               toOL [instr format y_reg,
                     MOV format (OpReg rax) (OpReg reg_q),
                     MOV format (OpReg rdx) (OpReg reg_r)]