{-# LANGUAGE CPP #-}

module GHC.StgToCmm.InfoTableProv (emitIpeBufferListNode) where

import Foreign

#if defined(HAVE_LIBZSTD)
import Foreign.C.Types
import qualified Data.ByteString.Internal as BSI
import GHC.IO (unsafePerformIO)
#endif

import GHC.Prelude
import GHC.Platform
import GHC.Types.SrcLoc (pprUserRealSpan, srcSpanFile)
import GHC.Types.Unique.DSM
import GHC.Unit.Module
import GHC.Utils.Outputable
import GHC.Data.FastString (fastStringToShortText, unpackFS, LexicalFastString(..))

import GHC.Cmm
import GHC.Cmm.CLabel
import GHC.Cmm.Utils

import GHC.StgToCmm.Config
import GHC.StgToCmm.Monad

import GHC.Data.ShortText (ShortText)
import qualified GHC.Data.ShortText as ST

import Control.Monad.Trans.State.Strict

import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as BSB
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Map.Strict as M

{-
Note [Compression and Decompression of IPE data]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Compiling with `-finfo-table-map` causes build results to include a map from
info tables to source positions called the info table provenance entry (IPE)
map. See Note [Mapping Info Tables to Source Positions]. The IPE information
can grow the size of build results significantly. At the time of writing, a
default build of GHC results in a total of 109M of libHSghc-*.so build results.
A default+ipe build of GHC (see ./hadrian/doc/flavours.md) results in 262M of
libHSghc-*.so build results without compression.

We reduce the impact of IPE data on the size of build results by compressing
the data before it is emitted using the zstd compression library. See
Note [The Info Table Provenance Entry (IPE) Map] for information on the layout
of IPE data on disk and in the RTS. We cannot simply compress all data held in
the IPE entry buffer, as the pointers to info tables must be converted to
memory addresses during linking. Therefore, we can only compress the strings
table and the IPE entries themselves (which essentially only consist of indices
into the strings table).

With compression, a default+ipe build of GHC results in a total of 205M of
libHSghc-*.so build results. This is over a 20% reduction from the uncompressed
case.

Decompression happens lazily, as it only occurs when the IPE map is
constructed (which is also done lazily on first lookup or traversal). During
construction, the 'compressed' field of each IPE buffer list node is examined.
If the field indicates that the data has been compressed, the entry data and
strings table are decompressed before continuing with the normal IPE map
construction.
-}

emitIpeBufferListNode ::
     Module
  -> [InfoProvEnt]
  -> DUniqSupply -- ^ Symbols created source uniques deterministically
                 -- All uniques must be created from this supply.
                 -- NB: If you are creating a new symbol within this function,
                 -- make sure it is local only (as in not `externallyVisibleCLabel`).
                 -- If you need it to be global, reconsider the comment on the
                 -- call of emitIpeBufferListNode in Cmm.Parser.
  -> FCode DUniqSupply
emitIpeBufferListNode :: Module -> [InfoProvEnt] -> DUniqSupply -> FCode DUniqSupply
emitIpeBufferListNode Module
_ [] DUniqSupply
dus = DUniqSupply -> FCode DUniqSupply
forall a. a -> FCode a
forall (m :: * -> *) a. Monad m => a -> m a
return DUniqSupply
dus
emitIpeBufferListNode Module
this_mod [InfoProvEnt]
ents DUniqSupply
dus0 = do
    cfg <- FCode StgToCmmConfig
getStgToCmmConfig

    let (u1, dus1) = takeUniqueFromDSupply dus0
        (u2, dus2) = takeUniqueFromDSupply dus1
        (u3, dus3) = takeUniqueFromDSupply dus2

        tables_lbl  = Unique -> CLabel
mkStringLitLabel Unique
u1
        strings_lbl = Unique -> CLabel
mkStringLitLabel Unique
u2
        entries_lbl = Unique -> CLabel
mkStringLitLabel Unique
u3

        ctx      = StgToCmmConfig -> SDocContext
stgToCmmContext StgToCmmConfig
cfg
        platform = StgToCmmConfig -> Platform
stgToCmmPlatform StgToCmmConfig
cfg
        int Int
n    = Platform -> Int -> CmmLit
mkIntCLit Platform
platform Int
n

        ((cg_ipes, unit_id, module_name), strtab) = flip runState emptyStringTable $ do
          unit_id <- lookupStringTable $ ST.pack $ renderWithContext ctx (ppr $ moduleName this_mod)
          module_name <- lookupStringTable $ ST.pack $ renderWithContext ctx (ppr $ moduleUnit this_mod)
          cg_ipes <- mapM (toCgIPE platform ctx) ents
          return (cg_ipes, unit_id, module_name)

        tables :: [CmmStatic]
        tables = (CgInfoProvEnt -> CmmStatic) -> [CgInfoProvEnt] -> [CmmStatic]
forall a b. (a -> b) -> [a] -> [b]
map (CmmLit -> CmmStatic
CmmStaticLit (CmmLit -> CmmStatic)
-> (CgInfoProvEnt -> CmmLit) -> CgInfoProvEnt -> CmmStatic
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CLabel -> CmmLit
CmmLabel (CLabel -> CmmLit)
-> (CgInfoProvEnt -> CLabel) -> CgInfoProvEnt -> CmmLit
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CgInfoProvEnt -> CLabel
ipeInfoTablePtr) [CgInfoProvEnt]
cg_ipes

        uncompressed_strings :: BS.ByteString
        uncompressed_strings = StringTable -> ByteString
getStringTableStrings StringTable
strtab

        strings_bytes :: BS.ByteString
        strings_bytes = Int -> ByteString -> ByteString
compress Int
defaultCompressionLevel ByteString
uncompressed_strings

        strings :: [CmmStatic]
        strings = [ByteString -> CmmStatic
CmmString ByteString
strings_bytes]

        uncompressed_entries :: BS.ByteString
        uncompressed_entries = ByteOrder -> [CgInfoProvEnt] -> ByteString
toIpeBufferEntries (Platform -> ByteOrder
platformByteOrder Platform
platform) [CgInfoProvEnt]
cg_ipes

        entries_bytes :: BS.ByteString
        entries_bytes = Int -> ByteString -> ByteString
compress Int
defaultCompressionLevel ByteString
uncompressed_entries

        entries :: [CmmStatic]
        entries = [ByteString -> CmmStatic
CmmString ByteString
entries_bytes]

        ipe_buffer_lbl :: CLabel
        ipe_buffer_lbl = Module -> CLabel
mkIPELabel Module
this_mod

        ipe_buffer_node :: [CmmStatic]
        ipe_buffer_node = (CmmLit -> CmmStatic) -> [CmmLit] -> [CmmStatic]
forall a b. (a -> b) -> [a] -> [b]
map CmmLit -> CmmStatic
CmmStaticLit
          [ -- 'next' field
            Platform -> CmmLit
zeroCLit Platform
platform

            -- 'compressed' field
          , Int -> CmmLit
int Int
do_compress

            -- 'count' field
          , Int -> CmmLit
int (Int -> CmmLit) -> Int -> CmmLit
forall a b. (a -> b) -> a -> b
$ [CgInfoProvEnt] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CgInfoProvEnt]
cg_ipes

            -- 'tables' field
          , CLabel -> CmmLit
CmmLabel CLabel
tables_lbl

            -- 'entries' field
          , CLabel -> CmmLit
CmmLabel CLabel
entries_lbl

            -- 'entries_size' field (decompressed size)
          , Int -> CmmLit
int (Int -> CmmLit) -> Int -> CmmLit
forall a b. (a -> b) -> a -> b
$ ByteString -> Int
BS.length ByteString
uncompressed_entries

            -- 'string_table' field
          , CLabel -> CmmLit
CmmLabel CLabel
strings_lbl

            -- 'string_table_size' field (decompressed size)
          , Int -> CmmLit
int (Int -> CmmLit) -> Int -> CmmLit
forall a b. (a -> b) -> a -> b
$ ByteString -> Int
BS.length ByteString
uncompressed_strings

            -- 'module_name' field
          , Integer -> Width -> CmmLit
CmmInt (StrTabOffset -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral StrTabOffset
module_name) Width
W32

            -- 'unit_id' field
          , Integer -> Width -> CmmLit
CmmInt (StrTabOffset -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral StrTabOffset
unit_id) Width
W32
          ]

    -- Emit the list of info table pointers
    emitDecl $ CmmData
      (Section Data tables_lbl)
      (CmmStaticsRaw tables_lbl tables)

    -- Emit the strings table
    emitDecl $ CmmData
      (Section Data strings_lbl)
      (CmmStaticsRaw strings_lbl strings)

    -- Emit the list of IPE buffer entries
    emitDecl $ CmmData
      (Section Data entries_lbl)
      (CmmStaticsRaw entries_lbl entries)

    -- Emit the IPE buffer list node
    emitDecl $ CmmData
      (Section Data ipe_buffer_lbl)
      (CmmStaticsRaw ipe_buffer_lbl ipe_buffer_node)

    return dus3

-- | Emit the fields of an IpeBufferEntry struct for each entry in a given list.
toIpeBufferEntries ::
     ByteOrder       -- ^ Byte order to write the data in
  -> [CgInfoProvEnt] -- ^ List of IPE buffer entries
  -> BS.ByteString
toIpeBufferEntries :: ByteOrder -> [CgInfoProvEnt] -> ByteString
toIpeBufferEntries ByteOrder
byte_order [CgInfoProvEnt]
cg_ipes =
      LazyByteString -> ByteString
BSL.toStrict (LazyByteString -> ByteString)
-> ([Builder] -> LazyByteString) -> [Builder] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> LazyByteString
BSB.toLazyByteString (Builder -> LazyByteString)
-> ([Builder] -> Builder) -> [Builder] -> LazyByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat
    ([Builder] -> ByteString) -> [Builder] -> ByteString
forall a b. (a -> b) -> a -> b
$ (CgInfoProvEnt -> Builder) -> [CgInfoProvEnt] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map ([Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> (CgInfoProvEnt -> [Builder]) -> CgInfoProvEnt -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (StrTabOffset -> Builder) -> [StrTabOffset] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map StrTabOffset -> Builder
word32Builder ([StrTabOffset] -> [Builder])
-> (CgInfoProvEnt -> [StrTabOffset]) -> CgInfoProvEnt -> [Builder]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CgInfoProvEnt -> [StrTabOffset]
to_ipe_buf_ent) [CgInfoProvEnt]
cg_ipes
  where
    to_ipe_buf_ent :: CgInfoProvEnt -> [Word32]
    to_ipe_buf_ent :: CgInfoProvEnt -> [StrTabOffset]
to_ipe_buf_ent CgInfoProvEnt
cg_ipe =
      [ CgInfoProvEnt -> StrTabOffset
ipeTableName CgInfoProvEnt
cg_ipe
      , StrTabOffset -> StrTabOffset
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrTabOffset -> StrTabOffset) -> StrTabOffset -> StrTabOffset
forall a b. (a -> b) -> a -> b
$ CgInfoProvEnt -> StrTabOffset
ipeClosureDesc CgInfoProvEnt
cg_ipe
      , CgInfoProvEnt -> StrTabOffset
ipeTypeDesc CgInfoProvEnt
cg_ipe
      , CgInfoProvEnt -> StrTabOffset
ipeLabel CgInfoProvEnt
cg_ipe
      , CgInfoProvEnt -> StrTabOffset
ipeSrcFile CgInfoProvEnt
cg_ipe
      , CgInfoProvEnt -> StrTabOffset
ipeSrcSpan CgInfoProvEnt
cg_ipe
      ]

    word32Builder :: Word32 -> BSB.Builder
    word32Builder :: StrTabOffset -> Builder
word32Builder = case ByteOrder
byte_order of
      ByteOrder
BigEndian    -> StrTabOffset -> Builder
BSB.word32BE
      ByteOrder
LittleEndian -> StrTabOffset -> Builder
BSB.word32LE

toCgIPE :: Platform -> SDocContext -> InfoProvEnt -> State StringTable CgInfoProvEnt
toCgIPE :: Platform
-> SDocContext
-> InfoProvEnt
-> StateT StringTable Identity CgInfoProvEnt
toCgIPE Platform
platform SDocContext
ctx InfoProvEnt
ipe = do
    table_name <- ShortText -> State StringTable StrTabOffset
lookupStringTable (ShortText -> State StringTable StrTabOffset)
-> ShortText -> State StringTable StrTabOffset
forall a b. (a -> b) -> a -> b
$ String -> ShortText
ST.pack (String -> ShortText) -> String -> ShortText
forall a b. (a -> b) -> a -> b
$ SDocContext -> SDoc -> String
renderWithContext SDocContext
ctx (Platform -> CLabel -> SDoc
forall doc. IsLine doc => Platform -> CLabel -> doc
pprCLabel Platform
platform (InfoProvEnt -> CLabel
infoTablePtr InfoProvEnt
ipe))
    type_desc <- lookupStringTable $ ST.pack $ infoTableType ipe
    let label_str = String
-> ((RealSrcSpan, LexicalFastString) -> String)
-> Maybe (RealSrcSpan, LexicalFastString)
-> String
forall b a. b -> (a -> b) -> Maybe a -> b
maybe String
"" ((\(LexicalFastString FastString
s) -> FastString -> String
unpackFS FastString
s) (LexicalFastString -> String)
-> ((RealSrcSpan, LexicalFastString) -> LexicalFastString)
-> (RealSrcSpan, LexicalFastString)
-> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (RealSrcSpan, LexicalFastString) -> LexicalFastString
forall a b. (a, b) -> b
snd) (InfoProvEnt -> Maybe (RealSrcSpan, LexicalFastString)
infoTableProv InfoProvEnt
ipe)
    let (src_loc_file, src_loc_span) =
            case infoTableProv ipe of
              Maybe (RealSrcSpan, LexicalFastString)
Nothing -> (ShortText
forall a. Monoid a => a
mempty, String
"")
              Just (RealSrcSpan
span, LexicalFastString
_) ->
                  let file :: ShortText
file = FastString -> ShortText
fastStringToShortText (FastString -> ShortText) -> FastString -> ShortText
forall a b. (a -> b) -> a -> b
$ RealSrcSpan -> FastString
srcSpanFile RealSrcSpan
span
                      coords :: String
coords = SDocContext -> SDoc -> String
renderWithContext SDocContext
ctx (Bool -> RealSrcSpan -> SDoc
pprUserRealSpan Bool
False RealSrcSpan
span)
                  in (ShortText
file, String
coords)
    label    <- lookupStringTable $ ST.pack label_str
    src_file <- lookupStringTable src_loc_file
    src_span <- lookupStringTable $ ST.pack src_loc_span
    return $ CgInfoProvEnt { ipeInfoTablePtr = infoTablePtr ipe
                           , ipeTableName = table_name
                           , ipeClosureDesc = fromIntegral (infoProvEntClosureType ipe)
                           , ipeTypeDesc = type_desc
                           , ipeLabel = label
                           , ipeSrcFile = src_file
                           , ipeSrcSpan = src_span
                           }

data CgInfoProvEnt = CgInfoProvEnt
                               { CgInfoProvEnt -> CLabel
ipeInfoTablePtr :: !CLabel
                               , CgInfoProvEnt -> StrTabOffset
ipeTableName :: !StrTabOffset
                               , CgInfoProvEnt -> StrTabOffset
ipeClosureDesc :: !Word32
                               , CgInfoProvEnt -> StrTabOffset
ipeTypeDesc :: !StrTabOffset
                               , CgInfoProvEnt -> StrTabOffset
ipeLabel :: !StrTabOffset
                               , CgInfoProvEnt -> StrTabOffset
ipeSrcFile :: !StrTabOffset
                               , CgInfoProvEnt -> StrTabOffset
ipeSrcSpan :: !StrTabOffset
                               }

data StringTable = StringTable { StringTable -> DList ShortText
stStrings :: DList ShortText
                               , StringTable -> Int
stLength :: !Int
                               , StringTable -> Map ShortText StrTabOffset
stLookup :: !(M.Map ShortText StrTabOffset)
                               }

type StrTabOffset = Word32

emptyStringTable :: StringTable
emptyStringTable :: StringTable
emptyStringTable =
    StringTable { stStrings :: DList ShortText
stStrings = DList ShortText
forall a. DList a
emptyDList
                , stLength :: Int
stLength = Int
0
                , stLookup :: Map ShortText StrTabOffset
stLookup = Map ShortText StrTabOffset
forall k a. Map k a
M.empty
                }

getStringTableStrings :: StringTable -> BS.ByteString
getStringTableStrings :: StringTable -> ByteString
getStringTableStrings StringTable
st =
    LazyByteString -> ByteString
BSL.toStrict (LazyByteString -> ByteString) -> LazyByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Builder -> LazyByteString
BSB.toLazyByteString
    (Builder -> LazyByteString) -> Builder -> LazyByteString
forall a b. (a -> b) -> a -> b
$ (ShortText -> Builder) -> [ShortText] -> Builder
forall m a. Monoid m => (a -> m) -> [a] -> m
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap ShortText -> Builder
f ([ShortText] -> Builder) -> [ShortText] -> Builder
forall a b. (a -> b) -> a -> b
$ DList ShortText -> [ShortText]
forall a. DList a -> [a]
dlistToList (StringTable -> DList ShortText
stStrings StringTable
st)
  where
    f :: ShortText -> Builder
f ShortText
x = ShortByteString -> Builder
BSB.shortByteString (ShortText -> ShortByteString
ST.contents ShortText
x) Builder -> Builder -> Builder
forall a. Monoid a => a -> a -> a
`mappend` Word8 -> Builder
BSB.word8 Word8
0

lookupStringTable :: ShortText -> State StringTable StrTabOffset
lookupStringTable :: ShortText -> State StringTable StrTabOffset
lookupStringTable ShortText
str = (StringTable -> (StrTabOffset, StringTable))
-> State StringTable StrTabOffset
forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state ((StringTable -> (StrTabOffset, StringTable))
 -> State StringTable StrTabOffset)
-> (StringTable -> (StrTabOffset, StringTable))
-> State StringTable StrTabOffset
forall a b. (a -> b) -> a -> b
$ \StringTable
st ->
    case ShortText -> Map ShortText StrTabOffset -> Maybe StrTabOffset
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup ShortText
str (StringTable -> Map ShortText StrTabOffset
stLookup StringTable
st) of
      Just StrTabOffset
off -> (StrTabOffset
off, StringTable
st)
      Maybe StrTabOffset
Nothing ->
          let !st' :: StringTable
st' = StringTable
st { stStrings = stStrings st `snoc` str
                        , stLength  = stLength st + ST.byteLength str + 1
                        , stLookup  = M.insert str res (stLookup st)
                        }
              res :: StrTabOffset
res = Int -> StrTabOffset
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StringTable -> Int
stLength StringTable
st)
          in (StrTabOffset
res, StringTable
st')

do_compress :: Int
compress    :: Int -> BS.ByteString -> BS.ByteString
#if !defined(HAVE_LIBZSTD)
do_compress :: Int
do_compress   = Int
0
compress :: Int -> ByteString -> ByteString
compress Int
_ ByteString
bs = ByteString
bs
#else
do_compress = 1

compress clvl (BSI.PS srcForeignPtr off len) = unsafePerformIO $
    withForeignPtr srcForeignPtr $ \srcPtr -> do
      maxCompressedSize <- zstd_compress_bound $ fromIntegral len
      dstForeignPtr <- BSI.mallocByteString (fromIntegral maxCompressedSize)
      withForeignPtr dstForeignPtr $ \dstPtr -> do
        compressedSize <- fromIntegral <$>
          zstd_compress
            dstPtr
            maxCompressedSize
            (srcPtr `plusPtr` off)
            (fromIntegral len)
            (fromIntegral clvl)
        BSI.create compressedSize $ \p -> copyBytes p dstPtr compressedSize

foreign import ccall unsafe "ZSTD_compress"
    zstd_compress ::
         Ptr dst -- ^ Destination buffer
      -> CSize   -- ^ Capacity of destination buffer
      -> Ptr src -- ^ Source buffer
      -> CSize   -- ^ Size of source buffer
      -> CInt    -- ^ Compression level
      -> IO CSize

-- | Compute the maximum compressed size for a given source buffer size
foreign import ccall unsafe "ZSTD_compressBound"
    zstd_compress_bound ::
         CSize -- ^ Size of source buffer
      -> IO CSize
#endif

defaultCompressionLevel :: Int
defaultCompressionLevel :: Int
defaultCompressionLevel = Int
3

newtype DList a = DList ([a] -> [a])

emptyDList :: DList a
emptyDList :: forall a. DList a
emptyDList = ([a] -> [a]) -> DList a
forall a. ([a] -> [a]) -> DList a
DList [a] -> [a]
forall a. a -> a
id

snoc :: DList a -> a -> DList a
snoc :: forall a. DList a -> a -> DList a
snoc (DList [a] -> [a]
f) a
x = ([a] -> [a]) -> DList a
forall a. ([a] -> [a]) -> DList a
DList ([a] -> [a]
f ([a] -> [a]) -> ([a] -> [a]) -> [a] -> [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:))

dlistToList :: DList a -> [a]
dlistToList :: forall a. DList a -> [a]
dlistToList (DList [a] -> [a]
f) = [a] -> [a]
f []