{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE BangPatterns #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  GHC.Internal.Foreign.C.String.Encoding
-- Copyright   :  (c) The University of Glasgow, 2008-2011
-- License     :  see libraries/base/LICENSE
--
-- Maintainer  :  libraries@haskell.org
-- Stability   :  internal
-- Portability :  non-portable
--
-- Foreign marshalling support for CStrings with configurable encodings
--
-----------------------------------------------------------------------------

module GHC.Internal.Foreign.C.String.Encoding (
    -- * C strings with a configurable encoding
    CString, CStringLen,

    -- * Conversion of C strings into Haskell strings
    peekCString,
    peekCStringLen,

    -- * Conversion of Haskell strings into C strings
    newCString,
    newCStringLen,
    newCStringLen0,

    -- * Conversion of Haskell strings into C strings using temporary storage
    withCString,
    withCStringLen,
    withCStringLen0,
    withCStringsLen,

    charIsRepresentable,
  ) where

import GHC.Internal.Foreign.Marshal.Array
import GHC.Internal.Foreign.C.Types
import GHC.Internal.Foreign.Ptr
import GHC.Internal.Foreign.Storable

import GHC.Internal.Word

-- Imports for the locale-encoding version of marshallers

import GHC.Internal.Data.Tuple (fst)

import GHC.Internal.Show ( show )

import GHC.Internal.Foreign.Marshal.Alloc
import GHC.Internal.Foreign.ForeignPtr

import GHC.Debug
import GHC.Internal.List
import GHC.Internal.Num
import GHC.Internal.Base

import GHC.Internal.IO
import GHC.Internal.IO.Exception
import GHC.Internal.IO.Buffer
import GHC.Internal.IO.Encoding.Types


-- | Compile-time switch for this module's debug tracing.  While it is
-- 'False', 'putDebugMsg' discards every message it is given.
c_DEBUG_DUMP :: Bool
c_DEBUG_DUMP = False

-- | Emit a debug message with 'debugLn' when 'c_DEBUG_DUMP' is enabled;
-- otherwise ignore the message and do nothing.
putDebugMsg :: String -> IO ()
putDebugMsg | c_DEBUG_DUMP = debugLn
            | otherwise    = const (return ())


-- | A C string is a reference to an array of C characters terminated by NUL.
-- It is represented as a plain pointer to 'CChar'.
type CString    = Ptr CChar

-- | A string with explicit length information in bytes instead of a
-- terminating NUL (allowing NUL characters in the middle of the string).
-- The first component is the pointer to the data, the second the length.
type CStringLen = (Ptr CChar, Int)

-- exported functions
-- ------------------

-- | Marshal a NUL terminated C string into a Haskell string.
--
-- The length is determined by scanning for the first 'nUL' element with
-- 'lengthArray0'; that many bytes are then decoded using the given encoding.
peekCString    :: TextEncoding -> CString -> IO String
peekCString enc cp = do
    sz <- lengthArray0 nUL cp
    peekEncodedCString enc (cp, sz * cCharSize)

-- | Marshal a C string with explicit length into a Haskell string.
--
-- The length is given in bytes; the bytes are decoded using the given
-- encoding.
peekCStringLen           :: TextEncoding -> CStringLen -> IO String
peekCStringLen = peekEncodedCString

-- | Marshal a Haskell string into a NUL terminated C string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * new storage is allocated for the C string and must be
--   explicitly freed using 'GHC.Internal.Foreign.Marshal.Alloc.free' or
--   'GHC.Internal.Foreign.Marshal.Alloc.finalizerFree'.
--
newCString :: TextEncoding -> String -> IO CString
-- Encode with a terminator and drop the length component of the result.
newCString enc = fmap fst . newEncodedCString enc True

-- | Marshal a Haskell string into a C string (ie, character array) with
-- explicit length information.
--
-- Note that this does not NUL terminate the resulting string.
--
-- * new storage is allocated for the C string and must be
--   explicitly freed using 'GHC.Internal.Foreign.Marshal.Alloc.free' or
--   'GHC.Internal.Foreign.Marshal.Alloc.finalizerFree'.
--
newCStringLen     :: TextEncoding -> String -> IO CStringLen
newCStringLen enc = newEncodedCString enc False

-- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
withCString :: TextEncoding -> String -> (CString -> IO a) -> IO a
-- The byte length computed by the encoder is not passed on; the action only
-- receives the pointer.
withCString enc s act = withEncodedCString enc True s $ \(cp, _sz) -> act cp

-- | Marshal a Haskell string into a C string (ie, character array)
-- in temporary storage, with explicit length information.
--
-- Note that this does not NUL terminate the resulting string.
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
withCStringLen         :: TextEncoding -> String -> (CStringLen -> IO a) -> IO a
withCStringLen enc = withEncodedCString enc False

-- | Marshal a Haskell string into a NUL-terminated C string (ie, character array)
-- with explicit length information.
--
-- * new storage is allocated for the C string and must be
--   explicitly freed using 'GHC.Internal.Foreign.Marshal.Alloc.free' or
--   'GHC.Internal.Foreign.Marshal.Alloc.finalizerFree'.
--
-- @since base-4.19.0.0
newCStringLen0     :: TextEncoding -> String -> IO CStringLen
newCStringLen0 enc = newEncodedCString enc True

-- | Marshal a Haskell string into a NUL-terminated C string (ie, character array)
-- in temporary storage, with explicit length information.
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
-- @since base-4.19.0.0
withCStringLen0         :: TextEncoding -> String -> (CStringLen -> IO a) -> IO a
withCStringLen0 enc = withEncodedCString enc True

-- | Marshal a list of Haskell strings into an array of NUL terminated C strings
-- using temporary storage.  The continuation receives the number of strings
-- and a pointer to the array of string pointers.
--
-- * the Haskell strings may /not/ contain any NUL characters
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
withCStringsLen :: TextEncoding
                -> [String]
                -> (Int -> Ptr CString -> IO a)
                -> IO a
withCStringsLen enc strs f = go [] strs
  where
  -- Marshal one string per step, nesting each 'withCString' scope inside the
  -- previous one so that every pointer is still live when @f@ finally runs.
  -- Pointers are accumulated in reverse order.
  go cs (s:ss) = withCString enc s $ \c -> go (c:cs) ss
  -- Every string has been marshalled: restore the original order and hand
  -- the pointers to the continuation as an array.
  go cs []     = withArrayLen (reverse cs) f

-- | Determines whether a character can be accurately encoded in a
-- 'Foreign.C.String.CString'.
--
-- The character is encoded with 'withCString' and decoded again with
-- 'peekCString'; the result is 'True' only if exactly that one character
-- comes back.  An 'IOException' raised along the way yields 'False'.
--
-- Pretty much anyone who uses this function is in a state of sin because
-- whether or not a character is encodable will, in general, depend on the
-- context in which it occurs.
charIsRepresentable :: TextEncoding -> Char -> IO Bool
-- We force enc explicitly because `catch` is lazy in its
-- first argument. We would probably like to force c as well,
-- but unfortunately worker/wrapper produces very bad code for
-- that.
--
-- TODO If this function is performance-critical, it would probably
-- pay to use a single-character specialization of withCString. That
-- would allow worker/wrapper to actually eliminate Char boxes, and
-- would also get rid of the completely unnecessary cons allocation.
charIsRepresentable !enc c =
  withCString enc [c]
              (\cstr -> do str <- peekCString enc cstr
                           case str of
                             [ch] | ch == c -> pure True
                             _ -> pure False)
    `catch`
       \(_ :: IOException) -> pure False

-- auxiliary definitions
-- ----------------------

-- | C's end of string character (the zero 'CChar').
nUL :: CChar
nUL  = 0

-- | Size of a 'CChar' in bytes, as reported by its 'Storable' instance.
-- The 'undefined' argument is only a type witness for 'sizeOf'.
cCharSize :: Int
cCharSize = sizeOf (undefined :: CChar)


{-# INLINE peekEncodedCString #-}
-- | Decode a byte range into a Haskell 'String' using the decoder of the
-- given encoding.
--
-- The decoder is created with 'mkTextDecoder' and released with 'close'
-- through 'bracket'.  Input is decoded chunk by chunk into one reusable
-- character buffer; whenever the decoder stops with input still pending,
-- 'recover' is invoked for invalid or truncated sequences, the characters
-- produced so far are collected, and decoding continues with the rest.
peekEncodedCString :: TextEncoding -- ^ Encoding of CString
                   -> CStringLen   -- ^ Pointer to the bytes and their length in bytes
                   -> IO String    -- ^ String in Haskell terms
peekEncodedCString (TextEncoding { mkTextDecoder = mk_decoder }) (p, sz_bytes)
  = bracket mk_decoder close $ \decoder -> do
      let chunk_size = sz_bytes `max` 1 -- Decode buffer chunk size in characters: one iteration only for ASCII
      -- Wrap the caller's memory (without a finalizer) as a read buffer that
      -- already contains @sz_bytes@ bytes.
      !from0 <- fmap (\fp -> bufferAdd sz_bytes (emptyBuffer fp sz_bytes ReadBuffer)) $ newForeignPtr_ (castPtr p)
      !to    <- newCharBuffer chunk_size WriteBuffer

      let go !iteration !from = do
            -- Every round starts again from the same empty output buffer @to@.
            (why, from', !to') <- encode decoder from to
            if isEmptyBuffer from'
             then
              -- No input remaining: @why@ will be InputUnderflow, but we don't care
              withBuffer to' $ peekArray (bufferElems to')
             else do
              -- Input remaining: what went wrong?
              putDebugMsg ("peekEncodedCString: " ++ show iteration ++ " " ++ show why)
              (from'', to'') <- case why of
                -- These conditions are equally bad because
                -- they indicate malformed/truncated input
                InvalidSequence -> recover decoder from' to'
                InputUnderflow  -> recover decoder from' to'
                -- We will have more space next time round
                OutputUnderflow -> return (from', to')
              putDebugMsg ("peekEncodedCString: from " ++ summaryBuffer from ++ " " ++ summaryBuffer from' ++ " " ++ summaryBuffer from'')
              putDebugMsg ("peekEncodedCString: to " ++ summaryBuffer to ++ " " ++ summaryBuffer to' ++ " " ++ summaryBuffer to'')
              -- Copy out this round's characters before the buffer is reused.
              to_chars <- withBuffer to'' $ peekArray (bufferElems to'')
              fmap (to_chars++) $ go (iteration + 1) from''

      go (0 :: Int) from0

{-# INLINE withEncodedCString #-}
-- | Encode a Haskell string into temporary storage and run an action on the
-- resulting bytes.
--
-- The encoder is created with 'mkTextEncoder' and released with 'close'
-- through 'bracket'.  The output buffer comes from 'allocaBytes'; if it
-- turns out to be too small, encoding is retried with a buffer twice the
-- size.  The retry happens inside the previous 'allocaBytes' scope.
withEncodedCString :: TextEncoding         -- ^ Encoding of CString to create
                   -> Bool                 -- ^ Null-terminate?
                   -> String               -- ^ String to encode
                   -> (CStringLen -> IO a) -- ^ Worker that can safely use the allocated memory
                   -> IO a
withEncodedCString (TextEncoding { mkTextEncoder = mk_encoder }) null_terminate s act
  = bracket mk_encoder close $ \encoder -> withArrayLen s $ \sz p -> do
      -- Expose the marshalled characters as a read buffer holding @sz@ elements.
      from <- fmap (\fp -> bufferAdd sz (emptyBuffer fp sz ReadBuffer)) $ newForeignPtr_ p

      let go !iteration to_sz_bytes = do
           putDebugMsg ("withEncodedCString: " ++ show iteration)
           allocaBytes to_sz_bytes $ \to_p -> do
            -- See Note [Check *before* fill in withEncodedCString] about why
            -- this is subtle.
            mb_res <- tryFillBuffer encoder null_terminate from to_p to_sz_bytes
            case mb_res of
              Nothing  -> go (iteration + 1) (to_sz_bytes * 2)
              Just to_buf -> withCStringBuffer to_buf null_terminate act

      -- If the input string is ASCII, this value will ensure we only allocate once
      go (0 :: Int) (cCharSize * (sz + 1))

-- | Run an action on the contents of a filled byte buffer, viewed as a
-- 'CStringLen'.
--
-- When termination is requested, a zero byte is written at offset
-- @bufR to_buf@ before the action runs.  The length passed to the action is
-- @bufferElems to_buf@ and is not adjusted for that terminator.
withCStringBuffer :: Buffer Word8 -> Bool -> (CStringLen -> IO r) -> IO r
withCStringBuffer to_buf null_terminate act = do
  let bytes = bufferElems to_buf
  withBuffer to_buf $ \to_ptr -> do
    when null_terminate $ pokeElemOff to_ptr (bufR to_buf) 0
    act (castPtr to_ptr, bytes) -- NB: the length information is specified as being in *bytes*

{-# INLINE newEncodedCString #-}
-- | Encode a Haskell string into freshly allocated memory.
--
-- The encoder is created with 'mkTextEncoder' and released with 'close'
-- through 'bracket'.  The output buffer is obtained from 'mallocBytes' and
-- doubled with 'reallocBytes' whenever it proves too small.  On success the
-- buffer is returned to the caller, who is responsible for freeing it.  If
-- encoding fails with an exception, the buffer is freed before the exception
-- propagates.
newEncodedCString :: TextEncoding  -- ^ Encoding of CString to create
                  -> Bool          -- ^ Null-terminate?
                  -> String        -- ^ String to encode
                  -> IO CStringLen
newEncodedCString (TextEncoding { mkTextEncoder = mk_encoder }) null_terminate s
  = bracket mk_encoder close $ \encoder -> withArrayLen s $ \sz p -> do
      -- Expose the marshalled characters as a read buffer holding @sz@ elements.
      from <- fmap (\fp -> bufferAdd sz (emptyBuffer fp sz ReadBuffer)) $ newForeignPtr_ p

      let go !iteration to_p to_sz_bytes = do
           putDebugMsg ("newEncodedCString: " ++ show iteration)
           -- The encoder may throw (for instance from 'recover').  At this
           -- point we still own @to_p@ and nobody else has seen it, so
           -- release it rather than leak it.
           mb_res <- tryFillBuffer encoder null_terminate from to_p to_sz_bytes
                       `onException` free to_p
           case mb_res of
             Nothing  -> do
                 let to_sz_bytes' = to_sz_bytes * 2
                 to_p' <- reallocBytes to_p to_sz_bytes'
                 go (iteration + 1) to_p' to_sz_bytes'
             Just to_buf -> withCStringBuffer to_buf null_terminate return

      -- If the input string is ASCII, this value will ensure we only allocate once
      let to_sz_bytes = cCharSize * (sz + 1)
      to_p <- mallocBytes to_sz_bytes
      go (0 :: Int) to_p to_sz_bytes


-- | Try to encode the whole input buffer into the memory at the given
-- pointer, which is treated as a write buffer of the given size in bytes.
--
-- Returns @'Just' buf@ with the filled output buffer on success.  Returns
-- 'Nothing' when the output buffer is too small: either the encoder reported
-- 'OutputUnderflow', or all input was consumed but no space is left for the
-- requested terminator.  The caller is expected to retry with more space.
tryFillBuffer :: TextEncoder dstate -> Bool -> Buffer Char -> Ptr Word8 -> Int
                    ->  IO (Maybe (Buffer Word8))
tryFillBuffer encoder null_terminate from0 to_p !to_sz_bytes = do
    -- Wrap the raw output memory without attaching a finalizer.
    !to_fp <- newForeignPtr_ to_p
    go (0 :: Int) from0 (emptyBuffer to_fp to_sz_bytes WriteBuffer)
  where
    go !iteration !from !to = do
      (why, from', to') <- encode encoder from to
      putDebugMsg ("tryFillBuffer: " ++ show iteration ++ " " ++ show why ++ " " ++ summaryBuffer from ++ " " ++ summaryBuffer from')
      if isEmptyBuffer from'
       then if null_terminate && bufferAvailable to' == 0
             then return Nothing -- We had enough for the string but not the terminator: ask the caller for more buffer
             else return (Just to')
       else case why of -- We didn't consume all of the input
              -- These conditions are equally bad
              -- since the input was truncated/invalid
              InputUnderflow  -> recover encoder from' to' >>= \(a,b) -> go (iteration + 1) a b
              InvalidSequence -> recover encoder from' to' >>= \(a,b) -> go (iteration + 1) a b
              -- Oops, out of buffer during encoding: ask the caller for more
              OutputUnderflow -> return Nothing
{-
Note [Check *before* fill in withEncodedCString]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It's very important that the size check and readjustment performed by tryFillBuffer
happens before the continuation is called. The size check is the part which can
fail, the call to the continuation never fails and so the caller should respond
first to the size check failing and *then* call the continuation. Making this evident
to the compiler avoids historic space leaks.

In a previous iteration of this code we had a pattern that, somewhat simplified,
looked like this:

go :: State -> (State -> IO a) -> IO a
go state action =
    case tryFillBufferAndCall state action of
        Left state' -> go state' action
        Right result -> result

`tryFillBufferAndCall` performed some checks, and then we either called action,
or we modified the state and tried again.
This went wrong because `action` can be a function closure containing a reference to
a lazy data structure. If we call action directly, without retaining any references
to action, that is fine. The data structure is consumed as it is produced and we operate
in constant space.

However the failure branch `go state' action` *does* capture a reference to action.
This went wrong because the reference to action in the failure branch only becomes
unreachable *after* action returns. This means we keep alive the function closure
for `action` until `action` returns. Which in turn keeps alive the *whole* lazy list
via `action` until the action has fully run.
This went wrong in #20107, where the continuation kept an entire lazy bytestring alive
rather than allowing it to be incrementally consumed and collected.
-}