{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

-- This file is also included by "Data.ByteString.ReadInt", after defining
-- "BYTESTRING_STRICT".  The two modules share much of their code, but
-- the lazy version adds an outer loop over the chunks.

#ifdef BYTESTRING_STRICT
module Data.ByteString.ReadInt
#else
module Data.ByteString.Lazy.ReadInt
#endif
    ( readInt
    , readInt8
    , readInt16
    , readInt32
    , readWord
    , readWord8
    , readWord16
    , readWord32
    , readInt64
    , readWord64
    ) where

import qualified Data.ByteString.Internal as BI
#ifdef BYTESTRING_STRICT
import Data.ByteString
#else
import Data.ByteString.Lazy
import Data.ByteString.Lazy.Internal
#endif
import Data.Bits (FiniteBits, isSigned)
import Data.ByteString.Internal (pattern BS, plusForeignPtr)
import Data.Int
import Data.Word
import Foreign.ForeignPtr (ForeignPtr)
import Foreign.Ptr (minusPtr, plusPtr)
import Foreign.Storable (Storable(..))

----- Public API

-- | Try to read a signed 'Int' value from the 'ByteString', returning
-- @Just (val, str)@ on success, where @val@ is the value read and @str@ is the
-- rest of the input string.  If the sequence of digits decodes to a value
-- larger than can be represented by an 'Int', the returned value will be
-- 'Nothing'.
--
-- 'readInt' does not ignore leading whitespace, the value must start
-- immediately at the beginning of the input string.
--
-- ==== __Examples__
-- >>> readInt "-1729 sum of cubes"
-- Just (-1729," sum of cubes")
-- >>> readInt "+1: readInt also accepts a leading '+'"
-- Just (1, ": readInt also accepts a leading '+'")
-- >>> readInt "not a decimal number"
-- Nothing
-- >>> readInt "12345678901234567890 overflows maxBound"
-- Nothing
-- >>> readInt "-12345678901234567890 underflows minBound"
-- Nothing
--
readInt :: ByteString -> Maybe (Int, ByteString)
readInt :: ByteString -> Maybe (Int, ByteString)
readInt = ByteString -> Maybe (Int, ByteString)
forall a.
(Integral a, FiniteBits a, Bounded a) =>
ByteString -> Maybe (a, ByteString)
_read

-- | A variant of 'readInt' specialised to 'Int32'.
readInt32 :: ByteString -> Maybe (Int32, ByteString)
readInt32 :: ByteString -> Maybe (Int32, ByteString)
readInt32 = ByteString -> Maybe (Int32, ByteString)
forall a.
(Integral a, FiniteBits a, Bounded a) =>
ByteString -> Maybe (a, ByteString)
_read

-- | A variant of 'readInt' specialised to 'Int16'.
readInt16 :: ByteString -> Maybe (Int16, ByteString)
readInt16 :: ByteString -> Maybe (Int16, ByteString)
readInt16 = ByteString -> Maybe (Int16, ByteString)
forall a.
(Integral a, FiniteBits a, Bounded a) =>
ByteString -> Maybe (a, ByteString)
_read

-- | A variant of 'readInt' specialised to 'Int8'.
readInt8 :: ByteString -> Maybe (Int8, ByteString)
readInt8 :: ByteString -> Maybe (Int8, ByteString)
readInt8 = ByteString -> Maybe (Int8, ByteString)
forall a.
(Integral a, FiniteBits a, Bounded a) =>
ByteString -> Maybe (a, ByteString)
_read

-- | Try to read a 'Word' value from the 'ByteString', returning
-- @Just (val, str)@ on success, where @val@ is the value read and @str@ is the
-- rest of the input string.  If the sequence of digits decodes to a value
-- larger than can be represented by a 'Word', the returned value will be
-- 'Nothing'.
--
-- 'readWord' does not ignore leading whitespace, the value must start with a
-- decimal digit immediately at the beginning of the input string.  Leading @+@
-- signs are not accepted.
--
-- ==== __Examples__
-- >>> readWord "1729 sum of cubes"
-- Just (1729," sum of cubes")
-- >>> readWord "+1729 has an explicit sign"
-- Nothing
-- >>> readWord "not a decimal number"
-- Nothing
-- >>> readWord "98765432109876543210 overflows maxBound"
-- Nothing
--
readWord :: ByteString -> Maybe (Word, ByteString)
readWord :: ByteString -> Maybe (Word, ByteString)
readWord = ByteString -> Maybe (Word, ByteString)
forall a.
(Integral a, FiniteBits a, Bounded a) =>
ByteString -> Maybe (a, ByteString)
_read

-- | A variant of 'readWord' specialised to 'Word32'.
readWord32 :: ByteString -> Maybe (Word32, ByteString)
readWord32 :: ByteString -> Maybe (Word32, ByteString)
readWord32 = ByteString -> Maybe (Word32, ByteString)
forall a.
(Integral a, FiniteBits a, Bounded a) =>
ByteString -> Maybe (a, ByteString)
_read

-- | A variant of 'readWord' specialised to 'Word16'.
readWord16 :: ByteString -> Maybe (Word16, ByteString)
readWord16 :: ByteString -> Maybe (Word16, ByteString)
readWord16 = ByteString -> Maybe (Word16, ByteString)
forall a.
(Integral a, FiniteBits a, Bounded a) =>
ByteString -> Maybe (a, ByteString)
_read

-- | A variant of 'readWord' specialised to 'Word8'.
readWord8 :: ByteString -> Maybe (Word8, ByteString)
readWord8 :: ByteString -> Maybe (Word8, ByteString)
readWord8 = ByteString -> Maybe (Word8, ByteString)
forall a.
(Integral a, FiniteBits a, Bounded a) =>
ByteString -> Maybe (a, ByteString)
_read

-- | A variant of 'readInt' specialised to 'Int64'.
readInt64 :: ByteString -> Maybe (Int64, ByteString)
readInt64 :: ByteString -> Maybe (Int64, ByteString)
readInt64 = ByteString -> Maybe (Int64, ByteString)
forall a.
(Integral a, FiniteBits a, Bounded a) =>
ByteString -> Maybe (a, ByteString)
_read

-- | A variant of 'readWord' specialised to 'Word64'.
readWord64 :: ByteString -> Maybe (Word64, ByteString)
readWord64 :: ByteString -> Maybe (Word64, ByteString)
readWord64 = ByteString -> Maybe (Word64, ByteString)
forall a.
(Integral a, FiniteBits a, Bounded a) =>
ByteString -> Maybe (a, ByteString)
_read

-- | Polymorphic Int*/Word* reader
_read :: forall a. (Integral a, FiniteBits a, Bounded a)
      => ByteString  -> Maybe (a, ByteString)
{-# INLINE _read #-}
_read :: forall a.
(Integral a, FiniteBits a, Bounded a) =>
ByteString -> Maybe (a, ByteString)
_read
    | forall a. Bits a => a -> Bool
isSigned @a a
0
      = \ ByteString
bs -> ByteString -> Maybe (Word64, ByteString, Word64)
signed ByteString
bs Maybe (Word64, ByteString, Word64)
-> ((Word64, ByteString, Word64) -> Maybe (a, ByteString))
-> Maybe (a, ByteString)
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \ (Word64
r, ByteString
s, Word64
d1) -> Word64 -> ByteString -> Word64 -> Maybe (a, ByteString)
forall a.
(Integral a, Bounded a) =>
Word64 -> ByteString -> Word64 -> Maybe (a, ByteString)
_readDecimal Word64
r ByteString
s Word64
d1
    | Bool
otherwise
      -- When the input is @16^n-1@, as is the case with 'maxBound' for
      -- all the Word* types, the last decimal digit of 'maxBound' is 5.
      = \ ByteString
bs -> Word64 -> ByteString -> Maybe (Word64, ByteString, Word64)
unsigned Word64
5 ByteString
bs Maybe (Word64, ByteString, Word64)
-> ((Word64, ByteString, Word64) -> Maybe (a, ByteString))
-> Maybe (a, ByteString)
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \ (Word64
r, ByteString
s, Word64
d1) -> Word64 -> ByteString -> Word64 -> Maybe (a, ByteString)
forall a.
(Integral a, Bounded a) =>
Word64 -> ByteString -> Word64 -> Maybe (a, ByteString)
_readDecimal Word64
r ByteString
s Word64
d1
  where
    -- Returns:
    --  * Mod 10 min/max bound remainder
    --  * 2nd and later digits
    --  * 1st digit
    --
    -- When the input is @8*16^n-1@, as is the case with 'maxBound' for
    -- all the Int* types, the last decimal digit of 'maxBound' is 7.
    --
    signed :: ByteString -> Maybe (Word64, ByteString, Word64)
    signed :: ByteString -> Maybe (Word64, ByteString, Word64)
signed ByteString
bs = do
        (w, s) <- ByteString -> Maybe (Word8, ByteString)
uncons ByteString
bs
        let d1 = Word8 -> Word64
fromDigit Word8
w
        if | d1 <= 9   -> Just (7, s, d1) -- leading digit
           | w == 0x2d -> unsigned 8 s    -- minus sign
           | w == 0x2b -> unsigned 7 s    -- plus sign
           | otherwise -> Nothing         -- not a number

    unsigned :: Word64 -> ByteString -> Maybe (Word64, ByteString, Word64)
    unsigned :: Word64 -> ByteString -> Maybe (Word64, ByteString, Word64)
unsigned Word64
r ByteString
bs = do
        (w, s) <- ByteString -> Maybe (Word8, ByteString)
uncons ByteString
bs
        let d1 = Word8 -> Word64
fromDigit Word8
w
        if | d1 <= 9   -> Just (r, s, d1) -- leading digit
           | otherwise -> Nothing         -- not a number

----- Fixed-width unsigned reader

-- | Intermediate result from scanning a chunk, final output is
-- converted to the requested type once all chunks are processed.
--
data Result = Overflow
            | Result !Int    -- number of bytes (digits) read
                     !Word64 -- accumulator value

_readDecimal :: forall a. (Integral a, Bounded a)
             => Word64     -- ^ abs(maxBound/minBound) `mod` 10
             -> ByteString -- ^ Input string
             -> Word64     -- ^ First digit value
             -> Maybe (a, ByteString)
{-# INLINE _readDecimal #-}
_readDecimal :: forall a.
(Integral a, Bounded a) =>
Word64 -> ByteString -> Word64 -> Maybe (a, ByteString)
_readDecimal !Word64
r = ByteString -> Word64 -> Maybe (a, ByteString)
consume
  where
    consume :: ByteString -> Word64 -> Maybe (a, ByteString)
#ifdef BYTESTRING_STRICT
    consume (BS fp len) a = case _digits q r fp len a of
        Result used acc
            | used == len
              -> convert acc empty
            | otherwise
              -> convert acc $ BS (fp `plusForeignPtr` used) (len - used)
        _   -> Nothing
#else
    -- All done
    consume :: ByteString -> Word64 -> Maybe (a, ByteString)
consume ByteString
Empty Word64
acc = Word64 -> ByteString -> Maybe (a, ByteString)
convert Word64
acc ByteString
Empty
    -- Process next chunk
    consume (Chunk (BS ForeignPtr Word8
fp Int
len) ByteString
cs) Word64
acc
        = case Word64 -> Word64 -> ForeignPtr Word8 -> Int -> Word64 -> Result
_digits Word64
q Word64
r ForeignPtr Word8
fp Int
len Word64
acc of
            Result Int
used Word64
acc'
                | Int
used Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len
                  -- process remaining chunks
                  -> ByteString -> Word64 -> Maybe (a, ByteString)
consume ByteString
cs Word64
acc'
                | Bool
otherwise
                  -- ran into a non-digit
                  -> Word64 -> ByteString -> Maybe (a, ByteString)
convert Word64
acc' (ByteString -> Maybe (a, ByteString))
-> ByteString -> Maybe (a, ByteString)
forall a b. (a -> b) -> a -> b
$
                     ByteString -> ByteString -> ByteString
Chunk (ForeignPtr Word8 -> Int -> ByteString
BS (ForeignPtr Word8
fp ForeignPtr Word8 -> Int -> ForeignPtr Word8
forall a b. ForeignPtr a -> Int -> ForeignPtr b
`plusForeignPtr` Int
used) (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
used)) ByteString
cs
            Result
_     -> Maybe (a, ByteString)
forall a. Maybe a
Nothing
#endif
    convert :: Word64 -> ByteString -> Maybe (a, ByteString)
    convert :: Word64 -> ByteString -> Maybe (a, ByteString)
convert !Word64
acc ByteString
rest =
        let !i :: a
i = case Word64
r of
                -- minBound @Int* `mod` 10 == 8
                Word64
8 -> a -> a
forall a. Num a => a -> a
negate (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word64 @a Word64
acc
                Word64
_ -> forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word64 @a Word64
acc
         in (a, ByteString) -> Maybe (a, ByteString)
forall a. a -> Maybe a
Just (a
i, ByteString
rest)

    -- The quotient of 'maxBound' divided by 10 is needed for
    -- overflow checks, once the accumulator exceeds this value
    -- no further digits can be added.  If equal, the last digit
    -- must not exceed the `r` value (max/min bound `mod` 10).
    --
    q :: Word64
q = forall a b. (Integral a, Num b) => a -> b
fromIntegral @a @Word64 a
forall a. Bounded a => a
maxBound Word64 -> Word64 -> Word64
forall a. Integral a => a -> a -> a
`div` Word64
10

----- Per chunk decoder

-- | Process as many digits as we can, returning the additional
-- number of digits found and the updated accumulator.  If the
-- accumulator would overflow return 'Overflow'.
--
_digits :: Word64           -- ^ maximum non-overflow value `div` 10
        -> Word64           -- ^ maximum non-overflow vavlue `mod` 10
        -> ForeignPtr Word8 -- ^ Input buffer
        -> Int              -- ^ Input length
        -> Word64           -- ^ Accumulated value of leading digits
        -> Result           -- ^ Bytes read and final accumulator,
                            -- or else overflow indication
{-# INLINE _digits #-}
_digits :: Word64 -> Word64 -> ForeignPtr Word8 -> Int -> Word64 -> Result
_digits !Word64
q !Word64
r ForeignPtr Word8
fp Int
len Word64
a = IO Result -> Result
forall a. IO a -> a
BI.accursedUnutterablePerformIO (IO Result -> Result) -> IO Result -> Result
forall a b. (a -> b) -> a -> b
$
    ForeignPtr Word8 -> (Ptr Word8 -> IO Result) -> IO Result
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
BI.unsafeWithForeignPtr ForeignPtr Word8
fp ((Ptr Word8 -> IO Result) -> IO Result)
-> (Ptr Word8 -> IO Result) -> IO Result
forall a b. (a -> b) -> a -> b
$ \ Ptr Word8
ptr -> do
        let end :: Ptr b
end = Ptr Word8
ptr Ptr Word8 -> Int -> Ptr b
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
len
        Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> Word64 -> IO Result
forall {b}. Ptr b -> Ptr Word8 -> Ptr Word8 -> Word64 -> IO Result
go Ptr Word8
ptr Ptr Word8
forall {b}. Ptr b
end Ptr Word8
ptr Word64
a
  where
    go :: Ptr b -> Ptr Word8 -> Ptr Word8 -> Word64 -> IO Result
go !Ptr b
start !Ptr Word8
end = Ptr Word8 -> Word64 -> IO Result
loop
      where
        loop :: Ptr Word8 -> Word64 -> IO Result
loop !Ptr Word8
ptr !Word64
acc = IO Word64
getDigit IO Word64 -> (Word64 -> IO Result) -> IO Result
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \ !Word64
d ->
            if | Word64
d Word64 -> Word64 -> Bool
forall a. Ord a => a -> a -> Bool
> Word64
9
                 -> Result -> IO Result
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Result -> IO Result) -> Result -> IO Result
forall a b. (a -> b) -> a -> b
$ Int -> Word64 -> Result
Result (Ptr Word8
ptr Ptr Word8 -> Ptr b -> Int
forall a b. Ptr a -> Ptr b -> Int
`minusPtr` Ptr b
start) Word64
acc
               | Word64
acc Word64 -> Word64 -> Bool
forall a. Ord a => a -> a -> Bool
< Word64
q Bool -> Bool -> Bool
|| Word64
acc Word64 -> Word64 -> Bool
forall a. Eq a => a -> a -> Bool
== Word64
q Bool -> Bool -> Bool
&& Word64
d Word64 -> Word64 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word64
r
                 -> Ptr Word8 -> Word64 -> IO Result
loop (Ptr Word8
ptr Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
1) (Word64
acc Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
10 Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
+ Word64
d)
               | Bool
otherwise
                 -> Result -> IO Result
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Result
Overflow
          where
            getDigit :: IO Word64
            getDigit :: IO Word64
getDigit
                | Ptr Word8
ptr Ptr Word8 -> Ptr Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Ptr Word8
end = Word8 -> Word64
fromDigit (Word8 -> Word64) -> IO Word8 -> IO Word64
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek Ptr Word8
ptr
                | Bool
otherwise  = Word64 -> IO Word64
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Word64
10  -- End of input
            {-# NOINLINE getDigit #-}
            -- 'getDigit' makes it possible to implement a single success
            -- exit point from the loop.  If instead we return 'Result'
            -- from multiple places, when '_digits' is inlined we get (at
            -- least GHC 8.10 through 9.2) for each exit path a separate
            -- join point implementing the continuation code.  GHC ticket
            -- <https://gitlab.haskell.org/ghc/ghc/-/issues/20739>.
            --
            -- The NOINLINE pragma is required to avoid inlining branches
            -- that would restore multiple exit points.

fromDigit :: Word8 -> Word64
{-# INLINE fromDigit #-}
fromDigit :: Word8 -> Word64
fromDigit = \ !Word8
w -> Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
- Word64
0x30 -- i.e. w - '0'