{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}

-- | Native implementation of 'Data.Text.Internal.Validate'.
module Data.Text.Internal.Validate.Native
  ( isValidUtf8ByteStringHaskell
  , isValidUtf8ByteArrayHaskell
  ) where

import Data.Array.Byte (ByteArray(ByteArray))
import Data.ByteString (ByteString)
import GHC.Exts (ByteArray#,Int(I#),indexWord8Array#)
import GHC.Word (Word8(W8#))
import Data.Text.Internal.Encoding.Utf8 (CodePoint(..),DecoderResult(..),utf8DecodeStart,utf8DecodeContinue)
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B

-- | Native implementation of 'Data.Text.Internal.Validate.isValidUtf8ByteString'.
isValidUtf8ByteStringHaskell :: ByteString -> Bool
isValidUtf8ByteStringHaskell :: ByteString -> Bool
isValidUtf8ByteStringHaskell ByteString
bs = Int -> Bool
start Int
0
  where
    start :: Int -> Bool
start Int
ix
      | Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= ByteString -> Int
B.length ByteString
bs = Bool
True
      | Bool
otherwise = case Word8 -> DecoderResult
utf8DecodeStart (ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs Int
ix) of
        Accept{} -> Int -> Bool
start (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
        Reject{} -> Bool
False
        Incomplete DecoderState
st CodePoint
_ -> Int -> DecoderState -> Bool
step (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) DecoderState
st
    step :: Int -> DecoderState -> Bool
step Int
ix DecoderState
st
      | Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= ByteString -> Int
B.length ByteString
bs = Bool
False
      -- We do not use decoded code point, so passing a dummy value to save an argument.
      | Bool
otherwise = case Word8 -> DecoderState -> CodePoint -> DecoderResult
utf8DecodeContinue (ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs Int
ix) DecoderState
st (Int -> CodePoint
CodePoint Int
0) of
        Accept{} -> Int -> Bool
start (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
        Reject{} -> Bool
False
        Incomplete DecoderState
st' CodePoint
_ -> Int -> DecoderState -> Bool
step (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) DecoderState
st'

-- | Native implementation of
-- 'Data.Text.Internal.Validate.isValidUtf8ByteArrayUnpinned'
-- and 'Data.Text.Internal.Validate.isValidUtf8ByteArrayPinned'.
isValidUtf8ByteArrayHaskell ::
     ByteArray -- ^ Bytes
  -> Int -- ^ Offset
  -> Int -- ^ Length
  -> Bool
isValidUtf8ByteArrayHaskell :: ByteArray -> Int -> Int -> Bool
isValidUtf8ByteArrayHaskell (ByteArray ByteArray#
b) !Int
off !Int
len = Int -> Bool
start Int
off
  where
    indexWord8 :: ByteArray# -> Int -> Word8
    indexWord8 :: ByteArray# -> Int -> Word8
indexWord8 !ByteArray#
x (I# Int#
i) = Word8# -> Word8
W8# (ByteArray# -> Int# -> Word8#
indexWord8Array# ByteArray#
x Int#
i)
    start :: Int -> Bool
start Int
ix
      | Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len = Bool
True
      | Bool
otherwise = case Word8 -> DecoderResult
utf8DecodeStart (ByteArray# -> Int -> Word8
indexWord8 ByteArray#
b Int
ix) of
        Accept{} -> Int -> Bool
start (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
        Reject{} -> Bool
False
        Incomplete DecoderState
st CodePoint
_ -> Int -> DecoderState -> Bool
step (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) DecoderState
st
    step :: Int -> DecoderState -> Bool
step Int
ix DecoderState
st
      | Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len = Bool
False
      -- We do not use decoded code point, so passing a dummy value to save an argument.
      | Bool
otherwise = case Word8 -> DecoderState -> CodePoint -> DecoderResult
utf8DecodeContinue (ByteArray# -> Int -> Word8
indexWord8 ByteArray#
b Int
ix) DecoderState
st (Int -> CodePoint
CodePoint Int
0) of
        Accept{} -> Int -> Bool
start (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
        Reject{} -> Bool
False
        Incomplete DecoderState
st' CodePoint
_ -> Int -> DecoderState -> Bool
step (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) DecoderState
st'