{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples, MultiWayIf #-}
{-# OPTIONS_GHC -O2 -fno-warn-name-shadowing #-}
module GHC.Utils.Encoding.UTF8
(
utf8DecodeCharAddr#
, utf8DecodeCharPtr
, utf8DecodeCharByteArray#
, utf8PrevChar
, utf8CharStart
, utf8UnconsByteString
, utf8DecodeByteString
, utf8DecodeShortByteString
, utf8DecodeForeignPtr
, utf8DecodeByteArray#
, utf8CountCharsShortByteString
, utf8CountCharsByteArray#
, utf8CompareByteArray#
, utf8CompareShortByteString
, utf8EncodeByteArray#
, utf8EncodePtr
, utf8EncodeByteString
, utf8EncodeShortByteString
, utf8EncodedLength
) where
import Prelude
import Foreign
import GHC.IO
import GHC.Encoding.UTF8
import Data.ByteString (ByteString)
import qualified Data.ByteString.Internal as BS
import Data.ByteString.Short.Internal (ShortByteString(..))
-- | Find the start of the code point that precedes the one at the given
-- pointer.
--
-- Steps back exactly one byte and then lets 'utf8CharStart' skip backwards
-- over any continuation bytes. No bounds are checked here: the caller must
-- guarantee that a complete code point precedes @p@ in the same buffer.
utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8)
utf8PrevChar p = utf8CharStart (p `plusPtr` (-1))
-- | Find the start of the code point that contains the byte at the given
-- pointer.
--
-- Bytes in the range @0x80@ .. @0xBF@ are UTF-8 continuation bytes, so the
-- scan moves backwards one byte at a time until it reads a byte outside that
-- range and returns a pointer to it.
--
-- The scan has no lower bound: if the buffer does not contain a lead byte at
-- or before @p@, this reads memory before the start of the buffer. The caller
-- is responsible for ensuring that cannot happen.
utf8CharStart :: Ptr Word8 -> IO (Ptr Word8)
utf8CharStart p = go p
  where
    go :: Ptr Word8 -> IO (Ptr Word8)
    go q = do
      w <- peek q
      if w >= 0x80 && w < 0xC0
        then go (q `plusPtr` (-1)) -- continuation byte: keep walking back
        else return q              -- ASCII or lead byte: start of the code point
-- | Count the characters in a UTF-8 encoded 'ShortByteString'.
--
-- Unwraps the underlying 'ByteArray#' and delegates to
-- 'utf8CountCharsByteArray#'.
utf8CountCharsShortByteString :: ShortByteString -> Int
utf8CountCharsShortByteString (SBS ba) = utf8CountCharsByteArray# ba
-- | Decode a 'ShortByteString' containing a UTF-8 string.
--
-- Unwraps the underlying 'ByteArray#' and delegates to
-- 'utf8DecodeByteArray#'.
utf8DecodeShortByteString :: ShortByteString -> [Char]
utf8DecodeShortByteString (SBS ba#) = utf8DecodeByteArray# ba#
-- | Decode a 'ByteString' containing a UTF-8 string.
--
-- Takes the 'ByteString' apart into its foreign pointer, offset and length
-- and delegates to 'utf8DecodeForeignPtr'.
utf8DecodeByteString :: ByteString -> [Char]
utf8DecodeByteString (BS.PS fptr offset len)
  = utf8DecodeForeignPtr fptr offset len
-- | Encode a 'String' into a 'ShortByteString'.
--
-- Delegates the encoding to 'utf8EncodeByteArray#' and wraps the resulting
-- 'ByteArray#'.
utf8EncodeShortByteString :: String -> ShortByteString
utf8EncodeShortByteString str = SBS (utf8EncodeByteArray# str)
-- | Encode a 'String' into a 'ByteString'.
--
-- The encoded size is computed up front with 'utf8EncodedLength', a buffer of
-- exactly that many bytes is allocated, and 'utf8EncodePtr' writes the
-- encoding into it.
--
-- 'unsafePerformIO' is justified here because the only effects are allocating
-- a fresh buffer that nothing else can observe and filling it from @s@, so
-- the result depends on the argument alone.
utf8EncodeByteString :: String -> ByteString
utf8EncodeByteString s =
  unsafePerformIO $ do
    let len = utf8EncodedLength s
    buf <- mallocForeignPtrBytes len
    withForeignPtr buf $ \ptr -> do
      utf8EncodePtr ptr s
      pure (BS.fromForeignPtr buf 0 len)
-- | Split the first UTF-8 encoded character off a 'ByteString'.
--
-- Returns 'Nothing' for an empty 'ByteString'; otherwise returns the decoded
-- character together with the remainder of the input (sharing the same
-- buffer, advanced by the number of bytes the character occupied).
--
-- NOTE(review): the input length is only checked for being zero; the decoder
-- is not told where the buffer ends, so the input is presumably required to
-- be well-formed UTF-8 without a truncated trailing sequence -- confirm
-- against 'utf8DecodeCharPtr'.
utf8UnconsByteString :: ByteString -> Maybe (Char, ByteString)
utf8UnconsByteString (BS.PS _ _ 0) = Nothing
utf8UnconsByteString (BS.PS fptr offset len)
  = unsafeDupablePerformIO $
      withForeignPtr fptr $ \ptr ->
        -- Scrutinise the decode result with 'case' (rather than a lazy 'let')
        -- so the read from the buffer happens here, while 'withForeignPtr'
        -- still keeps the buffer alive, instead of in a thunk that could be
        -- forced after this scope has been left.
        case utf8DecodeCharPtr (ptr `plusPtr` offset) of
          (c, n) -> return $ Just (c, BS.PS fptr (offset + n) (len - n))
-- | Compare two UTF-8 encoded 'ShortByteString's.
--
-- Unwraps both underlying 'ByteArray#'s and delegates the comparison to
-- 'utf8CompareByteArray#'; the ordering is whatever that function defines.
utf8CompareShortByteString :: ShortByteString -> ShortByteString -> Ordering
utf8CompareShortByteString (SBS a1) (SBS a2) = utf8CompareByteArray# a1 a2