{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples, GeneralizedNewtypeDeriving, DerivingStrategies #-}
{-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
-- gross hack: we maneuvered ourselves into a position where we can't boot GHC with a LLVM based GHC anymore.
-- LLVM based GHC's fail to compile memcmp ffi calls.  These end up as memcmp$def in the llvm ir, however we
-- don't have any prototypes and subsequently the llvm toolchain chokes on them.  Since 7fdcce6d, we use
-- ShortText for the package database.  This however introduces this very module; which through inlining ends
-- up bringing memcmp_ByteArray from bytestring:Data.ByteString.Short.Internal into scope, which results in
-- the memcmp call we choke on.
--
-- The solution thusly is to force late binding via the linker instead of inlining when comping with the
-- bootstrap compiler.  This will produce a slower (slightly less optimised) stage1 compiler only.
--
-- See issue 18857. hsyl20 deserves credit for coming up with the idea for the solution.
-- |
-- An Unicode string for internal GHC use. Meant to replace String
-- in places where being a lazy linked is not very useful and a more
-- memory efficient data structure is desirable.

-- Very similar to FastString, but not hash-consed and with some extra instances and
-- functions for serialisation and I/O. Should be imported qualified.
--
-- /Note:/ This string is stored in Modified UTF8 format,
-- thus it's not byte-compatible with @ShortText@ type in @text-short@
-- package.

module GHC.Data.ShortText (
        -- * ShortText
        ShortText(..),
        -- ** Conversion to and from String
        singleton,
        pack,
        unpack,
        -- ** Operations
        codepointLength,
        byteLength,
        GHC.Data.ShortText.null,
        splitFilePath,
        GHC.Data.ShortText.head,
        stripPrefix
  ) where

import Prelude

import Control.Monad (guard)
import Control.DeepSeq as DeepSeq
import Data.Binary
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Short.Internal as SBS
import GHC.Exts
import GHC.IO
import GHC.Utils.Encoding
import System.FilePath (isPathSeparator)

{-| A 'ShortText' is a modified UTF-8 encoded string meant for short strings like
file paths, module descriptions, etc.
-}
newtype ShortText = ShortText { ShortText -> ShortByteString
contents :: SBS.ShortByteString
                              }
                              deriving stock (Int -> ShortText -> ShowS
[ShortText] -> ShowS
ShortText -> String
(Int -> ShortText -> ShowS)
-> (ShortText -> String)
-> ([ShortText] -> ShowS)
-> Show ShortText
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> ShortText -> ShowS
showsPrec :: Int -> ShortText -> ShowS
$cshow :: ShortText -> String
show :: ShortText -> String
$cshowList :: [ShortText] -> ShowS
showList :: [ShortText] -> ShowS
Show)
                              deriving newtype (ShortText -> ShortText -> Bool
(ShortText -> ShortText -> Bool)
-> (ShortText -> ShortText -> Bool) -> Eq ShortText
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: ShortText -> ShortText -> Bool
== :: ShortText -> ShortText -> Bool
$c/= :: ShortText -> ShortText -> Bool
/= :: ShortText -> ShortText -> Bool
Eq, Eq ShortText
Eq ShortText =>
(ShortText -> ShortText -> Ordering)
-> (ShortText -> ShortText -> Bool)
-> (ShortText -> ShortText -> Bool)
-> (ShortText -> ShortText -> Bool)
-> (ShortText -> ShortText -> Bool)
-> (ShortText -> ShortText -> ShortText)
-> (ShortText -> ShortText -> ShortText)
-> Ord ShortText
ShortText -> ShortText -> Bool
ShortText -> ShortText -> Ordering
ShortText -> ShortText -> ShortText
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: ShortText -> ShortText -> Ordering
compare :: ShortText -> ShortText -> Ordering
$c< :: ShortText -> ShortText -> Bool
< :: ShortText -> ShortText -> Bool
$c<= :: ShortText -> ShortText -> Bool
<= :: ShortText -> ShortText -> Bool
$c> :: ShortText -> ShortText -> Bool
> :: ShortText -> ShortText -> Bool
$c>= :: ShortText -> ShortText -> Bool
>= :: ShortText -> ShortText -> Bool
$cmax :: ShortText -> ShortText -> ShortText
max :: ShortText -> ShortText -> ShortText
$cmin :: ShortText -> ShortText -> ShortText
min :: ShortText -> ShortText -> ShortText
Ord, Get ShortText
[ShortText] -> Put
ShortText -> Put
(ShortText -> Put)
-> Get ShortText -> ([ShortText] -> Put) -> Binary ShortText
forall t. (t -> Put) -> Get t -> ([t] -> Put) -> Binary t
$cput :: ShortText -> Put
put :: ShortText -> Put
$cget :: Get ShortText
get :: Get ShortText
$cputList :: [ShortText] -> Put
putList :: [ShortText] -> Put
Binary, NonEmpty ShortText -> ShortText
ShortText -> ShortText -> ShortText
(ShortText -> ShortText -> ShortText)
-> (NonEmpty ShortText -> ShortText)
-> (forall b. Integral b => b -> ShortText -> ShortText)
-> Semigroup ShortText
forall b. Integral b => b -> ShortText -> ShortText
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
$c<> :: ShortText -> ShortText -> ShortText
<> :: ShortText -> ShortText -> ShortText
$csconcat :: NonEmpty ShortText -> ShortText
sconcat :: NonEmpty ShortText -> ShortText
$cstimes :: forall b. Integral b => b -> ShortText -> ShortText
stimes :: forall b. Integral b => b -> ShortText -> ShortText
Semigroup, Semigroup ShortText
ShortText
Semigroup ShortText =>
ShortText
-> (ShortText -> ShortText -> ShortText)
-> ([ShortText] -> ShortText)
-> Monoid ShortText
[ShortText] -> ShortText
ShortText -> ShortText -> ShortText
forall a.
Semigroup a =>
a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
$cmempty :: ShortText
mempty :: ShortText
$cmappend :: ShortText -> ShortText -> ShortText
mappend :: ShortText -> ShortText -> ShortText
$cmconcat :: [ShortText] -> ShortText
mconcat :: [ShortText] -> ShortText
Monoid, ShortText -> ()
(ShortText -> ()) -> NFData ShortText
forall a. (a -> ()) -> NFData a
$crnf :: ShortText -> ()
rnf :: ShortText -> ()
NFData)

-- We don't want to derive this one from ShortByteString since that one won't handle
-- UTF-8 characters correctly.
instance IsString ShortText where
  fromString :: String -> ShortText
fromString = String -> ShortText
pack

-- | /O(n)/ Returns the length of the 'ShortText' in characters.
codepointLength :: ShortText -> Int
codepointLength :: ShortText -> Int
codepointLength ShortText
st = ShortByteString -> Int
utf8CountCharsShortByteString (ShortText -> ShortByteString
contents ShortText
st)

-- | /O(1)/ Returns the length of the 'ShortText' in bytes.
byteLength :: ShortText -> Int
byteLength :: ShortText -> Int
byteLength ShortText
st = ShortByteString -> Int
SBS.length (ShortByteString -> Int) -> ShortByteString -> Int
forall a b. (a -> b) -> a -> b
$ ShortText -> ShortByteString
contents ShortText
st

-- | /O(n)/ Convert a 'String' into a 'ShortText'.
pack :: String -> ShortText
pack :: String -> ShortText
pack String
s = ShortByteString -> ShortText
ShortText (ShortByteString -> ShortText) -> ShortByteString -> ShortText
forall a b. (a -> b) -> a -> b
$ String -> ShortByteString
utf8EncodeShortByteString String
s

-- | Create a singleton
singleton :: Char -> ShortText
singleton :: Char -> ShortText
singleton Char
s = String -> ShortText
pack [Char
s]

-- | /O(n)/ Convert a 'ShortText' into a 'String'.
unpack :: ShortText -> String
unpack :: ShortText -> String
unpack ShortText
st = ShortByteString -> String
utf8DecodeShortByteString (ShortByteString -> String) -> ShortByteString -> String
forall a b. (a -> b) -> a -> b
$ ShortText -> ShortByteString
contents ShortText
st

-- | /O(1)/ Test whether the 'ShortText' is the empty string.
null :: ShortText -> Bool
null :: ShortText -> Bool
null ShortText
st = ShortByteString -> Bool
SBS.null (ShortByteString -> Bool) -> ShortByteString -> Bool
forall a b. (a -> b) -> a -> b
$ ShortText -> ShortByteString
contents ShortText
st

-- | /O(n)/ Split a 'ShortText' representing a file path into its components by separating
-- on the file separator characters for this platform.
splitFilePath :: ShortText -> [ShortText]
-- This seems dangerous, but since the path separators are in the ASCII set they map down
-- to a single byte when encoded in UTF-8 and so this should work even when casting to ByteString.
-- We DeepSeq.force the resulting list so that we can be sure that no references to the
-- bytestring in `st'` remain in unevaluated thunks, which might prevent `st'` from being
-- collected by the GC.
splitFilePath :: ShortText -> [ShortText]
splitFilePath ShortText
st = [ShortText] -> [ShortText]
forall a. NFData a => a -> a
DeepSeq.force ([ShortText] -> [ShortText]) -> [ShortText] -> [ShortText]
forall a b. (a -> b) -> a -> b
$ (ByteString -> ShortText) -> [ByteString] -> [ShortText]
forall a b. (a -> b) -> [a] -> [b]
map (ShortByteString -> ShortText
ShortText (ShortByteString -> ShortText)
-> (ByteString -> ShortByteString) -> ByteString -> ShortText
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ShortByteString
SBS.toShort) ([ByteString] -> [ShortText]) -> [ByteString] -> [ShortText]
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> ByteString -> [ByteString]
B8.splitWith Char -> Bool
isPathSeparator ByteString
st'
  where st' :: ByteString
st' = ShortByteString -> ByteString
SBS.fromShort (ShortByteString -> ByteString) -> ShortByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ ShortText -> ShortByteString
contents ShortText
st

-- | /O(1)/ Returns the first UTF-8 codepoint in the 'ShortText'. Depending on the string in
-- question, this may or may not be the actual first character in the string due to Unicode
-- non-printable characters.
head :: ShortText -> Char
head :: ShortText -> Char
head ShortText
st
  | Char
hd:String
_ <- ShortText -> String
unpack ShortText
st
  = Char
hd
  | Bool
otherwise
  = String -> Char
forall a. HasCallStack => String -> a
error String
"head: Empty ShortText"

-- | /O(n)/ The 'stripPrefix' function takes two 'ShortText's and returns 'Just' the remainder of
-- the second iff the first is its prefix, and otherwise Nothing.
stripPrefix :: ShortText -> ShortText -> Maybe ShortText
stripPrefix :: ShortText -> ShortText -> Maybe ShortText
stripPrefix ShortText
prefix ShortText
st = do
  let !(SBS.SBS ByteArray#
prefixBA) = ShortText -> ShortByteString
contents ShortText
prefix
  let !(SBS.SBS ByteArray#
stBA)     = ShortText -> ShortByteString
contents ShortText
st
  let prefixLength :: Int#
prefixLength        = ByteArray# -> Int#
sizeofByteArray# ByteArray#
prefixBA
  let stLength :: Int#
stLength            = ByteArray# -> Int#
sizeofByteArray# ByteArray#
stBA
  -- If the length of 'st' is not >= than the length of 'prefix', it is impossible for 'prefix'
  -- to be the prefix of `st`.
  Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Maybe ()) -> Bool -> Maybe ()
forall a b. (a -> b) -> a -> b
$ (Int# -> Int
I# Int#
stLength) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= (Int# -> Int
I# Int#
prefixLength)
  -- 'prefix' is a prefix of 'st' if the first <length of prefix> bytes of 'st'
  -- are equal to 'prefix'
  Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Maybe ()) -> Bool -> Maybe ()
forall a b. (a -> b) -> a -> b
$ Int# -> Int
I# (ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
compareByteArrays# ByteArray#
prefixBA Int#
0# ByteArray#
stBA Int#
0# Int#
prefixLength) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
  -- Allocate a new ByteArray# and copy the remainder of the 'st' into it
  IO (Maybe ShortText) -> Maybe ShortText
forall a. IO a -> a
unsafeDupablePerformIO (IO (Maybe ShortText) -> Maybe ShortText)
-> IO (Maybe ShortText) -> Maybe ShortText
forall a b. (a -> b) -> a -> b
$ do
    let newBAsize :: Int#
newBAsize = (Int#
stLength Int# -> Int# -> Int#
-# Int#
prefixLength)
    newSBS <- (State# RealWorld -> (# State# RealWorld, ShortByteString #))
-> IO ShortByteString
forall a. (State# RealWorld -> (# State# RealWorld, a #)) -> IO a
IO ((State# RealWorld -> (# State# RealWorld, ShortByteString #))
 -> IO ShortByteString)
-> (State# RealWorld -> (# State# RealWorld, ShortByteString #))
-> IO ShortByteString
forall a b. (a -> b) -> a -> b
$ \State# RealWorld
s0 ->
      let !(# State# RealWorld
s1, MutableByteArray# RealWorld
ba #)  = Int#
-> State# RealWorld
-> (# State# RealWorld, MutableByteArray# RealWorld #)
forall d. Int# -> State# d -> (# State# d, MutableByteArray# d #)
newByteArray# Int#
newBAsize State# RealWorld
s0
          s2 :: State# RealWorld
s2             = ByteArray#
-> Int#
-> MutableByteArray# RealWorld
-> Int#
-> Int#
-> State# RealWorld
-> State# RealWorld
forall d.
ByteArray#
-> Int#
-> MutableByteArray# d
-> Int#
-> Int#
-> State# d
-> State# d
copyByteArray# ByteArray#
stBA Int#
prefixLength MutableByteArray# RealWorld
ba Int#
0# Int#
newBAsize State# RealWorld
s1
          !(# State# RealWorld
s3, ByteArray#
fba #) = MutableByteArray# RealWorld
-> State# RealWorld -> (# State# RealWorld, ByteArray# #)
forall d.
MutableByteArray# d -> State# d -> (# State# d, ByteArray# #)
unsafeFreezeByteArray# MutableByteArray# RealWorld
ba State# RealWorld
s2
      in  (# State# RealWorld
s3, ByteArray# -> ShortByteString
SBS.SBS ByteArray#
fba #)
    return . Just . ShortText $ newSBS