{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE NoImplicitPrelude #-}
-- |
-- Module      : System.OsString.Data.ByteString.Short
-- Copyright   : (c) Duncan Coutts 2012-2013, Julian Ospald 2022
-- License     : BSD-style
--
-- Maintainer  : hasufell@posteo.de
-- Stability   : stable
-- Portability : ghc only
--
-- A compact representation suitable for storing short byte strings in memory.
--
-- In typical use cases it can be imported alongside "Data.ByteString", e.g.
--
-- > import qualified Data.ByteString       as B
-- > import qualified Data.ByteString.Short as B
-- >          (ShortByteString, toShort, fromShort)
--
-- Other 'ShortByteString' operations clash with "Data.ByteString" or "Prelude"
-- functions however, so they should be imported @qualified@ with a different
-- alias e.g.
--
-- > import qualified Data.ByteString.Short as B.Short
--
module System.OsString.Data.ByteString.Short (

    -- * The @ShortByteString@ type

    ShortByteString(..),

    -- ** Memory overhead
    -- | With GHC, the memory overheads are as follows, expressed in words and
    -- in bytes (words are 4 and 8 bytes on 32 or 64bit machines respectively).
    --
    -- * 'B.ByteString' unshared: 8 words; 32 or 64 bytes.
    --
    -- * 'B.ByteString' shared substring: 4 words; 16 or 32 bytes.
    --
    -- * 'ShortByteString': 4 words; 16 or 32 bytes.
    --
    -- For the string data itself, both 'ShortByteString' and 'B.ByteString' use
    -- one byte per element, rounded up to the nearest word. For example,
    -- including the overheads, a length 10 'ShortByteString' would take
    -- @16 + 12 = 28@ bytes on a 32bit platform and @32 + 16 = 48@ bytes on a
    -- 64bit platform.
    --
    -- These overheads can all be reduced by 1 word (4 or 8 bytes) when the
    -- 'ShortByteString' or 'B.ByteString' is unpacked into another constructor.
    --
    -- For example:
    --
    -- > data ThingId = ThingId {-# UNPACK #-} !Int
    -- >                        {-# UNPACK #-} !ShortByteString
    --
    -- This will take @1 + 1 + 3@ words (the @ThingId@ constructor +
    -- unpacked @Int@ + unpacked @ShortByteString@), plus the words for the
    -- string data.

    -- ** Heap fragmentation
    -- | With GHC, the 'B.ByteString' representation uses /pinned/ memory,
    -- meaning it cannot be moved by the GC. This is usually the right thing to
    -- do for larger strings, but for small strings using pinned memory can
    -- lead to heap fragmentation which wastes space. The 'ShortByteString'
    -- type (and the @Text@ type from the @text@ package) use /unpinned/ memory
    -- so they do not contribute to heap fragmentation. In addition, with GHC,
    -- small unpinned strings are allocated in the same way as normal heap
    -- allocations, rather than in a separate pinned area.

    -- * Introducing and eliminating 'ShortByteString's
    empty,
    singleton,
    pack,
    unpack,
    fromShort,
    toShort,

    -- * Basic interface
    snoc,
    cons,
    append,
    last,
    tail,
    uncons,
    uncons2,
    head,
    init,
    unsnoc,
    null,
    length,

    -- * Transforming ShortByteStrings
    map,
    reverse,
    intercalate,

    -- * Reducing 'ShortByteString's (folds)
    foldl,
    foldl',
    foldl1,
    foldl1',

    foldr,
    foldr',
    foldr1,
    foldr1',

    -- ** Special folds
    all,
    any,
    concat,

    -- ** Generating and unfolding ByteStrings
    replicate,
    unfoldr,
    unfoldrN,

    -- * Substrings

    -- ** Breaking strings
    take,
    takeEnd,
    takeWhileEnd,
    takeWhile,
    drop,
    dropEnd,
    dropWhile,
    dropWhileEnd,
    breakEnd,
    break,
    span,
    spanEnd,
    splitAt,
    split,
    splitWith,
    stripSuffix,
    stripPrefix,

    -- * Predicates
    isInfixOf,
    isPrefixOf,
    isSuffixOf,

    -- ** Search for arbitrary substrings
    breakSubstring,

    -- * Searching ShortByteStrings

    -- ** Searching by equality
    elem,

    -- ** Searching with a predicate
    find,
    filter,
    partition,

    -- * Indexing ShortByteStrings
    index,
    indexMaybe,
    (!?),
    elemIndex,
    elemIndices,
    count,
    findIndex,
    findIndices,

    -- * Low level conversions
    -- ** Packing 'Foreign.C.String.CString's and pointers
    packCString,
    packCStringLen,

    -- ** Using ShortByteStrings as 'Foreign.C.String.CString's
    useAsCString,
    useAsCStringLen,
  ) where

import Data.ByteString.Short.Internal
import System.OsString.Data.ByteString.Short.Internal

import Prelude (Maybe(..), Ord(..), Num(..), ($), otherwise)
import Data.Word (Word8)

uncons2 :: ShortByteString -> Maybe (Word8, Word8, ShortByteString)
uncons2 :: ShortByteString -> Maybe (Word8, Word8, ShortByteString)
uncons2 = \ShortByteString
sbs ->
  let l :: Int
l  = ShortByteString -> Int
length ShortByteString
sbs
      nl :: Int
nl = Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2
  in if | Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
1 -> Maybe (Word8, Word8, ShortByteString)
forall a. Maybe a
Nothing
        | Bool
otherwise -> let h :: Word8
h  = BA -> Int -> Word8
indexWord8Array (ShortByteString -> BA
asBA ShortByteString
sbs) Int
0
                           h' :: Word8
h' = BA -> Int -> Word8
indexWord8Array (ShortByteString -> BA
asBA ShortByteString
sbs) Int
1
                           t :: ShortByteString
t  = Int -> (forall s. MBA s -> ST s ()) -> ShortByteString
create Int
nl ((forall s. MBA s -> ST s ()) -> ShortByteString)
-> (forall s. MBA s -> ST s ()) -> ShortByteString
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> BA -> Int -> MBA s -> Int -> Int -> ST s ()
forall s. BA -> Int -> MBA s -> Int -> Int -> ST s ()
copyByteArray (ShortByteString -> BA
asBA ShortByteString
sbs) Int
1 MBA s
mba Int
0 Int
nl
                       in (Word8, Word8, ShortByteString)
-> Maybe (Word8, Word8, ShortByteString)
forall a. a -> Maybe a
Just (Word8
h, Word8
h', ShortByteString
t)