{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE CPP
           , NoImplicitPrelude
           , NondecreasingIndentation
           , UnboxedTuples
           , MagicHash
  #-}
{-# OPTIONS_HADDOCK not-home #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  GHC.Internal.IO.Encoding.Iconv
-- Copyright   :  (c) The University of Glasgow, 2008-2009
-- License     :  see libraries/base/LICENSE
--
-- Maintainer  :  libraries@haskell.org
-- Stability   :  internal
-- Portability :  non-portable
--
-- This module provides text encoding/decoding using iconv
--
-----------------------------------------------------------------------------

module GHC.Internal.IO.Encoding.Iconv (
#if !defined(mingw32_HOST_OS)
   iconvEncoding, mkIconvEncoding,
   localeEncodingName
#endif
 ) where

#include "MachDeps.h"
#include "HsBaseConfig.h"

#if defined(mingw32_HOST_OS)

-- See W1 of Note [Tracking dependencies on primitives] in GHC.Internal.Base
import GHC.Internal.Types ()

#else

import GHC.Internal.Foreign.C.Types
import GHC.Internal.Foreign.C.String (withCAString, peekCAString)
import GHC.Internal.Foreign.C.String.Encoding
import GHC.Internal.Foreign.Marshal.Utils
import GHC.Internal.Foreign.Storable
import GHC.Internal.Foreign.C.Error
import GHC.Internal.Bits
import GHC.Internal.Ptr
import GHC.Internal.Data.Maybe
import GHC.Internal.Base
import GHC.Internal.IO.Buffer
import GHC.Internal.IO.Encoding.Failure
import GHC.Internal.IO.Encoding.Types
import GHC.Internal.List (span)
import GHC.Internal.Num
import GHC.Internal.Show
import GHC.Internal.Word
import GHC.Internal.Real
import GHC.Internal.IO.Unsafe (unsafePerformIO)
import GHC.Internal.System.Posix.Internals

c_DEBUG_DUMP :: Bool
c_DEBUG_DUMP :: Bool
c_DEBUG_DUMP = Bool
False

iconv_trace :: String -> IO ()
iconv_trace :: String -> IO ()
iconv_trace String
s
 | Bool
c_DEBUG_DUMP = String -> IO ()
puts String
s
 | Bool
otherwise    = () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()

-- -----------------------------------------------------------------------------
-- iconv encoders/decoders

{-# NOINLINE localeEncodingName #-}
localeEncodingName :: String
localeEncodingName :: String
localeEncodingName = IO String -> String
forall a. IO a -> a
unsafePerformIO (IO String -> String) -> IO String -> String
forall a b. (a -> b) -> a -> b
$ do
   -- Use locale_charset() or nl_langinfo(CODESET) to get the encoding
   -- if we have either of them.
   cstr <- IO CString
c_localeEncoding
   peekCAString cstr -- Assume charset names are ASCII

-- We hope iconv_t is a storable type.  It should be, since it has at least the
-- value -1, which is a possible return value from iconv_open.
type IConv = CLong -- ToDo: (#type iconv_t)

foreign import ccall unsafe "hs_iconv_open"
    hs_iconv_open :: CString -> CString -> IO IConv

foreign import ccall unsafe "hs_iconv_close"
    hs_iconv_close :: IConv -> IO CInt

foreign import ccall unsafe "hs_iconv"
    hs_iconv :: IConv -> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize
          -> IO CSize

foreign import ccall unsafe "localeEncoding"
    c_localeEncoding :: IO CString

haskellChar :: String
#if defined(WORDS_BIGENDIAN)
haskellChar | charSize == 2 = "UTF-16BE"
            | otherwise     = "UTF-32BE"
#else
haskellChar :: String
haskellChar | Int
charSize Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2 = String
"UTF-16LE"
            | Bool
otherwise     = String
"UTF-32LE"
#endif

char_shift :: Int
char_shift :: Int
char_shift | Int
charSize Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2 = Int
1
           | Bool
otherwise     = Int
2

iconvEncoding :: String -> IO (Maybe TextEncoding)
iconvEncoding :: String -> IO (Maybe TextEncoding)
iconvEncoding = CodingFailureMode -> String -> IO (Maybe TextEncoding)
mkIconvEncoding CodingFailureMode
ErrorOnCodingFailure

-- | Construct an iconv-based 'TextEncoding' for the given character set and
-- 'CodingFailureMode'.
--
-- As iconv is missing in some minimal environments (e.g. #10298), this
-- checks to ensure that iconv is working properly before returning the
-- encoding, returning 'Nothing' if not.
mkIconvEncoding :: CodingFailureMode -> String -> IO (Maybe TextEncoding)
mkIconvEncoding :: CodingFailureMode -> String -> IO (Maybe TextEncoding)
mkIconvEncoding CodingFailureMode
cfm String
charset = do
    let enc :: TextEncoding
enc = TextEncoding {
                  textEncodingName :: String
textEncodingName = String
charset,
                  mkTextDecoder :: IO (TextDecoder ())
mkTextDecoder = String
-> String
-> (Buffer Word8
    -> Buffer CharBufElem -> IO (Buffer Word8, Buffer CharBufElem))
-> (IConv
    -> Buffer Word8
    -> Buffer CharBufElem
    -> IO (CodingProgress, Buffer Word8, Buffer CharBufElem))
-> IO (TextDecoder ())
forall a b.
String
-> String
-> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
-> (IConv
    -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (BufferCodec a b ())
newIConv String
raw_charset (String
haskellChar String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
suffix)
                                           (CodingFailureMode
-> Buffer Word8
-> Buffer CharBufElem
-> IO (Buffer Word8, Buffer CharBufElem)
recoverDecode CodingFailureMode
cfm) IConv
-> Buffer Word8
-> Buffer CharBufElem
-> IO (CodingProgress, Buffer Word8, Buffer CharBufElem)
iconvDecode,
                  mkTextEncoder :: IO (TextEncoder ())
mkTextEncoder = String
-> String
-> (Buffer CharBufElem
    -> Buffer Word8 -> IO (Buffer CharBufElem, Buffer Word8))
-> (IConv
    -> Buffer CharBufElem
    -> Buffer Word8
    -> IO (CodingProgress, Buffer CharBufElem, Buffer Word8))
-> IO (TextEncoder ())
forall a b.
String
-> String
-> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
-> (IConv
    -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (BufferCodec a b ())
newIConv String
haskellChar String
charset
                                           (CodingFailureMode
-> Buffer CharBufElem
-> Buffer Word8
-> IO (Buffer CharBufElem, Buffer Word8)
recoverEncode CodingFailureMode
cfm) IConv
-> Buffer CharBufElem
-> Buffer Word8
-> IO (CodingProgress, Buffer CharBufElem, Buffer Word8)
iconvEncode}
    good <- TextEncoding -> CharBufElem -> IO Bool
charIsRepresentable TextEncoding
enc CharBufElem
'a'
    return $ if good
               then Just enc
               else Nothing
  where
    -- An annoying feature of GNU iconv is that the //PREFIXES only take
    -- effect when they appear on the tocode parameter to iconv_open:
    (String
raw_charset, String
suffix) = (CharBufElem -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (CharBufElem -> CharBufElem -> Bool
forall a. Eq a => a -> a -> Bool
/= CharBufElem
'/') String
charset

newIConv :: String -> String
   -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
   -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
   -> IO (BufferCodec a b ())
newIConv :: forall a b.
String
-> String
-> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
-> (IConv
    -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (BufferCodec a b ())
newIConv String
from String
to Buffer a -> Buffer b -> IO (Buffer a, Buffer b)
rec IConv
-> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b)
fn =
  -- Assume charset names are ASCII
  String
-> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ())
forall a. String -> (CString -> IO a) -> IO a
withCAString String
from ((CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ()))
-> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ())
forall a b. (a -> b) -> a -> b
$ \ CString
from_str ->
  String
-> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ())
forall a. String -> (CString -> IO a) -> IO a
withCAString String
to   ((CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ()))
-> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ())
forall a b. (a -> b) -> a -> b
$ \ CString
to_str -> do
    iconvt <- String -> IO IConv -> IO IConv
forall a. (Eq a, Num a) => String -> IO a -> IO a
throwErrnoIfMinus1 String
"mkTextEncoding" (IO IConv -> IO IConv) -> IO IConv -> IO IConv
forall a b. (a -> b) -> a -> b
$ CString -> CString -> IO IConv
hs_iconv_open CString
to_str CString
from_str
    let iclose = String -> IO CInt -> IO ()
forall a. (Eq a, Num a) => String -> IO a -> IO ()
throwErrnoIfMinus1_ String
"Iconv.close" (IO CInt -> IO ()) -> IO CInt -> IO ()
forall a b. (a -> b) -> a -> b
$ IConv -> IO CInt
hs_iconv_close IConv
iconvt
        fn_iconvt Buffer a
ibuf Buffer b
obuf State# RealWorld
st = case IO (CodingProgress, Buffer a, Buffer b)
-> State# RealWorld
-> (# State# RealWorld, (CodingProgress, Buffer a, Buffer b) #)
forall a. IO a -> State# RealWorld -> (# State# RealWorld, a #)
unIO (IConv
-> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b)
fn IConv
iconvt Buffer a
ibuf Buffer b
obuf) State# RealWorld
st of
          (# State# RealWorld
st', (CodingProgress
prog, Buffer a
ibuf', Buffer b
obuf') #) -> (# State# RealWorld
st', CodingProgress
prog, Buffer a
ibuf', Buffer b
obuf' #)
    return BufferCodec# {
                encode#   = fn_iconvt,
                recover#  = rec#,
                close#    = iclose,
                -- iconv doesn't supply a way to save/restore the state
                getState# = return (),
                setState# = const $ return ()
                }
  where
    rec# :: Buffer a
-> Buffer b
-> State# RealWorld
-> (# State# RealWorld, Buffer a, Buffer b #)
rec# Buffer a
ibuf Buffer b
obuf State# RealWorld
st = case IO (Buffer a, Buffer b)
-> State# RealWorld -> (# State# RealWorld, (Buffer a, Buffer b) #)
forall a. IO a -> State# RealWorld -> (# State# RealWorld, a #)
unIO (Buffer a -> Buffer b -> IO (Buffer a, Buffer b)
rec Buffer a
ibuf Buffer b
obuf) State# RealWorld
st of
      (# State# RealWorld
st', (Buffer a
ibuf', Buffer b
obuf') #) -> (# State# RealWorld
st', Buffer a
ibuf', Buffer b
obuf' #)

iconvDecode :: IConv -> Buffer Word8 -> Buffer Char -> IO (CodingProgress, Buffer Word8, Buffer Char)
iconvDecode :: IConv
-> Buffer Word8
-> Buffer CharBufElem
-> IO (CodingProgress, Buffer Word8, Buffer CharBufElem)
iconvDecode IConv
iconv_t Buffer Word8
ibuf Buffer CharBufElem
obuf = IConv
-> Buffer Word8
-> Int
-> Buffer CharBufElem
-> Int
-> IO (CodingProgress, Buffer Word8, Buffer CharBufElem)
forall a b.
IConv
-> Buffer a
-> Int
-> Buffer b
-> Int
-> IO (CodingProgress, Buffer a, Buffer b)
iconvRecode IConv
iconv_t Buffer Word8
ibuf Int
0 Buffer CharBufElem
obuf Int
char_shift

iconvEncode :: IConv -> Buffer Char -> Buffer Word8 -> IO (CodingProgress, Buffer Char, Buffer Word8)
iconvEncode :: IConv
-> Buffer CharBufElem
-> Buffer Word8
-> IO (CodingProgress, Buffer CharBufElem, Buffer Word8)
iconvEncode IConv
iconv_t Buffer CharBufElem
ibuf Buffer Word8
obuf = IConv
-> Buffer CharBufElem
-> Int
-> Buffer Word8
-> Int
-> IO (CodingProgress, Buffer CharBufElem, Buffer Word8)
forall a b.
IConv
-> Buffer a
-> Int
-> Buffer b
-> Int
-> IO (CodingProgress, Buffer a, Buffer b)
iconvRecode IConv
iconv_t Buffer CharBufElem
ibuf Int
char_shift Buffer Word8
obuf Int
0

iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int
            -> IO (CodingProgress, Buffer a, Buffer b)
iconvRecode :: forall a b.
IConv
-> Buffer a
-> Int
-> Buffer b
-> Int
-> IO (CodingProgress, Buffer a, Buffer b)
iconvRecode IConv
iconv_t
  input :: Buffer a
input@Buffer{  bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer a
iraw, bufL :: forall e. Buffer e -> Int
bufL=Int
ir, bufR :: forall e. Buffer e -> Int
bufR=Int
iw, bufSize :: forall e. Buffer e -> Int
bufSize=Int
_  }  Int
iscale
  output :: Buffer b
output@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer b
oraw, bufL :: forall e. Buffer e -> Int
bufL=Int
_,  bufR :: forall e. Buffer e -> Int
bufR=Int
ow, bufSize :: forall e. Buffer e -> Int
bufSize=Int
os }  Int
oscale
  = do
    String -> IO ()
iconv_trace (String
"haskellChar=" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
haskellChar)
    String -> IO ()
iconv_trace (String
"iconvRecode before, input=" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show (Buffer a -> String
forall a. Buffer a -> String
summaryBuffer Buffer a
input))
    String -> IO ()
iconv_trace (String
"iconvRecode before, output=" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show (Buffer b -> String
forall a. Buffer a -> String
summaryBuffer Buffer b
output))
    RawBuffer a
-> (Ptr a -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall e a. RawBuffer e -> (Ptr e -> IO a) -> IO a
withRawBuffer RawBuffer a
iraw ((Ptr a -> IO (CodingProgress, Buffer a, Buffer b))
 -> IO (CodingProgress, Buffer a, Buffer b))
-> (Ptr a -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall a b. (a -> b) -> a -> b
$ \ Ptr a
piraw -> do
    RawBuffer b
-> (Ptr b -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall e a. RawBuffer e -> (Ptr e -> IO a) -> IO a
withRawBuffer RawBuffer b
oraw ((Ptr b -> IO (CodingProgress, Buffer a, Buffer b))
 -> IO (CodingProgress, Buffer a, Buffer b))
-> (Ptr b -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall a b. (a -> b) -> a -> b
$ \ Ptr b
poraw -> do
    CString
-> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with (Ptr a
piraw Ptr a -> Int -> CString
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` (Int
ir Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
iscale)) ((Ptr CString -> IO (CodingProgress, Buffer a, Buffer b))
 -> IO (CodingProgress, Buffer a, Buffer b))
-> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall a b. (a -> b) -> a -> b
$ \ Ptr CString
p_inbuf -> do
    CString
-> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with (Ptr b
poraw Ptr b -> Int -> CString
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` (Int
ow Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
oscale)) ((Ptr CString -> IO (CodingProgress, Buffer a, Buffer b))
 -> IO (CodingProgress, Buffer a, Buffer b))
-> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall a b. (a -> b) -> a -> b
$ \ Ptr CString
p_outbuf -> do
    CSize
-> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral ((Int
iwInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
ir) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
iscale)) ((Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b))
 -> IO (CodingProgress, Buffer a, Buffer b))
-> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall a b. (a -> b) -> a -> b
$ \ Ptr CSize
p_inleft -> do
    CSize
-> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral ((Int
osInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
ow) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
oscale)) ((Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b))
 -> IO (CodingProgress, Buffer a, Buffer b))
-> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (CodingProgress, Buffer a, Buffer b)
forall a b. (a -> b) -> a -> b
$ \ Ptr CSize
p_outleft -> do
      res <- IConv
-> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize -> IO CSize
hs_iconv IConv
iconv_t Ptr CString
p_inbuf Ptr CSize
p_inleft Ptr CString
p_outbuf Ptr CSize
p_outleft
      new_inleft  <- peek p_inleft
      new_outleft <- peek p_outleft
      let
          new_inleft'  = CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
new_inleft Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
iscale
          new_outleft' = CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
new_outleft Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
oscale
          new_input
            | CSize
new_inleft CSize -> CSize -> Bool
forall a. Eq a => a -> a -> Bool
== CSize
0  = Buffer a
input { bufL = 0, bufR = 0 }
            | Bool
otherwise        = Buffer a
input { bufL = iw - new_inleft' }
          new_output = Buffer b
output{ bufR = os - new_outleft' }
      iconv_trace ("iconv res=" ++ show res)
      iconv_trace ("iconvRecode after,  input=" ++ show (summaryBuffer new_input))
      iconv_trace ("iconvRecode after,  output=" ++ show (summaryBuffer new_output))
      if (res /= -1)
        then -- all input translated
           return (InputUnderflow, new_input, new_output)
        else do
      errno <- getErrno
      case errno of
        Errno
e | Errno
e Errno -> Errno -> Bool
forall a. Eq a => a -> a -> Bool
== Errno
e2BIG  -> (CodingProgress, Buffer a, Buffer b)
-> IO (CodingProgress, Buffer a, Buffer b)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (CodingProgress
OutputUnderflow, Buffer a
new_input, Buffer b
new_output)
          | Errno
e Errno -> Errno -> Bool
forall a. Eq a => a -> a -> Bool
== Errno
eINVAL -> (CodingProgress, Buffer a, Buffer b)
-> IO (CodingProgress, Buffer a, Buffer b)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (CodingProgress
InputUnderflow, Buffer a
new_input, Buffer b
new_output)
           -- Sometimes iconv reports EILSEQ for a
           -- character in the input even when there is no room
           -- in the output; in this case we might be about to
           -- change the encoding anyway, so the following bytes
           -- could very well be in a different encoding.
           --
           -- Because we can only say InvalidSequence if there is at least
           -- one element left in the output, we have to special case this.
          | Errno
e Errno -> Errno -> Bool
forall a. Eq a => a -> a -> Bool
== Errno
eILSEQ -> (CodingProgress, Buffer a, Buffer b)
-> IO (CodingProgress, Buffer a, Buffer b)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (if Int
new_outleft' Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then CodingProgress
OutputUnderflow else CodingProgress
InvalidSequence, Buffer a
new_input, Buffer b
new_output)
          | Bool
otherwise -> do
              String -> IO ()
iconv_trace (String
"iconv returned error: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ IOError -> String
forall a. Show a => a -> String
show (String -> Errno -> Maybe Handle -> Maybe String -> IOError
errnoToIOError String
"iconv" Errno
e Maybe Handle
forall a. Maybe a
Nothing Maybe String
forall a. Maybe a
Nothing))
              String -> IO (CodingProgress, Buffer a, Buffer b)
forall a. String -> IO a
throwErrno String
"iconvRecoder"

#endif /* !mingw32_HOST_OS */