{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE ViewPatterns #-}

module GHC.Parser.String (
  StringLexError (..),
  lexString,
  lexMultilineString,

  -- * StringMeta
  StringMeta (..),
  defaultStrMeta,

  -- * Unicode smart quote helpers
  isDoubleSmartQuote,
  isSingleSmartQuote,
) where

import GHC.Prelude hiding (getChar)

import Control.Arrow ((>>>))
import Control.Monad (when)
import Data.Char (chr, ord)
import Data.Data (Data)
import qualified Data.Foldable1 as Foldable1
import qualified Data.List.NonEmpty as NonEmpty
import Data.Maybe (listToMaybe, mapMaybe)
import GHC.Data.OrdList (fromOL, nilOL, snocOL)
import GHC.Data.StringBuffer (StringBuffer)
import qualified GHC.Data.StringBuffer as StringBuffer
import GHC.Parser.CharClass (
  hexDigit,
  is_decdigit,
  is_hexdigit,
  is_octdigit,
  is_space,
  octDecDigit,
 )
import GHC.Parser.Errors.Types (LexErr (..))
import GHC.Utils.Panic (panic)
import Language.Haskell.Syntax.Module.Name (ModuleName)

type BufPos = Int
data StringLexError = StringLexError LexErr BufPos
  deriving (Int -> StringLexError -> ShowS
[StringLexError] -> ShowS
StringLexError -> String
(Int -> StringLexError -> ShowS)
-> (StringLexError -> String)
-> ([StringLexError] -> ShowS)
-> Show StringLexError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> StringLexError -> ShowS
showsPrec :: Int -> StringLexError -> ShowS
$cshow :: StringLexError -> String
show :: StringLexError -> String
$cshowList :: [StringLexError] -> ShowS
showList :: [StringLexError] -> ShowS
Show, StringLexError -> StringLexError -> Bool
(StringLexError -> StringLexError -> Bool)
-> (StringLexError -> StringLexError -> Bool) -> Eq StringLexError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: StringLexError -> StringLexError -> Bool
== :: StringLexError -> StringLexError -> Bool
$c/= :: StringLexError -> StringLexError -> Bool
/= :: StringLexError -> StringLexError -> Bool
Eq)

lexString :: Int -> StringBuffer -> Either StringLexError String
lexString :: Int -> StringBuffer -> Either StringLexError String
lexString = (String -> Either (Char, LexErr) String)
-> ([CharPos] -> Either (CharPos, LexErr) [CharPos])
-> Int
-> StringBuffer
-> Either StringLexError String
lexStringWith String -> Either (Char, LexErr) String
forall c. HasChar c => [c] -> Either (c, LexErr) [c]
processChars [CharPos] -> Either (CharPos, LexErr) [CharPos]
forall c. HasChar c => [c] -> Either (c, LexErr) [c]
processChars
  where
    processChars :: HasChar c => [c] -> Either (c, LexErr) [c]
    processChars :: forall c. HasChar c => [c] -> Either (c, LexErr) [c]
processChars =
          [c] -> [c]
forall c. HasChar c => [c] -> [c]
collapseGaps
      ([c] -> [c])
-> ([c] -> Either (c, LexErr) [c]) -> [c] -> Either (c, LexErr) [c]
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> [c] -> Either (c, LexErr) [c]
forall c. HasChar c => [c] -> Either (c, LexErr) [c]
resolveEscapes

-- -----------------------------------------------------------------------------
-- Lexing interface

{-
Note [Lexing strings]
~~~~~~~~~~~~~~~~~~~~~

After verifying if a string is lexically valid with Alex, we still need to do
some post processing of the string, namely:
1. Collapse string gaps
2. Resolve escape characters

The problem: 'lexemeToString' is more performant than manually reading
characters from the StringBuffer. However, that completely erases the position
of each character, which we need in order to report the correct position for
error messages (e.g. when resolving escape characters).

So what we'll do is do two passes. The first pass is optimistic; just convert
to a plain String and process it. If this results in an error, we do a second
pass, this time where each character is annotated with its position. Now, the
error has all the information it needs.

Ideally, lexStringWith would take a single (forall c. HasChar c => ...) function,
but to help the specializer, we pass it in twice to concretize it for the two
types we actually use.
-}

-- | See Note [Lexing strings]
lexStringWith ::
  ([Char] -> Either (Char, LexErr) [Char])
  -> ([CharPos] -> Either (CharPos, LexErr) [CharPos])
  -> Int
  -> StringBuffer
  -> Either StringLexError String
lexStringWith :: (String -> Either (Char, LexErr) String)
-> ([CharPos] -> Either (CharPos, LexErr) [CharPos])
-> Int
-> StringBuffer
-> Either StringLexError String
lexStringWith String -> Either (Char, LexErr) String
processChars [CharPos] -> Either (CharPos, LexErr) [CharPos]
processCharsPos Int
len StringBuffer
buf =
  case String -> Either (Char, LexErr) String
processChars (String -> Either (Char, LexErr) String)
-> String -> Either (Char, LexErr) String
forall a b. (a -> b) -> a -> b
$ StringBuffer -> Int -> String
bufferChars StringBuffer
buf Int
len of
    Right String
s -> String -> Either StringLexError String
forall a b. b -> Either a b
Right String
s
    Left (Char, LexErr)
_ ->
      case [CharPos] -> Either (CharPos, LexErr) [CharPos]
processCharsPos ([CharPos] -> Either (CharPos, LexErr) [CharPos])
-> [CharPos] -> Either (CharPos, LexErr) [CharPos]
forall a b. (a -> b) -> a -> b
$ StringBuffer -> Int -> [CharPos]
bufferLocatedChars StringBuffer
buf Int
len of
        Right [CharPos]
_ -> String -> Either StringLexError String
forall a. HasCallStack => String -> a
panic String
"expected lex error on second pass"
        Left ((Char
_, Int
pos), LexErr
e) -> StringLexError -> Either StringLexError String
StringLexError -> Either StringLexError String
forall a b. a -> Either a b
Left (StringLexError -> Either StringLexError String)
-> StringLexError -> Either StringLexError String
forall a b. (a -> b) -> a -> b
$ LexErr -> Int -> StringLexError
StringLexError LexErr
e Int
pos

class HasChar c where
  getChar :: c -> Char
  setChar :: Char -> c -> c

instance HasChar Char where
  getChar :: Char -> Char
getChar = Char -> Char
forall a. a -> a
id
  setChar :: Char -> Char -> Char
setChar = Char -> Char -> Char
forall a b. a -> b -> a
const

instance HasChar (Char, x) where
  getChar :: (Char, x) -> Char
getChar = (Char, x) -> Char
forall a b. (a, b) -> a
fst
  setChar :: Char -> (Char, x) -> (Char, x)
setChar Char
c (Char
_, x
x) = (Char
c, x
x)

pattern Char :: HasChar c => Char -> c
pattern $mChar :: forall {r} {c}. HasChar c => c -> (Char -> r) -> ((# #) -> r) -> r
Char c <- (getChar -> c)
{-# COMPLETE Char #-}

bufferChars :: StringBuffer -> Int -> [Char]
bufferChars :: StringBuffer -> Int -> String
bufferChars = StringBuffer -> Int -> String
StringBuffer.lexemeToString

type CharPos = (Char, BufPos)

bufferLocatedChars :: StringBuffer -> Int -> [CharPos]
bufferLocatedChars :: StringBuffer -> Int -> [CharPos]
bufferLocatedChars StringBuffer
initialBuf Int
len = StringBuffer -> [CharPos]
go StringBuffer
initialBuf
  where
    go :: StringBuffer -> [CharPos]
go StringBuffer
buf
      | StringBuffer -> Bool
atEnd StringBuffer
buf = []
      | Bool
otherwise  =
          let (Char
c, StringBuffer
buf') = StringBuffer -> (Char, StringBuffer)
StringBuffer.nextChar StringBuffer
buf
           in (Char
c, StringBuffer -> Int
StringBuffer.cur StringBuffer
buf) CharPos -> [CharPos] -> [CharPos]
forall a. a -> [a] -> [a]
: StringBuffer -> [CharPos]
go StringBuffer
buf'

    atEnd :: StringBuffer -> Bool
atEnd StringBuffer
buf = StringBuffer -> StringBuffer -> Int
StringBuffer.byteDiff StringBuffer
initialBuf StringBuffer
buf Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len

-- -----------------------------------------------------------------------------
-- Lexing phases

-- | Collapse all string gaps in the given input.
--
-- Iterates through the input in `go` until we encounter a backslash. The
-- @stringchar Alex regex only allows backslashes in two places: escape codes
-- and string gaps.
--
--   * If the next character is a space, it has to be the start of a string gap
--     AND it must end, since the @gap Alex regex will only match if it ends.
--     Collapse the gap and continue the main iteration loop.
--
--   * Otherwise, this is an escape code. If it's an escape code, there are
--     ONLY three possibilities (see the @escape Alex regex):
--       1. The escape code is "\\"
--       2. The escape code is "\^\"
--       3. The escape code does not have a backslash, other than the initial
--          backslash
--
--     In the first two possibilities, just skip them and continue the main
--     iteration loop ("skip" as in "keep in the list as-is"). In the last one,
--     we can just skip the backslash, then continue the main iteration loop.
--     the rest of the escape code will be skipped as normal characters in the
--     string; no need to fully parse a proper escape code.
collapseGaps :: HasChar c => [c] -> [c]
collapseGaps :: forall c. HasChar c => [c] -> [c]
collapseGaps = [c] -> [c]
go
  where
    go :: [c] -> [c]
go = \case
      -- Match the start of a string gap + drop gap
      -- #25784: string gaps are semantically equivalent to "\&"
      c1 :: c
c1@(Char Char
'\\') : Char Char
c : [c]
cs
        | Char -> Bool
is_space Char
c -> c
c1 c -> [c] -> [c]
forall a. a -> [a] -> [a]
: Char -> c -> c
forall c. HasChar c => Char -> c -> c
setChar Char
'&' c
c1 c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go ([c] -> [c]
dropGap [c]
cs)
      -- Match all possible escape characters that include a backslash
      c1 :: c
c1@(Char Char
'\\') : c2 :: c
c2@(Char Char
'\\') : [c]
cs
        -> c
c1 c -> [c] -> [c]
forall a. a -> [a] -> [a]
: c
c2 c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go [c]
cs
      c1 :: c
c1@(Char Char
'\\') : c2 :: c
c2@(Char Char
'^') : c3 :: c
c3@(Char Char
'\\') : [c]
cs
        -> c
c1 c -> [c] -> [c]
forall a. a -> [a] -> [a]
: c
c2 c -> [c] -> [c]
forall a. a -> [a] -> [a]
: c
c3 c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go [c]
cs
      -- Otherwise, just keep looping
      c
c : [c]
cs -> c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go [c]
cs
      [] -> []

    dropGap :: [c] -> [c]
dropGap = \case
      Char Char
'\\' : [c]
cs -> [c]
cs
      c
_ : [c]
cs -> [c] -> [c]
dropGap [c]
cs
      -- Unreachable since gaps must end; see docstring
      [] -> String -> [c]
forall a. HasCallStack => String -> a
panic String
"gap unexpectedly ended"

resolveEscapes :: HasChar c => [c] -> Either (c, LexErr) [c]
resolveEscapes :: forall c. HasChar c => [c] -> Either (c, LexErr) [c]
resolveEscapes = OrdList c -> [c] -> Either (c, LexErr) [c]
forall {a}. HasChar a => OrdList a -> [a] -> Either (a, LexErr) [a]
go OrdList c
forall a. OrdList a
nilOL
  where
    go :: OrdList a -> [a] -> Either (a, LexErr) [a]
go !OrdList a
acc = \case
      [] -> [a] -> Either (a, LexErr) [a]
forall a. a -> Either (a, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([a] -> Either (a, LexErr) [a]) -> [a] -> Either (a, LexErr) [a]
forall a b. (a -> b) -> a -> b
$ OrdList a -> [a]
forall a. OrdList a -> [a]
fromOL OrdList a
acc
      Char Char
'\\' : Char Char
'&' : [a]
cs -> OrdList a -> [a] -> Either (a, LexErr) [a]
go OrdList a
acc [a]
cs
      backslash :: a
backslash@(Char Char
'\\') : [a]
cs ->
        case [a] -> Either (a, LexErr) (Char, [a])
forall c. HasChar c => [c] -> Either (c, LexErr) (Char, [c])
resolveEscapeChar [a]
cs of
          Right (Char
esc, [a]
cs') -> OrdList a -> [a] -> Either (a, LexErr) [a]
go (OrdList a
acc OrdList a -> a -> OrdList a
forall a. OrdList a -> a -> OrdList a
`snocOL` Char -> a -> a
forall c. HasChar c => Char -> c -> c
setChar Char
esc a
backslash) [a]
cs'
          Left (a
c, LexErr
e) -> (a, LexErr) -> Either (a, LexErr) [a]
forall a b. a -> Either a b
Left (a
c, LexErr
e)
      a
c : [a]
cs -> OrdList a -> [a] -> Either (a, LexErr) [a]
go (OrdList a
acc OrdList a -> a -> OrdList a
forall a. OrdList a -> a -> OrdList a
`snocOL` a
c) [a]
cs

-- -----------------------------------------------------------------------------
-- Escape characters

-- | Resolve a escape character, after having just lexed a backslash.
-- Assumes escape character is valid.
resolveEscapeChar :: HasChar c => [c] -> Either (c, LexErr) (Char, [c])
resolveEscapeChar :: forall c. HasChar c => [c] -> Either (c, LexErr) (Char, [c])
resolveEscapeChar = \case
  Char Char
'a'  : [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\a', [c]
cs)
  Char Char
'b'  : [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\b', [c]
cs)
  Char Char
'f'  : [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\f', [c]
cs)
  Char Char
'n'  : [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\n', [c]
cs)
  Char Char
'r'  : [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\r', [c]
cs)
  Char Char
't'  : [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\t', [c]
cs)
  Char Char
'v'  : [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\v', [c]
cs)
  Char Char
'\\' : [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\\', [c]
cs)
  Char Char
'"'  : [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\"', [c]
cs)
  Char Char
'\'' : [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\'', [c]
cs)
  -- escape codes
  Char Char
'x' : [c]
cs -> (Char -> Bool)
-> Int -> (Char -> Int) -> [c] -> Either (c, LexErr) (Char, [c])
forall {a}.
HasChar a =>
(Char -> Bool)
-> Int -> (Char -> Int) -> [a] -> Either (a, LexErr) (Char, [a])
parseNum Char -> Bool
is_hexdigit Int
16 Char -> Int
hexDigit [c]
cs
  Char Char
'o' : [c]
cs -> (Char -> Bool)
-> Int -> (Char -> Int) -> [c] -> Either (c, LexErr) (Char, [c])
forall {a}.
HasChar a =>
(Char -> Bool)
-> Int -> (Char -> Int) -> [a] -> Either (a, LexErr) (Char, [a])
parseNum Char -> Bool
is_octdigit Int
8 Char -> Int
octDecDigit [c]
cs
  cs :: [c]
cs@(Char Char
c : [c]
_) | Char -> Bool
is_decdigit Char
c -> (Char -> Bool)
-> Int -> (Char -> Int) -> [c] -> Either (c, LexErr) (Char, [c])
forall {a}.
HasChar a =>
(Char -> Bool)
-> Int -> (Char -> Int) -> [a] -> Either (a, LexErr) (Char, [a])
parseNum Char -> Bool
is_decdigit Int
10 Char -> Int
octDecDigit [c]
cs
  -- control characters (e.g. '\^M')
  Char Char
'^' : Char Char
c : [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'@', [c]
cs)
  -- long form escapes (e.g. '\NUL')
  [c]
cs | Just (Char
esc, [c]
cs') <- [c] -> Maybe (Char, [c])
forall c. HasChar c => [c] -> Maybe (Char, [c])
parseLongEscape [c]
cs -> (Char, [c]) -> Either (c, LexErr) (Char, [c])
forall a. a -> Either (c, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
esc, [c]
cs')
  -- shouldn't happen
  Char Char
c : [c]
_ -> String -> Either (c, LexErr) (Char, [c])
forall a. HasCallStack => String -> a
panic (String -> Either (c, LexErr) (Char, [c]))
-> String -> Either (c, LexErr) (Char, [c])
forall a b. (a -> b) -> a -> b
$ String
"found unexpected escape character: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Char -> String
forall a. Show a => a -> String
show Char
c
  [] -> String -> Either (c, LexErr) (Char, [c])
forall a. HasCallStack => String -> a
panic String
"escape character unexpectedly ended"
  where
    parseNum :: (Char -> Bool)
-> Int -> (Char -> Int) -> [a] -> Either (a, LexErr) (Char, [a])
parseNum Char -> Bool
isDigit Int
base Char -> Int
toDigit =
      let go :: Int -> [a] -> Either (a, LexErr) (Char, [a])
go Int
x = \case
            ch :: a
ch@(Char Char
c) : [a]
cs | Char -> Bool
isDigit Char
c -> do
              let x' :: Int
x' = Int
x Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
base Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
toDigit Char
c
              Bool -> Either (a, LexErr) () -> Either (a, LexErr) ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
x' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0x10ffff) (Either (a, LexErr) () -> Either (a, LexErr) ())
-> Either (a, LexErr) () -> Either (a, LexErr) ()
forall a b. (a -> b) -> a -> b
$ (a, LexErr) -> Either (a, LexErr) ()
forall a b. a -> Either a b
Left (a
ch, LexErr
LexNumEscapeRange)
              Int -> [a] -> Either (a, LexErr) (Char, [a])
go Int
x' [a]
cs
            [a]
cs -> (Char, [a]) -> Either (a, LexErr) (Char, [a])
forall a. a -> Either (a, LexErr) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int -> Char
chr Int
x, [a]
cs)
       in Int -> [a] -> Either (a, LexErr) (Char, [a])
go Int
0

parseLongEscape :: HasChar c => [c] -> Maybe (Char, [c])
parseLongEscape :: forall c. HasChar c => [c] -> Maybe (Char, [c])
parseLongEscape [c]
cs = [(Char, [c])] -> Maybe (Char, [c])
forall a. [a] -> Maybe a
listToMaybe (((String, Char) -> Maybe (Char, [c]))
-> [(String, Char)] -> [(Char, [c])]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (String, Char) -> Maybe (Char, [c])
tryParse [(String, Char)]
longEscapeCodes)
  where
    tryParse :: (String, Char) -> Maybe (Char, [c])
tryParse (String
code, Char
esc) =
      case Int -> [c] -> ([c], [c])
forall a. Int -> [a] -> ([a], [a])
splitAt (String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
code) [c]
cs of
        ([c]
pre, [c]
cs') | (c -> Char) -> [c] -> String
forall a b. (a -> b) -> [a] -> [b]
map c -> Char
forall c. HasChar c => c -> Char
getChar [c]
pre String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
code -> (Char, [c]) -> Maybe (Char, [c])
forall a. a -> Maybe a
Just (Char
esc, [c]
cs')
        ([c], [c])
_ -> Maybe (Char, [c])
forall a. Maybe a
Nothing

    longEscapeCodes :: [(String, Char)]
longEscapeCodes =
      [ (String
"NUL", Char
'\NUL')
      , (String
"SOH", Char
'\SOH')
      , (String
"STX", Char
'\STX')
      , (String
"ETX", Char
'\ETX')
      , (String
"EOT", Char
'\EOT')
      , (String
"ENQ", Char
'\ENQ')
      , (String
"ACK", Char
'\ACK')
      , (String
"BEL", Char
'\BEL')
      , (String
"BS" , Char
'\BS' )
      , (String
"HT" , Char
'\HT' )
      , (String
"LF" , Char
'\LF' )
      , (String
"VT" , Char
'\VT' )
      , (String
"FF" , Char
'\FF' )
      , (String
"CR" , Char
'\CR' )
      , (String
"SO" , Char
'\SO' )
      , (String
"SI" , Char
'\SI' )
      , (String
"DLE", Char
'\DLE')
      , (String
"DC1", Char
'\DC1')
      , (String
"DC2", Char
'\DC2')
      , (String
"DC3", Char
'\DC3')
      , (String
"DC4", Char
'\DC4')
      , (String
"NAK", Char
'\NAK')
      , (String
"SYN", Char
'\SYN')
      , (String
"ETB", Char
'\ETB')
      , (String
"CAN", Char
'\CAN')
      , (String
"EM" , Char
'\EM' )
      , (String
"SUB", Char
'\SUB')
      , (String
"ESC", Char
'\ESC')
      , (String
"FS" , Char
'\FS' )
      , (String
"GS" , Char
'\GS' )
      , (String
"RS" , Char
'\RS' )
      , (String
"US" , Char
'\US' )
      , (String
"SP" , Char
'\SP' )
      , (String
"DEL", Char
'\DEL')
      ]

-- -----------------------------------------------------------------------------
-- Unicode Smart Quote detection (#21843)

isDoubleSmartQuote :: Char -> Bool
isDoubleSmartQuote :: Char -> Bool
isDoubleSmartQuote = \case
  Char
'“' -> Bool
True
  Char
'”' -> Bool
True
  Char
_ -> Bool
False

isSingleSmartQuote :: Char -> Bool
isSingleSmartQuote :: Char -> Bool
isSingleSmartQuote = \case
  Char
'‘' -> Bool
True
  Char
'’' -> Bool
True
  Char
_ -> Bool
False

-- -----------------------------------------------------------------------------
-- StringMeta

data StringMeta = StringMeta
  { StringMeta -> Bool
strMetaMultiline  :: Bool
  , StringMeta -> Maybe ModuleName
strMetaQualified  :: Maybe ModuleName
  }
  deriving (Int -> StringMeta -> ShowS
[StringMeta] -> ShowS
StringMeta -> String
(Int -> StringMeta -> ShowS)
-> (StringMeta -> String)
-> ([StringMeta] -> ShowS)
-> Show StringMeta
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> StringMeta -> ShowS
showsPrec :: Int -> StringMeta -> ShowS
$cshow :: StringMeta -> String
show :: StringMeta -> String
$cshowList :: [StringMeta] -> ShowS
showList :: [StringMeta] -> ShowS
Show, Typeable StringMeta
Typeable StringMeta =>
(forall (c :: * -> *).
 (forall d b. Data d => c (d -> b) -> d -> c b)
 -> (forall g. g -> c g) -> StringMeta -> c StringMeta)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c StringMeta)
-> (StringMeta -> Constr)
-> (StringMeta -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c StringMeta))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e))
    -> Maybe (c StringMeta))
-> ((forall b. Data b => b -> b) -> StringMeta -> StringMeta)
-> (forall r r'.
    (r -> r' -> r)
    -> r -> (forall d. Data d => d -> r') -> StringMeta -> r)
-> (forall r r'.
    (r' -> r -> r)
    -> r -> (forall d. Data d => d -> r') -> StringMeta -> r)
-> (forall u. (forall d. Data d => d -> u) -> StringMeta -> [u])
-> (forall u.
    Int -> (forall d. Data d => d -> u) -> StringMeta -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> StringMeta -> m StringMeta)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> StringMeta -> m StringMeta)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> StringMeta -> m StringMeta)
-> Data StringMeta
StringMeta -> Constr
StringMeta -> DataType
(forall b. Data b => b -> b) -> StringMeta -> StringMeta
forall a.
Typeable a =>
(forall (c :: * -> *).
 (forall d b. Data d => c (d -> b) -> d -> c b)
 -> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
    (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
    (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u. Int -> (forall d. Data d => d -> u) -> StringMeta -> u
forall u. (forall d. Data d => d -> u) -> StringMeta -> [u]
forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> StringMeta -> r
forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> StringMeta -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> StringMeta -> m StringMeta
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> StringMeta -> m StringMeta
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c StringMeta
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> StringMeta -> c StringMeta
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c StringMeta)
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c StringMeta)
$cgfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> StringMeta -> c StringMeta
gfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> StringMeta -> c StringMeta
$cgunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c StringMeta
gunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c StringMeta
$ctoConstr :: StringMeta -> Constr
toConstr :: StringMeta -> Constr
$cdataTypeOf :: StringMeta -> DataType
dataTypeOf :: StringMeta -> DataType
$cdataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c StringMeta)
dataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c StringMeta)
$cdataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c StringMeta)
dataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c StringMeta)
$cgmapT :: (forall b. Data b => b -> b) -> StringMeta -> StringMeta
gmapT :: (forall b. Data b => b -> b) -> StringMeta -> StringMeta
$cgmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> StringMeta -> r
gmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> StringMeta -> r
$cgmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> StringMeta -> r
gmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> StringMeta -> r
$cgmapQ :: forall u. (forall d. Data d => d -> u) -> StringMeta -> [u]
gmapQ :: forall u. (forall d. Data d => d -> u) -> StringMeta -> [u]
$cgmapQi :: forall u. Int -> (forall d. Data d => d -> u) -> StringMeta -> u
gmapQi :: forall u. Int -> (forall d. Data d => d -> u) -> StringMeta -> u
$cgmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> StringMeta -> m StringMeta
gmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> StringMeta -> m StringMeta
$cgmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> StringMeta -> m StringMeta
gmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> StringMeta -> m StringMeta
$cgmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> StringMeta -> m StringMeta
gmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> StringMeta -> m StringMeta
Data)

defaultStrMeta :: StringMeta
defaultStrMeta :: StringMeta
defaultStrMeta =
  StringMeta
    { strMetaMultiline :: Bool
strMetaMultiline = Bool
False
    , strMetaQualified :: Maybe ModuleName
strMetaQualified = Maybe ModuleName
forall a. Maybe a
Nothing
    }

{- Note [Implementation of QualifiedStrings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This Note describes the implementation of the QualifiedStrings extension
from GHC proposal #723 (https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0723-qualified-strings.rst).

The extension allows users to prefix a string literal with a module qualifier,
e.g. M."foo" which desugars to M.fromString "foo".

AST representation
------------------
Qualified strings are represented in the Haskell AST using the 'HsQualString'
constructor of 'QualLitVal'.

Lexing & Parsing
----------------
When the lexer encounters a module qualifier followed immediately by a string
(e.g., M."..." or M."""..."""), it parses it as a single unit. The qualifier
is extracted and stored. The parser then constructs 'HsQualLit' with the module
qualifier, and 'HsQualString' with the string content.

Desugaring in renamer
---------------------
GHC.Rename.Expr.rnExpr desugars qualified string expressions using
'mkExpandedExpr' to desugar the expression M."foo" into M.fromString "foo".

GHC.Rename.Pat.rnPatAndThen desugars qualified string patterns using
'mkExpandedPat' to desugar the pattern M."foo" into the view pattern
  ( (M.fromString "foo" ==) -> True )

'HsQualLit'/'QualLitPat' should not exist in any 'HsExpr GhcRn'/'HsPat GhcRn'
values because of this desugaring, so we simply panic in all relevant locations.
We can't simply make these constructors uninhabited
(e.g. 'type instance XQualLitE GhcRn = DataConCantHappen')
because we still need a 'HsExpr GhcRn' to put inside 'mkExpandedExpr'.

Pattern-match overlap checking
------------------------------
Since qualified string patterns are desugared into view patterns, the pattern
match checker needs help to see through the desugaring to determine coverage.

In 'GHC.HsToCore.Pmc.Desugar.desugarPat', we treat qualified strings similarly
to standard string literals for overlap checking, ensuring that
`f M."a" = ...; f M."a" = ...` is detected as redundant.
-}

-- -----------------------------------------------------------------------------
-- Multiline strings

-- | See Note [Multiline string literals]
--
-- Assumes string is lexically valid. Skips the steps about splitting
-- and rejoining lines, and instead manually find newline characters,
-- for performance.
lexMultilineString :: Int -> StringBuffer -> Either StringLexError String
lexMultilineString :: Int -> StringBuffer -> Either StringLexError String
lexMultilineString = (String -> Either (Char, LexErr) String)
-> ([CharPos] -> Either (CharPos, LexErr) [CharPos])
-> Int
-> StringBuffer
-> Either StringLexError String
lexStringWith String -> Either (Char, LexErr) String
forall c. HasChar c => [c] -> Either (c, LexErr) [c]
processChars [CharPos] -> Either (CharPos, LexErr) [CharPos]
forall c. HasChar c => [c] -> Either (c, LexErr) [c]
processChars
  where
    processChars :: HasChar c => [c] -> Either (c, LexErr) [c]
    processChars :: forall c. HasChar c => [c] -> Either (c, LexErr) [c]
processChars =
          [c] -> [c]
forall c. HasChar c => [c] -> [c]
collapseGaps             -- Step 1
      ([c] -> [c])
-> ([c] -> Either (c, LexErr) [c]) -> [c] -> Either (c, LexErr) [c]
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> [c] -> [c]
forall c. HasChar c => [c] -> [c]
normalizeEOL
      ([c] -> [c])
-> ([c] -> Either (c, LexErr) [c]) -> [c] -> Either (c, LexErr) [c]
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> [c] -> [c]
forall c. HasChar c => [c] -> [c]
expandLeadingTabs        -- Step 3
      ([c] -> [c])
-> ([c] -> Either (c, LexErr) [c]) -> [c] -> Either (c, LexErr) [c]
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> [c] -> [c]
forall c. HasChar c => [c] -> [c]
rmCommonWhitespacePrefix -- Step 4
      ([c] -> [c])
-> ([c] -> Either (c, LexErr) [c]) -> [c] -> Either (c, LexErr) [c]
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> [c] -> [c]
forall c. HasChar c => [c] -> [c]
collapseOnlyWsLines      -- Step 5
      ([c] -> [c])
-> ([c] -> Either (c, LexErr) [c]) -> [c] -> Either (c, LexErr) [c]
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> [c] -> [c]
forall c. HasChar c => [c] -> [c]
rmFirstNewline           -- Step 7a
      ([c] -> [c])
-> ([c] -> Either (c, LexErr) [c]) -> [c] -> Either (c, LexErr) [c]
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> [c] -> [c]
forall c. HasChar c => [c] -> [c]
rmLastNewline            -- Step 7b
      ([c] -> [c])
-> ([c] -> Either (c, LexErr) [c]) -> [c] -> Either (c, LexErr) [c]
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> [c] -> Either (c, LexErr) [c]
forall c. HasChar c => [c] -> Either (c, LexErr) [c]
resolveEscapes           -- Step 8

    -- Normalize line endings to LF. The spec dictates that lines should be
    -- split on newline characters and rejoined with ``\n``. But because we
    -- aren't actually splitting/rejoining, we'll manually normalize here
    normalizeEOL :: HasChar c => [c] -> [c]
    normalizeEOL :: forall c. HasChar c => [c] -> [c]
normalizeEOL =
      let go :: [c] -> [c]
go = \case
            Char Char
'\r' : c :: c
c@(Char Char
'\n') : [c]
cs -> c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go [c]
cs
            c :: c
c@(Char Char
'\r') : [c]
cs -> Char -> c -> c
forall c. HasChar c => Char -> c -> c
setChar Char
'\n' c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go [c]
cs
            c :: c
c@(Char Char
'\f') : [c]
cs -> Char -> c -> c
forall c. HasChar c => Char -> c -> c
setChar Char
'\n' c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go [c]
cs
            c
c : [c]
cs -> c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go [c]
cs
            [] -> []
       in [c] -> [c]
go

    -- expands all tabs, since the lexer will verify that tabs can only appear
    -- as leading indentation
    expandLeadingTabs :: HasChar c => [c] -> [c]
    expandLeadingTabs :: forall c. HasChar c => [c] -> [c]
expandLeadingTabs =
      let go :: Int -> [c] -> [c]
go !Int
col = \case
            c :: c
c@(Char Char
'\t') : [c]
cs ->
              let fill :: Int
fill = Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
- (Int
col Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
8)
               in Int -> c -> [c]
forall a. Int -> a -> [a]
replicate Int
fill (Char -> c -> c
forall c. HasChar c => Char -> c -> c
setChar Char
' ' c
c) [c] -> [c] -> [c]
forall a. [a] -> [a] -> [a]
++ Int -> [c] -> [c]
go (Int
col Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
fill) [c]
cs
            c
c : [c]
cs -> c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
: Int -> [c] -> [c]
go (if c -> Char
forall c. HasChar c => c -> Char
getChar c
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n' then Int
0 else Int
col Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [c]
cs
            [] -> []
       in Int -> [c] -> [c]
forall {c}. HasChar c => Int -> [c] -> [c]
go Int
0

    rmCommonWhitespacePrefix :: HasChar c => [c] -> [c]
    rmCommonWhitespacePrefix :: forall c. HasChar c => [c] -> [c]
rmCommonWhitespacePrefix [c]
cs0 =
      let commonWSPrefix :: Int
commonWSPrefix = String -> Int
getCommonWsPrefix ((c -> Char) -> [c] -> String
forall a b. (a -> b) -> [a] -> [b]
map c -> Char
forall c. HasChar c => c -> Char
getChar [c]
cs0)
          go :: [c] -> [c]
go = \case
            c :: c
c@(Char Char
'\n') : [c]
cs -> c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go (Int -> [c] -> [c]
forall {t} {c}. (Ord t, Num t, HasChar c) => t -> [c] -> [c]
dropLine Int
commonWSPrefix [c]
cs)
            c
c : [c]
cs -> c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go [c]
cs
            [] -> []
          -- drop x characters from the string, or up to a newline, whichever
          -- comes first
          dropLine :: t -> [c] -> [c]
dropLine !t
x = \case
            [c]
cs | t
x t -> t -> Bool
forall a. Ord a => a -> a -> Bool
<= t
0 -> [c]
cs
            cs :: [c]
cs@(Char Char
'\n' : [c]
_) -> [c]
cs
            c
_ : [c]
cs -> t -> [c] -> [c]
dropLine (t
x t -> t -> t
forall a. Num a => a -> a -> a
- t
1) [c]
cs
            [] -> []
       in [c] -> [c]
go [c]
cs0

    collapseOnlyWsLines :: HasChar c => [c] -> [c]
    collapseOnlyWsLines :: forall c. HasChar c => [c] -> [c]
collapseOnlyWsLines =
      let go :: [c] -> [c]
go = \case
            c :: c
c@(Char Char
'\n') : [c]
cs | Just [c]
cs' <- [c] -> Maybe [c]
checkAllWs [c]
cs -> c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go [c]
cs'
            c
c : [c]
cs -> c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go [c]
cs
            [] -> []
          checkAllWs :: [c] -> Maybe [c]
checkAllWs = \case
            -- got all the way to a newline or the end of the string, return
            cs :: [c]
cs@(Char Char
'\n' : [c]
_) -> [c] -> Maybe [c]
forall a. a -> Maybe a
Just [c]
cs
            cs :: [c]
cs@[] -> [c] -> Maybe [c]
forall a. a -> Maybe a
Just [c]
cs
            -- found whitespace, continue
            Char Char
c : [c]
cs | Char -> Bool
is_space Char
c -> [c] -> Maybe [c]
checkAllWs [c]
cs
            -- anything else, stop
            [c]
_ -> Maybe [c]
forall a. Maybe a
Nothing
       in [c] -> [c]
go

    rmFirstNewline :: HasChar c => [c] -> [c]
    rmFirstNewline :: forall c. HasChar c => [c] -> [c]
rmFirstNewline = \case
      Char Char
'\n' : [c]
cs -> [c]
cs
      [c]
cs -> [c]
cs

    rmLastNewline :: HasChar c => [c] -> [c]
    rmLastNewline :: forall c. HasChar c => [c] -> [c]
rmLastNewline =
      let go :: [c] -> [c]
go = \case
            [] -> []
            [Char Char
'\n'] -> []
            c
c : [c]
cs -> c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
: [c] -> [c]
go [c]
cs
       in [c] -> [c]
go

-- | See step 4 in Note [Multiline string literals]
--
-- Assumes tabs have already been expanded.
getCommonWsPrefix :: String -> Int
getCommonWsPrefix :: String -> Int
getCommonWsPrefix String
s =
  case [String] -> Maybe (NonEmpty String)
forall a. [a] -> Maybe (NonEmpty a)
NonEmpty.nonEmpty [String]
includedLines of
    Maybe (NonEmpty String)
Nothing -> Int
0
    Just NonEmpty String
ls -> NonEmpty Int -> Int
forall a. Ord a => NonEmpty a -> a
forall (t :: * -> *) a. (Foldable1 t, Ord a) => t a -> a
Foldable1.minimum (NonEmpty Int -> Int) -> NonEmpty Int -> Int
forall a b. (a -> b) -> a -> b
$ (String -> Int) -> NonEmpty String -> NonEmpty Int
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
NonEmpty.map (String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (String -> Int) -> ShowS -> String -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile Char -> Bool
is_space) NonEmpty String
ls
  where
    includedLines :: [String]
includedLines =
        (String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
is_space) -- ignore whitespace-only lines
      ([String] -> [String])
-> ([String] -> [String]) -> [String] -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [String] -> [String]
forall a. Int -> [a] -> [a]
drop Int
1                      -- ignore first line in calculation
      ([String] -> [String]) -> [String] -> [String]
forall a b. (a -> b) -> a -> b
$ String -> [String]
lines String
s

{-
Note [Multiline string literals]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Multiline string literals were added following the acceptance of the
proposal: https://github.com/ghc-proposals/ghc-proposals/pull/569

Multiline string literals are syntax sugar for normal string literals,
with an extra post processing step. This all happens in the Lexer; that
is, the multi-line HsString will contain the post-processed string.
This matches the behavior of the single-line HsString, which contains
the normalized string too (see Note [Literal source text]).

The canonical steps for post processing a multiline string are:
1. Collapse string gaps
2. Split the string by newlines
3. Convert leading tabs into spaces
    * In each line, any tabs preceding non-whitespace characters are replaced with spaces up to the next tab stop
4. Remove common whitespace prefix in every line except the first (see below)
5. If a line contains only whitespace, remove all of the whitespace
6. Join the string back with `\n` delimiters
7a. If the first character of the string is a newline, remove it
7b. If the last character of the string is a newline, remove it
8. Interpret escaped characters

The common whitespace prefix can be informally defined as "The longest
prefix of whitespace shared by all lines in the string, excluding the
first line and any whitespace-only lines".

It's more precisely defined with the following algorithm:

1. Take a list representing the lines in the string
2. Ignore the following elements in the list:
    * The first line (we want to ignore everything before the first newline)
    * Empty lines
    * Lines with only whitespace characters
3. Calculate the longest prefix of whitespace shared by all lines in the remaining list
-}