{-# LANGUAGE LambdaCase #-}
module GHC.Parser.String (
StringLexError (..),
ContainsSmartQuote (..),
LexStringType (..),
lexString,
isDoubleSmartQuote,
isSingleSmartQuote,
isAnyChar,
resolveEscapeCharacter,
) where
import GHC.Prelude
import Control.Arrow ((>>>))
import Control.Monad (guard, unless, when)
import Data.Char (chr, isPrint, ord)
import Data.List (unfoldr)
import Data.Maybe (listToMaybe, mapMaybe)
import GHC.Parser.CharClass (
hexDigit,
is_any,
is_decdigit,
is_hexdigit,
is_octdigit,
is_space,
octDecDigit,
)
import GHC.Utils.Panic (panic)
data LexStringType = StringTypeSingle | StringTypeMulti
data LexStringState loc = LexStringState
{ forall loc. LexStringState loc -> String
stringAcc :: !String
, forall loc. LexStringState loc -> Int
multilineCommonWsPrefix :: !Int
, forall loc. LexStringState loc -> loc
initialLoc :: !loc
}
type GetChar loc = loc -> Maybe (Char, loc)
lexString :: LexStringType -> GetChar loc -> loc -> Either (StringLexError loc) (String, loc)
lexString :: forall loc.
LexStringType
-> GetChar loc -> loc -> Either (StringLexError loc) (String, loc)
lexString LexStringType
strType GetChar loc
getChar loc
initialLoc = LexStringState loc
-> loc -> Either (StringLexError loc) (String, loc)
go LexStringState loc
initialState loc
initialLoc
where
initialState :: LexStringState loc
initialState =
LexStringState
{ stringAcc :: String
stringAcc = String
""
, multilineCommonWsPrefix :: Int
multilineCommonWsPrefix =
case LexStringType
strType of
LexStringType
StringTypeMulti -> Int
forall a. Bounded a => a
maxBound
LexStringType
_ -> Int
0
, initialLoc :: loc
initialLoc = loc
initialLoc
}
go :: LexStringState loc
-> loc -> Either (StringLexError loc) (String, loc)
go !LexStringState loc
s loc
loc0 =
case GetChar loc
getChar loc
loc0 of
Just (Char
'"', loc
_) | Just loc
loc1 <- LexStringType -> GetChar loc -> loc -> Maybe loc
forall loc. LexStringType -> GetChar loc -> loc -> Maybe loc
checkDelimiter LexStringType
strType GetChar loc
getChar loc
loc0 -> do
let postprocess :: String -> String
postprocess =
case LexStringType
strType of
LexStringType
StringTypeSingle -> String -> String
forall a. a -> a
id
LexStringType
StringTypeMulti -> Int -> String -> String
postprocessMultiline (LexStringState loc -> Int
forall loc. LexStringState loc -> Int
multilineCommonWsPrefix LexStringState loc
s)
(String, loc) -> Either (StringLexError loc) (String, loc)
forall a b. b -> Either a b
Right (String -> String
postprocess (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. [a] -> [a]
reverse (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ LexStringState loc -> String
forall loc. LexStringState loc -> String
stringAcc LexStringState loc
s, loc
loc1)
Just (c0 :: Char
c0@Char
'\\', loc
loc1) -> do
case GetChar loc
getChar loc
loc1 of
Just (Char
'&', loc
loc2) -> LexStringState loc
-> loc -> Either (StringLexError loc) (String, loc)
go LexStringState loc
s loc
loc2
Just (Char
c1, loc
loc2) | Char -> Bool
is_space Char
c1 -> GetChar loc
-> LexStringState loc -> loc -> Either (StringLexError loc) loc
forall loc.
GetChar loc
-> LexStringState loc -> loc -> Either (StringLexError loc) loc
collapseStringGap GetChar loc
getChar LexStringState loc
s loc
loc2 Either (StringLexError loc) loc
-> (loc -> Either (StringLexError loc) (String, loc))
-> Either (StringLexError loc) (String, loc)
forall a b.
Either (StringLexError loc) a
-> (a -> Either (StringLexError loc) b)
-> Either (StringLexError loc) b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= LexStringState loc
-> loc -> Either (StringLexError loc) (String, loc)
go LexStringState loc
s
Just (Char
c1, loc
loc2) ->
case LexStringType
strType of
LexStringType
StringTypeSingle -> do
(c', loc') <- GetChar loc -> loc -> Either (StringLexError loc) (Char, loc)
forall loc.
GetChar loc -> loc -> Either (StringLexError loc) (Char, loc)
resolveEscapeCharacter GetChar loc
getChar loc
loc1
go (addChar c' s) loc'
LexStringType
StringTypeMulti -> do
_ <- GetChar loc -> loc -> Either (StringLexError loc) (Char, loc)
forall loc.
GetChar loc -> loc -> Either (StringLexError loc) (Char, loc)
resolveEscapeCharacter GetChar loc
getChar loc
loc1
go (addChar c1 . addChar c0 $ s) loc2
Maybe (Char, loc)
Nothing -> StringLexError loc -> Either (StringLexError loc) (String, loc)
forall a b. a -> Either a b
Left (StringLexError loc -> Either (StringLexError loc) (String, loc))
-> StringLexError loc -> Either (StringLexError loc) (String, loc)
forall a b. (a -> b) -> a -> b
$ loc -> ContainsSmartQuote loc -> StringLexError loc
forall loc. loc -> ContainsSmartQuote loc -> StringLexError loc
BadCharInitialLex loc
loc1 (GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
forall loc.
GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
hasSQuote GetChar loc
getChar LexStringState loc
s)
Just (c0 :: Char
c0@Char
'\n', loc
loc1) | LexStringType
StringTypeMulti <- LexStringType
strType ->
(LexStringState loc
-> loc -> Either (StringLexError loc) (String, loc))
-> (LexStringState loc, loc)
-> Either (StringLexError loc) (String, loc)
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry LexStringState loc
-> loc -> Either (StringLexError loc) (String, loc)
go ((LexStringState loc, loc)
-> Either (StringLexError loc) (String, loc))
-> (LexStringState loc, loc)
-> Either (StringLexError loc) (String, loc)
forall a b. (a -> b) -> a -> b
$ GetChar loc
-> LexStringState loc -> loc -> (LexStringState loc, loc)
forall loc.
GetChar loc
-> LexStringState loc -> loc -> (LexStringState loc, loc)
parseLeadingWS GetChar loc
getChar (Char -> LexStringState loc -> LexStringState loc
forall loc. Char -> LexStringState loc -> LexStringState loc
addChar Char
c0 LexStringState loc
s) loc
loc1
Just (Char
c0, loc
loc1) | Char -> Bool
isAnyChar Char
c0 -> LexStringState loc
-> loc -> Either (StringLexError loc) (String, loc)
go (Char -> LexStringState loc -> LexStringState loc
forall loc. Char -> LexStringState loc -> LexStringState loc
addChar Char
c0 LexStringState loc
s) loc
loc1
Just (Char
_, loc
_) -> StringLexError loc -> Either (StringLexError loc) (String, loc)
forall a b. a -> Either a b
Left (StringLexError loc -> Either (StringLexError loc) (String, loc))
-> StringLexError loc -> Either (StringLexError loc) (String, loc)
forall a b. (a -> b) -> a -> b
$ loc -> ContainsSmartQuote loc -> StringLexError loc
forall loc. loc -> ContainsSmartQuote loc -> StringLexError loc
BadCharInitialLex loc
loc0 (GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
forall loc.
GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
hasSQuote GetChar loc
getChar LexStringState loc
s)
Maybe (Char, loc)
Nothing -> StringLexError loc -> Either (StringLexError loc) (String, loc)
forall a b. a -> Either a b
Left (StringLexError loc -> Either (StringLexError loc) (String, loc))
-> StringLexError loc -> Either (StringLexError loc) (String, loc)
forall a b. (a -> b) -> a -> b
$ loc -> ContainsSmartQuote loc -> StringLexError loc
forall loc. loc -> ContainsSmartQuote loc -> StringLexError loc
BadCharInitialLex loc
loc0 (GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
forall loc.
GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
hasSQuote GetChar loc
getChar LexStringState loc
s)
{-# INLINE lexString #-}
checkDelimiter :: LexStringType -> GetChar loc -> loc -> Maybe loc
checkDelimiter :: forall loc. LexStringType -> GetChar loc -> loc -> Maybe loc
checkDelimiter LexStringType
strType GetChar loc
getChar loc
loc0 =
case LexStringType
strType of
LexStringType
StringTypeSingle -> do
('"', loc1) <- GetChar loc
getChar loc
loc0
Just loc1
LexStringType
StringTypeMulti -> do
('"', loc1) <- GetChar loc
getChar loc
loc0
('"', loc2) <- getChar loc1
('"', loc3) <- getChar loc2
Just loc3
{-# INLINE checkDelimiter #-}
addChar :: Char -> LexStringState loc -> LexStringState loc
addChar :: forall loc. Char -> LexStringState loc -> LexStringState loc
addChar Char
c LexStringState loc
s = LexStringState loc
s{stringAcc = c : stringAcc s}
{-# INLINE addChar #-}
hasSQuote :: GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
hasSQuote :: forall loc.
GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
hasSQuote GetChar loc
getChar LexStringState loc
s
| (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any Char -> Bool
isDoubleSmartQuote (LexStringState loc -> String
forall loc. LexStringState loc -> String
stringAcc LexStringState loc
s)
, (Char
c, loc
loc) : [(Char, loc)]
_ <- ((Char, loc) -> Bool) -> [(Char, loc)] -> [(Char, loc)]
forall a. (a -> Bool) -> [a] -> [a]
filter (Char -> Bool
isDoubleSmartQuote (Char -> Bool) -> ((Char, loc) -> Char) -> (Char, loc) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char, loc) -> Char
forall a b. (a, b) -> a
fst) [(Char, loc)]
allChars =
Char -> loc -> ContainsSmartQuote loc
forall loc. Char -> loc -> ContainsSmartQuote loc
SmartQuote Char
c loc
loc
| Bool
otherwise =
ContainsSmartQuote loc
forall loc. ContainsSmartQuote loc
NoSmartQuote
where
allChars :: [(Char, loc)]
allChars = (loc -> Maybe ((Char, loc), loc)) -> loc -> [(Char, loc)]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr loc -> Maybe ((Char, loc), loc)
getCharWithLoc (LexStringState loc -> loc
forall loc. LexStringState loc -> loc
initialLoc LexStringState loc
s)
getCharWithLoc :: loc -> Maybe ((Char, loc), loc)
getCharWithLoc loc
loc =
case GetChar loc
getChar loc
loc of
Just (Char
c, loc
loc') -> ((Char, loc), loc) -> Maybe ((Char, loc), loc)
forall a. a -> Maybe a
Just ((Char
c, loc
loc), loc
loc')
Maybe (Char, loc)
Nothing -> Maybe ((Char, loc), loc)
forall a. Maybe a
Nothing
{-# INLINE hasSQuote #-}
collapseStringGap :: GetChar loc -> LexStringState loc -> loc -> Either (StringLexError loc) loc
collapseStringGap :: forall loc.
GetChar loc
-> LexStringState loc -> loc -> Either (StringLexError loc) loc
collapseStringGap GetChar loc
getChar LexStringState loc
s = loc -> Either (StringLexError loc) loc
go
where
go :: loc -> Either (StringLexError loc) loc
go loc
loc0 =
case GetChar loc
getChar loc
loc0 of
Just (Char
'\\', loc
loc1) -> loc -> Either (StringLexError loc) loc
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure loc
loc1
Just (Char
c0, loc
loc1) | Char -> Bool
is_space Char
c0 -> loc -> Either (StringLexError loc) loc
go loc
loc1
Just (Char, loc)
_ -> StringLexError loc -> Either (StringLexError loc) loc
forall a b. a -> Either a b
Left (StringLexError loc -> Either (StringLexError loc) loc)
-> StringLexError loc -> Either (StringLexError loc) loc
forall a b. (a -> b) -> a -> b
$ loc -> ContainsSmartQuote loc -> StringLexError loc
forall loc. loc -> ContainsSmartQuote loc -> StringLexError loc
BadCharInitialLex loc
loc0 (GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
forall loc.
GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
hasSQuote GetChar loc
getChar LexStringState loc
s)
Maybe (Char, loc)
Nothing -> StringLexError loc -> Either (StringLexError loc) loc
forall a b. a -> Either a b
Left (StringLexError loc -> Either (StringLexError loc) loc)
-> StringLexError loc -> Either (StringLexError loc) loc
forall a b. (a -> b) -> a -> b
$ loc -> ContainsSmartQuote loc -> StringLexError loc
forall loc. loc -> ContainsSmartQuote loc -> StringLexError loc
UnexpectedEOF loc
loc0 (GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
forall loc.
GetChar loc -> LexStringState loc -> ContainsSmartQuote loc
hasSQuote GetChar loc
getChar LexStringState loc
s)
{-# INLINE collapseStringGap #-}
parseLeadingWS :: GetChar loc -> LexStringState loc -> loc -> (LexStringState loc, loc)
parseLeadingWS :: forall loc.
GetChar loc
-> LexStringState loc -> loc -> (LexStringState loc, loc)
parseLeadingWS GetChar loc
getChar = Int -> LexStringState loc -> loc -> (LexStringState loc, loc)
go Int
0
where
go :: Int -> LexStringState loc -> loc -> (LexStringState loc, loc)
go !Int
col LexStringState loc
s loc
loc =
case GetChar loc
getChar loc
loc of
Just (c :: Char
c@Char
' ', loc
loc') -> Int -> LexStringState loc -> loc -> (LexStringState loc, loc)
go (Int
col Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Char -> LexStringState loc -> LexStringState loc
forall loc. Char -> LexStringState loc -> LexStringState loc
addChar Char
c LexStringState loc
s) loc
loc'
Just (Char
'\t', loc
loc') ->
let fill :: Int
fill = Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
- (Int
col Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
8)
s' :: LexStringState loc
s' = Int
-> (LexStringState loc -> LexStringState loc)
-> LexStringState loc
-> LexStringState loc
forall a. Int -> (a -> a) -> a -> a
applyN Int
fill (Char -> LexStringState loc -> LexStringState loc
forall loc. Char -> LexStringState loc -> LexStringState loc
addChar Char
' ') LexStringState loc
s
in Int -> LexStringState loc -> loc -> (LexStringState loc, loc)
go (Int
col Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
fill) LexStringState loc
s' loc
loc'
Just (Char
'\n', loc
_) -> (LexStringState loc
s, loc
loc)
Just (Char
'"', loc
_) | Just loc
_ <- LexStringType -> GetChar loc -> loc -> Maybe loc
forall loc. LexStringType -> GetChar loc -> loc -> Maybe loc
checkDelimiter LexStringType
StringTypeMulti GetChar loc
getChar loc
loc -> (LexStringState loc
s, loc
loc)
Maybe (Char, loc)
_ ->
let s' :: LexStringState loc
s' = LexStringState loc
s{multilineCommonWsPrefix = min col (multilineCommonWsPrefix s)}
in (LexStringState loc
s', loc
loc)
applyN :: Int -> (a -> a) -> a -> a
applyN :: forall a. Int -> (a -> a) -> a -> a
applyN Int
n a -> a
f a
x0 = (a -> a) -> a -> [a]
forall a. (a -> a) -> a -> [a]
iterate a -> a
f a
x0 [a] -> Int -> a
forall a. HasCallStack => [a] -> Int -> a
!! Int
n
{-# INLINE parseLeadingWS #-}
data StringLexError loc
= UnexpectedEOF !loc !(ContainsSmartQuote loc)
| BadCharInitialLex !loc !(ContainsSmartQuote loc)
| EscapeBadChar !loc
| EscapeUnexpectedEOF !loc
| EscapeNumRangeError !loc
| EscapeSmartQuoteError !Char !loc
deriving (Int -> StringLexError loc -> String -> String
[StringLexError loc] -> String -> String
StringLexError loc -> String
(Int -> StringLexError loc -> String -> String)
-> (StringLexError loc -> String)
-> ([StringLexError loc] -> String -> String)
-> Show (StringLexError loc)
forall loc.
Show loc =>
Int -> StringLexError loc -> String -> String
forall loc. Show loc => [StringLexError loc] -> String -> String
forall loc. Show loc => StringLexError loc -> String
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
$cshowsPrec :: forall loc.
Show loc =>
Int -> StringLexError loc -> String -> String
showsPrec :: Int -> StringLexError loc -> String -> String
$cshow :: forall loc. Show loc => StringLexError loc -> String
show :: StringLexError loc -> String
$cshowList :: forall loc. Show loc => [StringLexError loc] -> String -> String
showList :: [StringLexError loc] -> String -> String
Show)
data ContainsSmartQuote loc
= NoSmartQuote
| SmartQuote !Char !loc
deriving (Int -> ContainsSmartQuote loc -> String -> String
[ContainsSmartQuote loc] -> String -> String
ContainsSmartQuote loc -> String
(Int -> ContainsSmartQuote loc -> String -> String)
-> (ContainsSmartQuote loc -> String)
-> ([ContainsSmartQuote loc] -> String -> String)
-> Show (ContainsSmartQuote loc)
forall loc.
Show loc =>
Int -> ContainsSmartQuote loc -> String -> String
forall loc.
Show loc =>
[ContainsSmartQuote loc] -> String -> String
forall loc. Show loc => ContainsSmartQuote loc -> String
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
$cshowsPrec :: forall loc.
Show loc =>
Int -> ContainsSmartQuote loc -> String -> String
showsPrec :: Int -> ContainsSmartQuote loc -> String -> String
$cshow :: forall loc. Show loc => ContainsSmartQuote loc -> String
show :: ContainsSmartQuote loc -> String
$cshowList :: forall loc.
Show loc =>
[ContainsSmartQuote loc] -> String -> String
showList :: [ContainsSmartQuote loc] -> String -> String
Show)
resolveEscapeCharacter :: GetChar loc -> loc -> Either (StringLexError loc) (Char, loc)
resolveEscapeCharacter :: forall loc.
GetChar loc -> loc -> Either (StringLexError loc) (Char, loc)
resolveEscapeCharacter GetChar loc
getChar loc
loc0 = do
(c0, loc1) <- loc -> Either (StringLexError loc) (Char, loc)
expectChar loc
loc0
case c0 of
Char
'a' -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\a', loc
loc1)
Char
'b' -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\b', loc
loc1)
Char
'f' -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\f', loc
loc1)
Char
'n' -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\n', loc
loc1)
Char
'r' -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\r', loc
loc1)
Char
't' -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\t', loc
loc1)
Char
'v' -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\v', loc
loc1)
Char
'\\' -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\\', loc
loc1)
Char
'"' -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\"', loc
loc1)
Char
'\'' -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
'\'', loc
loc1)
Char
'x' -> (Char -> Bool)
-> Int
-> (Char -> Int)
-> loc
-> Either (StringLexError loc) (Char, loc)
expectNum Char -> Bool
is_hexdigit Int
16 Char -> Int
hexDigit loc
loc1
Char
'o' -> (Char -> Bool)
-> Int
-> (Char -> Int)
-> loc
-> Either (StringLexError loc) (Char, loc)
expectNum Char -> Bool
is_octdigit Int
8 Char -> Int
octDecDigit loc
loc1
Char
_ | Char -> Bool
is_decdigit Char
c0 -> (Char -> Bool)
-> Int
-> (Char -> Int)
-> loc
-> Either (StringLexError loc) (Char, loc)
expectNum Char -> Bool
is_decdigit Int
10 Char -> Int
octDecDigit loc
loc0
Char
'^' -> do
(c1, loc2) <- loc -> Either (StringLexError loc) (Char, loc)
expectChar loc
loc1
unless ('@' <= c1 && c1 <= '_') $ Left $ EscapeBadChar loc1
pure (chr $ ord c1 - ord '@', loc2)
Char
_ | Just (Char
c1, loc
loc2) <- GetChar loc -> Char -> GetChar loc
forall loc. GetChar loc -> Char -> GetChar loc
parseLongEscape GetChar loc
getChar Char
c0 loc
loc1 -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char
c1, loc
loc2)
Char
_ | Char -> Bool
isDoubleSmartQuote Char
c0 -> StringLexError loc -> Either (StringLexError loc) (Char, loc)
forall a b. a -> Either a b
Left (StringLexError loc -> Either (StringLexError loc) (Char, loc))
-> StringLexError loc -> Either (StringLexError loc) (Char, loc)
forall a b. (a -> b) -> a -> b
$ Char -> loc -> StringLexError loc
forall loc. Char -> loc -> StringLexError loc
EscapeSmartQuoteError Char
c0 loc
loc0
Char
_ | Char -> Bool
isSingleSmartQuote Char
c0 -> StringLexError loc -> Either (StringLexError loc) (Char, loc)
forall a b. a -> Either a b
Left (StringLexError loc -> Either (StringLexError loc) (Char, loc))
-> StringLexError loc -> Either (StringLexError loc) (Char, loc)
forall a b. (a -> b) -> a -> b
$ Char -> loc -> StringLexError loc
forall loc. Char -> loc -> StringLexError loc
EscapeSmartQuoteError Char
c0 loc
loc0
Char
_ -> StringLexError loc -> Either (StringLexError loc) (Char, loc)
forall a b. a -> Either a b
Left (StringLexError loc -> Either (StringLexError loc) (Char, loc))
-> StringLexError loc -> Either (StringLexError loc) (Char, loc)
forall a b. (a -> b) -> a -> b
$ loc -> StringLexError loc
forall loc. loc -> StringLexError loc
EscapeBadChar loc
loc0
where
expectChar :: loc -> Either (StringLexError loc) (Char, loc)
expectChar loc
loc =
case GetChar loc
getChar loc
loc of
Just (Char, loc)
x -> (Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char, loc)
x
Maybe (Char, loc)
Nothing -> StringLexError loc -> Either (StringLexError loc) (Char, loc)
forall a b. a -> Either a b
Left (StringLexError loc -> Either (StringLexError loc) (Char, loc))
-> StringLexError loc -> Either (StringLexError loc) (Char, loc)
forall a b. (a -> b) -> a -> b
$ loc -> StringLexError loc
forall loc. loc -> StringLexError loc
EscapeUnexpectedEOF loc
loc
expectNum :: (Char -> Bool)
-> Int
-> (Char -> Int)
-> loc
-> Either (StringLexError loc) (Char, loc)
expectNum Char -> Bool
isDigit Int
base Char -> Int
toDigit loc
loc1 = do
(c1, loc2) <- loc -> Either (StringLexError loc) (Char, loc)
expectChar loc
loc1
unless (isDigit c1) $ Left $ EscapeBadChar loc1
let parseNum Int
x loc
loc =
case GetChar loc
getChar loc
loc of
Just (Char
c, loc
loc') | Char -> Bool
isDigit Char
c -> do
let x' :: Int
x' = Int
x Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
base Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
toDigit Char
c
Bool
-> Either (StringLexError loc) () -> Either (StringLexError loc) ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
x' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0x10ffff) (Either (StringLexError loc) () -> Either (StringLexError loc) ())
-> Either (StringLexError loc) () -> Either (StringLexError loc) ()
forall a b. (a -> b) -> a -> b
$ StringLexError loc -> Either (StringLexError loc) ()
forall a b. a -> Either a b
Left (StringLexError loc -> Either (StringLexError loc) ())
-> StringLexError loc -> Either (StringLexError loc) ()
forall a b. (a -> b) -> a -> b
$ loc -> StringLexError loc
forall loc. loc -> StringLexError loc
EscapeNumRangeError loc
loc
Int -> loc -> Either (StringLexError loc) (Char, loc)
parseNum Int
x' loc
loc'
Maybe (Char, loc)
_ ->
(Char, loc) -> Either (StringLexError loc) (Char, loc)
forall a. a -> Either (StringLexError loc) a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int -> Char
chr Int
x, loc
loc)
parseNum (toDigit c1) loc2
{-# INLINE resolveEscapeCharacter #-}
parseLongEscape :: GetChar loc -> Char -> loc -> Maybe (Char, loc)
parseLongEscape :: forall loc. GetChar loc -> Char -> GetChar loc
parseLongEscape GetChar loc
getChar Char
c0 loc
loc1 = [(Char, loc)] -> Maybe (Char, loc)
forall a. [a] -> Maybe a
listToMaybe ([(Char, loc)] -> Maybe (Char, loc))
-> [(Char, loc)] -> Maybe (Char, loc)
forall a b. (a -> b) -> a -> b
$ ((String, Char) -> Maybe (Char, loc))
-> [(String, Char)] -> [(Char, loc)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (String, Char) -> Maybe (Char, loc)
tryParse [(String, Char)]
longEscapeCodes
where
tryParse :: (String, Char) -> Maybe (Char, loc)
tryParse (String
prefix, Char
c) = do
p0 : p <- String -> Maybe String
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure String
prefix
guard (p0 == c0)
loc <- parsePrefix loc1 p
pure (c, loc)
parsePrefix :: loc -> String -> Maybe loc
parsePrefix loc
loc = \case
[] -> loc -> Maybe loc
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure loc
loc
Char
p : String
ps -> do
(c, loc') <- GetChar loc
getChar loc
loc
guard (p == c)
parsePrefix loc' ps
longEscapeCodes :: [(String, Char)]
longEscapeCodes =
[ (String
"NUL", Char
'\NUL')
, (String
"SOH", Char
'\SOH')
, (String
"STX", Char
'\STX')
, (String
"ETX", Char
'\ETX')
, (String
"EOT", Char
'\EOT')
, (String
"ENQ", Char
'\ENQ')
, (String
"ACK", Char
'\ACK')
, (String
"BEL", Char
'\BEL')
, (String
"BS", Char
'\BS')
, (String
"HT", Char
'\HT')
, (String
"LF", Char
'\LF')
, (String
"VT", Char
'\VT')
, (String
"FF", Char
'\FF')
, (String
"CR", Char
'\CR')
, (String
"SO", Char
'\SO')
, (String
"SI", Char
'\SI')
, (String
"DLE", Char
'\DLE')
, (String
"DC1", Char
'\DC1')
, (String
"DC2", Char
'\DC2')
, (String
"DC3", Char
'\DC3')
, (String
"DC4", Char
'\DC4')
, (String
"NAK", Char
'\NAK')
, (String
"SYN", Char
'\SYN')
, (String
"ETB", Char
'\ETB')
, (String
"CAN", Char
'\CAN')
, (String
"EM", Char
'\EM')
, (String
"SUB", Char
'\SUB')
, (String
"ESC", Char
'\ESC')
, (String
"FS", Char
'\FS')
, (String
"GS", Char
'\GS')
, (String
"RS", Char
'\RS')
, (String
"US", Char
'\US')
, (String
"SP", Char
'\SP')
, (String
"DEL", Char
'\DEL')
]
{-# INLINE parseLongEscape #-}
isDoubleSmartQuote :: Char -> Bool
isDoubleSmartQuote :: Char -> Bool
isDoubleSmartQuote = \case
Char
'“' -> Bool
True
Char
'”' -> Bool
True
Char
_ -> Bool
False
isSingleSmartQuote :: Char -> Bool
isSingleSmartQuote :: Char -> Bool
isSingleSmartQuote = \case
Char
'‘' -> Bool
True
Char
'’' -> Bool
True
Char
_ -> Bool
False
postprocessMultiline :: Int -> String -> String
postprocessMultiline :: Int -> String -> String
postprocessMultiline Int
commonWSPrefix =
String -> String
rmCommonWhitespacePrefix
(String -> String) -> (String -> String) -> String -> String
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> String -> String
collapseOnlyWsLines
(String -> String) -> (String -> String) -> String -> String
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> String -> String
rmFirstNewline
(String -> String) -> (String -> String) -> String -> String
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> String -> String
rmLastNewline
(String -> String) -> (String -> String) -> String -> String
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> String -> String
resolveEscapeChars
where
rmCommonWhitespacePrefix :: String -> String
rmCommonWhitespacePrefix =
let go :: String -> String
go = \case
Char
'\n' : String
s -> Char
'\n' Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
go (Int -> String -> String
forall {t}. (Ord t, Num t) => t -> String -> String
dropLine Int
commonWSPrefix String
s)
Char
c : String
s -> Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
go String
s
[] -> []
dropLine :: t -> String -> String
dropLine !t
x = \case
String
s | t
x t -> t -> Bool
forall a. Ord a => a -> a -> Bool
<= t
0 -> String
s
s :: String
s@(Char
'\n' : String
_) -> String
s
Char
_ : String
s -> t -> String -> String
dropLine (t
x t -> t -> t
forall a. Num a => a -> a -> a
- t
1) String
s
[] -> []
in String -> String
go
collapseOnlyWsLines :: String -> String
collapseOnlyWsLines =
let go :: String -> String
go = \case
Char
'\n' : String
s | Just String
s' <- String -> Maybe String
checkAllWs String
s -> Char
'\n' Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
go String
s'
Char
c : String
s -> Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
go String
s
[] -> []
checkAllWs :: String -> Maybe String
checkAllWs = \case
s :: String
s@(Char
'\n' : String
_) -> String -> Maybe String
forall a. a -> Maybe a
Just String
s
s :: String
s@[] -> String -> Maybe String
forall a. a -> Maybe a
Just String
s
Char
c : String
s | Char -> Bool
is_space Char
c -> String -> Maybe String
checkAllWs String
s
String
_ -> Maybe String
forall a. Maybe a
Nothing
in String -> String
go
rmFirstNewline :: String -> String
rmFirstNewline = \case
Char
'\n' : String
s -> String
s
String
s -> String
s
rmLastNewline :: String -> String
rmLastNewline =
let go :: String -> String
go = \case
[] -> []
[Char
'\n'] -> []
Char
c : String
cs -> Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
go String
cs
in String -> String
go
resolveEscapeChars :: String -> String
resolveEscapeChars = \case
[] -> []
Char
'\\' : String
s ->
case GetChar String
-> String -> Either (StringLexError String) (Char, String)
forall loc.
GetChar loc -> loc -> Either (StringLexError loc) (Char, loc)
resolveEscapeCharacter GetChar String
forall {a}. [a] -> Maybe (a, [a])
uncons String
s of
Left StringLexError String
e -> String -> String
forall a. HasCallStack => String -> a
panic (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String
"resolving escape characters in multiline string unexpectedly found errors: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ StringLexError String -> String
forall a. Show a => a -> String
show StringLexError String
e
Right (Char
c, String
s') -> Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
resolveEscapeChars String
s'
Char
c : String
s -> Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
resolveEscapeChars String
s
uncons :: [a] -> Maybe (a, [a])
uncons = \case
a
c : [a]
cs -> (a, [a]) -> Maybe (a, [a])
forall a. a -> Maybe a
Just (a
c, [a]
cs)
[] -> Maybe (a, [a])
forall a. Maybe a
Nothing
isAnyChar :: Char -> Bool
isAnyChar :: Char -> Bool
isAnyChar Char
c
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\x7f' = Char -> Bool
isPrint Char
c
| Bool
otherwise = Char -> Bool
is_any Char
c