{-# LANGUAGE CPP #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}

-----------------------------------------------------------------------------

-- |
-- Module      :  Distribution.Fields.Parser
-- License     :  BSD3
--
-- Maintainer  :  cabal-devel@haskell.org
-- Portability :  portable
{- FOURMOLU_DISABLE -}
module Distribution.Fields.Parser
  ( -- * Types
    Field (..)
  , Name (..)
  , FieldLine (..)
  , SectionArg (..)

    -- * Grammar and parsing
    -- $grammar
  , readFields
  , readFields'
#ifdef CABAL_PARSEC_DEBUG

    -- * Internal
  , parseFile
  , parseStr
  , parseBS
#endif
  ) where
{- FOURMOLU_ENABLE -}

import qualified Data.ByteString.Char8 as B8
import Data.Functor.Identity
import Distribution.Compat.Prelude
import Distribution.Fields.Field
import Distribution.Fields.Lexer
import Distribution.Fields.LexerMonad
  ( LexResult (..)
  , LexState (..)
  , LexWarning (..)
  , LexWarningType (..)
  , unLex
  )
import Distribution.Parsec.Position (Position (..), positionCol)
import Text.Parsec.Combinator hiding (eof, notFollowedBy)
import Text.Parsec.Error
import Text.Parsec.Pos
import Text.Parsec.Prim hiding (many, (<|>))
import Prelude ()

#ifdef CABAL_PARSEC_DEBUG
import qualified Data.Text                as T
import qualified Data.Text.Encoding       as T
import qualified Data.Text.Encoding.Error as T
#endif

-- $setup
-- >>> import Data.Either (isLeft)

-- | The 'LexState'' (with a prime) is an instance of parsec's 'Stream'
-- wrapped around lexer's 'LexState' (without a prime)
data LexState' = LexState' !LexState (LToken, LexState')

mkLexState' :: LexState -> LexState'
mkLexState' :: LexState -> LexState'
mkLexState' LexState
st =
  LexState -> (LToken, LexState') -> LexState'
LexState'
    LexState
st
    (case Lex LToken -> LexState -> LexResult LToken
forall a. Lex a -> LexState -> LexResult a
unLex Lex LToken
lexToken LexState
st of LexResult LexState
st' LToken
tok -> (LToken
tok, LexState -> LexState'
mkLexState' LexState
st'))

type Parser a = ParsecT LexState' () Identity a

instance Stream LexState' Identity LToken where
  uncons :: LexState' -> Identity (Maybe (LToken, LexState'))
uncons (LexState' LexState
_ (LToken
tok, LexState'
st')) =
    case LToken
tok of
      L Position
_ Token
EOF -> Maybe (LToken, LexState') -> Identity (Maybe (LToken, LexState'))
forall a. a -> Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (LToken, LexState')
forall a. Maybe a
Nothing
      LToken
_ -> Maybe (LToken, LexState') -> Identity (Maybe (LToken, LexState'))
forall a. a -> Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ((LToken, LexState') -> Maybe (LToken, LexState')
forall a. a -> Maybe a
Just (LToken
tok, LexState'
st'))

-- | Get lexer warnings accumulated so far
getLexerWarnings :: Parser [LexWarning]
getLexerWarnings :: Parser [LexWarning]
getLexerWarnings = do
  LexState' (LexState{warnings = ws}) _ <- ParsecT LexState' () Identity LexState'
forall (m :: * -> *) s u. Monad m => ParsecT s u m s
getInput
  return ws

addLexerWarning :: LexWarning -> Parser ()
addLexerWarning :: LexWarning -> Parser ()
addLexerWarning LexWarning
w = do
  LexState' ls@LexState{warnings = ws} _ <- ParsecT LexState' () Identity LexState'
forall (m :: * -> *) s u. Monad m => ParsecT s u m s
getInput
  setInput $! mkLexState' ls{warnings = w : ws}

-- | Set Alex code i.e. the mode "state" lexer is in.
setLexerMode :: Int -> Parser ()
setLexerMode :: Int -> Parser ()
setLexerMode Int
code = do
  LexState' ls _ <- ParsecT LexState' () Identity LexState'
forall (m :: * -> *) s u. Monad m => ParsecT s u m s
getInput
  setInput $! mkLexState' ls{curCode = code}

getToken :: (Token -> Maybe a) -> Parser a
getToken :: forall a. (Token -> Maybe a) -> Parser a
getToken Token -> Maybe a
getTok = (LToken -> Maybe a) -> Parser a
forall a. (LToken -> Maybe a) -> Parser a
getTokenWithPos (\(L Position
_ Token
t) -> Token -> Maybe a
getTok Token
t)

getTokenWithPos :: (LToken -> Maybe a) -> Parser a
getTokenWithPos :: forall a. (LToken -> Maybe a) -> Parser a
getTokenWithPos LToken -> Maybe a
getTok = (LToken -> [Char])
-> (SourcePos -> LToken -> LexState' -> SourcePos)
-> (LToken -> Maybe a)
-> ParsecT LexState' () Identity a
forall s (m :: * -> *) t a u.
Stream s m t =>
(t -> [Char])
-> (SourcePos -> t -> s -> SourcePos)
-> (t -> Maybe a)
-> ParsecT s u m a
tokenPrim (\(L Position
_ Token
t) -> Token -> [Char]
describeToken Token
t) SourcePos -> LToken -> LexState' -> SourcePos
updatePos LToken -> Maybe a
getTok
  where
    updatePos :: SourcePos -> LToken -> LexState' -> SourcePos
    updatePos :: SourcePos -> LToken -> LexState' -> SourcePos
updatePos SourcePos
pos (L (Position Int
col Int
line) Token
_) LexState'
_ = [Char] -> Int -> Int -> SourcePos
newPos (SourcePos -> [Char]
sourceName SourcePos
pos) Int
col Int
line

describeToken :: Token -> String
describeToken :: Token -> [Char]
describeToken Token
t = case Token
t of
  TokSym ByteString
s -> [Char]
"symbol " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
s
  TokStr ByteString
s -> [Char]
"string " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
s
  TokOther ByteString
s -> [Char]
"operator " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
s
  Indent Int
_ -> [Char]
"new line"
  TokFieldLine ByteString
_ -> [Char]
"field content"
  Token
Colon -> [Char]
"\":\""
  Token
OpenBrace -> [Char]
"\"{\""
  Token
CloseBrace -> [Char]
"\"}\""
  --  SemiColon       -> "\";\""
  Token
EOF -> [Char]
"end of file"
  LexicalError ByteString
is -> [Char]
"character in input " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Char -> [Char]
forall a. Show a => a -> [Char]
show (ByteString -> Char
B8.head ByteString
is)

tokSym :: Parser (Name Position)
tokSym', tokStr, tokOther :: Parser (SectionArg Position)
tokIndent :: Parser Int
tokColon, tokCloseBrace :: Parser ()
tokOpenBrace :: Parser Position
tokFieldLine :: Parser (FieldLine Position)
tokSym :: Parser (Name Position)
tokSym = (LToken -> Maybe (Name Position)) -> Parser (Name Position)
forall a. (LToken -> Maybe a) -> Parser a
getTokenWithPos ((LToken -> Maybe (Name Position)) -> Parser (Name Position))
-> (LToken -> Maybe (Name Position)) -> Parser (Name Position)
forall a b. (a -> b) -> a -> b
$ \LToken
t -> case LToken
t of L Position
pos (TokSym ByteString
x) -> Name Position -> Maybe (Name Position)
forall a. a -> Maybe a
Just (Position -> ByteString -> Name Position
forall ann. ann -> ByteString -> Name ann
mkName Position
pos ByteString
x); LToken
_ -> Maybe (Name Position)
forall a. Maybe a
Nothing
tokSym' :: Parser (SectionArg Position)
tokSym' = (LToken -> Maybe (SectionArg Position))
-> Parser (SectionArg Position)
forall a. (LToken -> Maybe a) -> Parser a
getTokenWithPos ((LToken -> Maybe (SectionArg Position))
 -> Parser (SectionArg Position))
-> (LToken -> Maybe (SectionArg Position))
-> Parser (SectionArg Position)
forall a b. (a -> b) -> a -> b
$ \LToken
t -> case LToken
t of L Position
pos (TokSym ByteString
x) -> SectionArg Position -> Maybe (SectionArg Position)
forall a. a -> Maybe a
Just (Position -> ByteString -> SectionArg Position
forall ann. ann -> ByteString -> SectionArg ann
SecArgName Position
pos ByteString
x); LToken
_ -> Maybe (SectionArg Position)
forall a. Maybe a
Nothing
tokStr :: Parser (SectionArg Position)
tokStr = (LToken -> Maybe (SectionArg Position))
-> Parser (SectionArg Position)
forall a. (LToken -> Maybe a) -> Parser a
getTokenWithPos ((LToken -> Maybe (SectionArg Position))
 -> Parser (SectionArg Position))
-> (LToken -> Maybe (SectionArg Position))
-> Parser (SectionArg Position)
forall a b. (a -> b) -> a -> b
$ \LToken
t -> case LToken
t of L Position
pos (TokStr ByteString
x) -> SectionArg Position -> Maybe (SectionArg Position)
forall a. a -> Maybe a
Just (Position -> ByteString -> SectionArg Position
forall ann. ann -> ByteString -> SectionArg ann
SecArgStr Position
pos ByteString
x); LToken
_ -> Maybe (SectionArg Position)
forall a. Maybe a
Nothing
tokOther :: Parser (SectionArg Position)
tokOther = (LToken -> Maybe (SectionArg Position))
-> Parser (SectionArg Position)
forall a. (LToken -> Maybe a) -> Parser a
getTokenWithPos ((LToken -> Maybe (SectionArg Position))
 -> Parser (SectionArg Position))
-> (LToken -> Maybe (SectionArg Position))
-> Parser (SectionArg Position)
forall a b. (a -> b) -> a -> b
$ \LToken
t -> case LToken
t of L Position
pos (TokOther ByteString
x) -> SectionArg Position -> Maybe (SectionArg Position)
forall a. a -> Maybe a
Just (Position -> ByteString -> SectionArg Position
forall ann. ann -> ByteString -> SectionArg ann
SecArgOther Position
pos ByteString
x); LToken
_ -> Maybe (SectionArg Position)
forall a. Maybe a
Nothing
tokIndent :: Parser Int
tokIndent = (Token -> Maybe Int) -> Parser Int
forall a. (Token -> Maybe a) -> Parser a
getToken ((Token -> Maybe Int) -> Parser Int)
-> (Token -> Maybe Int) -> Parser Int
forall a b. (a -> b) -> a -> b
$ \Token
t -> case Token
t of Indent Int
x -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
x; Token
_ -> Maybe Int
forall a. Maybe a
Nothing
tokColon :: Parser ()
tokColon = (Token -> Maybe ()) -> Parser ()
forall a. (Token -> Maybe a) -> Parser a
getToken ((Token -> Maybe ()) -> Parser ())
-> (Token -> Maybe ()) -> Parser ()
forall a b. (a -> b) -> a -> b
$ \Token
t -> case Token
t of Token
Colon -> () -> Maybe ()
forall a. a -> Maybe a
Just (); Token
_ -> Maybe ()
forall a. Maybe a
Nothing
tokOpenBrace :: Parser Position
tokOpenBrace = (LToken -> Maybe Position) -> Parser Position
forall a. (LToken -> Maybe a) -> Parser a
getTokenWithPos ((LToken -> Maybe Position) -> Parser Position)
-> (LToken -> Maybe Position) -> Parser Position
forall a b. (a -> b) -> a -> b
$ \LToken
t -> case LToken
t of L Position
pos Token
OpenBrace -> Position -> Maybe Position
forall a. a -> Maybe a
Just Position
pos; LToken
_ -> Maybe Position
forall a. Maybe a
Nothing
tokCloseBrace :: Parser ()
tokCloseBrace = (Token -> Maybe ()) -> Parser ()
forall a. (Token -> Maybe a) -> Parser a
getToken ((Token -> Maybe ()) -> Parser ())
-> (Token -> Maybe ()) -> Parser ()
forall a b. (a -> b) -> a -> b
$ \Token
t -> case Token
t of Token
CloseBrace -> () -> Maybe ()
forall a. a -> Maybe a
Just (); Token
_ -> Maybe ()
forall a. Maybe a
Nothing
tokFieldLine :: Parser (FieldLine Position)
tokFieldLine = (LToken -> Maybe (FieldLine Position))
-> Parser (FieldLine Position)
forall a. (LToken -> Maybe a) -> Parser a
getTokenWithPos ((LToken -> Maybe (FieldLine Position))
 -> Parser (FieldLine Position))
-> (LToken -> Maybe (FieldLine Position))
-> Parser (FieldLine Position)
forall a b. (a -> b) -> a -> b
$ \LToken
t -> case LToken
t of L Position
pos (TokFieldLine ByteString
s) -> FieldLine Position -> Maybe (FieldLine Position)
forall a. a -> Maybe a
Just (Position -> ByteString -> FieldLine Position
forall ann. ann -> ByteString -> FieldLine ann
FieldLine Position
pos ByteString
s); LToken
_ -> Maybe (FieldLine Position)
forall a. Maybe a
Nothing

colon, openBrace, closeBrace :: Parser ()
sectionArg :: Parser (SectionArg Position)
sectionArg :: Parser (SectionArg Position)
sectionArg = Parser (SectionArg Position)
tokSym' Parser (SectionArg Position)
-> Parser (SectionArg Position) -> Parser (SectionArg Position)
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (SectionArg Position)
tokStr Parser (SectionArg Position)
-> Parser (SectionArg Position) -> Parser (SectionArg Position)
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (SectionArg Position)
tokOther Parser (SectionArg Position)
-> [Char] -> Parser (SectionArg Position)
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Char] -> ParsecT s u m a
<?> [Char]
"section parameter"

fieldSecName :: Parser (Name Position)
fieldSecName :: Parser (Name Position)
fieldSecName = Parser (Name Position)
tokSym Parser (Name Position) -> [Char] -> Parser (Name Position)
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Char] -> ParsecT s u m a
<?> [Char]
"field or section name"

colon :: Parser ()
colon = Parser ()
tokColon Parser () -> [Char] -> Parser ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Char] -> ParsecT s u m a
<?> [Char]
"\":\""
openBrace :: Parser ()
openBrace = do
  pos <- Parser Position
tokOpenBrace Parser Position -> [Char] -> Parser Position
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Char] -> ParsecT s u m a
<?> [Char]
"\"{\""
  addLexerWarning (LexWarning LexBraces pos)
closeBrace :: Parser ()
closeBrace = Parser ()
tokCloseBrace Parser () -> [Char] -> Parser ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Char] -> ParsecT s u m a
<?> [Char]
"\"}\""

fieldContent :: Parser (FieldLine Position)
fieldContent :: Parser (FieldLine Position)
fieldContent = Parser (FieldLine Position)
tokFieldLine Parser (FieldLine Position)
-> [Char] -> Parser (FieldLine Position)
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Char] -> ParsecT s u m a
<?> [Char]
"field contents"

newtype IndentLevel = IndentLevel Int

zeroIndentLevel :: IndentLevel
zeroIndentLevel :: IndentLevel
zeroIndentLevel = Int -> IndentLevel
IndentLevel Int
0

incIndentLevel :: IndentLevel -> IndentLevel
incIndentLevel :: IndentLevel -> IndentLevel
incIndentLevel (IndentLevel Int
i) = Int -> IndentLevel
IndentLevel (Int -> Int
forall a. Enum a => a -> a
succ Int
i)

indentOfAtLeast :: IndentLevel -> Parser IndentLevel
indentOfAtLeast :: IndentLevel -> Parser IndentLevel
indentOfAtLeast (IndentLevel Int
i) = Parser IndentLevel -> Parser IndentLevel
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (Parser IndentLevel -> Parser IndentLevel)
-> Parser IndentLevel -> Parser IndentLevel
forall a b. (a -> b) -> a -> b
$ do
  j <- Parser Int
tokIndent
  guard (j >= i) <?> "indentation of at least " ++ show i
  return (IndentLevel j)

newtype LexerMode = LexerMode Int

inLexerMode :: LexerMode -> Parser p -> Parser p
inLexerMode :: forall p. LexerMode -> Parser p -> Parser p
inLexerMode (LexerMode Int
mode) Parser p
p =
  do Int -> Parser ()
setLexerMode Int
mode; x <- Parser p
p; setLexerMode in_section; return x

-----------------------
-- Cabal file grammar
--

-- $grammar
--
-- @
-- CabalStyleFile ::= SecElems
--
-- SecElems       ::= SecElem* '\\n'?
-- SecElem        ::= '\\n' SecElemLayout | SecElemBraces
-- SecElemLayout  ::= FieldLayout | FieldBraces | SectionLayout | SectionBraces
-- SecElemBraces  ::= FieldInline | FieldBraces |                 SectionBraces
-- FieldLayout    ::= name ':' line? ('\\n' line)*
-- FieldBraces    ::= name ':' '\\n'? '{' content '}'
-- FieldInline    ::= name ':' content
-- SectionLayout  ::= name arg* SecElems
-- SectionBraces  ::= name arg* '\\n'? '{' SecElems '}'
-- @
--
-- and the same thing but left factored...
--
-- @
-- SecElems              ::= SecElem*
-- SecElem               ::= '\\n' name SecElemLayout
--                         |      name SecElemBraces
-- SecElemLayout         ::= ':'   FieldLayoutOrBraces
--                         | arg*  SectionLayoutOrBraces
-- FieldLayoutOrBraces   ::= '\\n'? '{' content '}'
--                         | line? ('\\n' line)*
-- SectionLayoutOrBraces ::= '\\n'? '{' SecElems '\\n'? '}'
--                         | SecElems
-- SecElemBraces         ::= ':' FieldInlineOrBraces
--                         | arg* '\\n'? '{' SecElems '\\n'? '}'
-- FieldInlineOrBraces   ::= '\\n'? '{' content '}'
--                         | content
-- @
--
-- Note how we have several productions with the sequence:
--
-- > '\\n'? '{'
--
-- That is, an optional newline (and indent) followed by a @{@ token.
-- In the @SectionLayoutOrBraces@ case you can see that this makes it
-- not fully left factored (because @SecElems@ can start with a @\\n@).
-- Fully left factoring here would be ugly, and though we could use a
-- lookahead of two tokens to resolve the alternatives, we can't
-- conveniently use Parsec's 'try' here to get a lookahead of only two.
-- So instead we deal with this case in the lexer by making a line
-- where the first non-space is @{@ lex as just the @{@ token, without
-- the usual indent token. Then in the parser we can resolve everything
-- with just one token of lookahead and so without using 'try'.

-- Top level of a file using cabal syntax
--
cabalStyleFile :: Parser [Field Position]
cabalStyleFile :: Parser [Field Position]
cabalStyleFile = do
  es <- IndentLevel -> Parser [Field Position]
elements IndentLevel
zeroIndentLevel
  eof
  return es

-- Elements that live at the top level or inside a section, i.e. fields
-- and sections content
--
-- elements ::= element*
elements :: IndentLevel -> Parser [Field Position]
elements :: IndentLevel -> Parser [Field Position]
elements IndentLevel
ilevel = ParsecT LexState' () Identity (Field Position)
-> Parser [Field Position]
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (IndentLevel -> ParsecT LexState' () Identity (Field Position)
element IndentLevel
ilevel)

-- An individual element, ie a field or a section. These can either use
-- layout style or braces style. For layout style then it must start on
-- a line on its own (so that we know its indentation level).
--
-- element ::= '\\n' name elementInLayoutContext
--           |      name elementInNonLayoutContext
element :: IndentLevel -> Parser (Field Position)
element :: IndentLevel -> ParsecT LexState' () Identity (Field Position)
element IndentLevel
ilevel =
  ( do
      ilevel' <- IndentLevel -> Parser IndentLevel
indentOfAtLeast IndentLevel
ilevel
      name <- fieldSecName
      elementInLayoutContext (incIndentLevel ilevel') name
  )
    ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ( do
            name <- Parser (Name Position)
fieldSecName
            elementInNonLayoutContext name
        )

-- An element (field or section) that is valid in a layout context.
-- In a layout context we can have fields and sections that themselves
-- either use layout style or that use braces style.
--
-- elementInLayoutContext ::= ':'  fieldLayoutOrBraces
--                          | arg* sectionLayoutOrBraces
elementInLayoutContext :: IndentLevel -> Name Position -> Parser (Field Position)
elementInLayoutContext :: IndentLevel
-> Name Position -> ParsecT LexState' () Identity (Field Position)
elementInLayoutContext IndentLevel
ilevel Name Position
name =
  (do Parser ()
colon; IndentLevel
-> Name Position -> ParsecT LexState' () Identity (Field Position)
fieldLayoutOrBraces IndentLevel
ilevel Name Position
name)
    ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ( do
            args <- Parser (SectionArg Position)
-> ParsecT LexState' () Identity [SectionArg Position]
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (SectionArg Position)
sectionArg
            elems <- sectionLayoutOrBraces ilevel
            return (Section name args elems)
        )

-- An element (field or section) that is valid in a non-layout context.
-- In a non-layout context we can have only have fields and sections that
-- themselves use braces style, or inline style fields.
--
-- elementInNonLayoutContext ::= ':' FieldInlineOrBraces
--                             | arg* '\\n'? '{' elements '\\n'? '}'
elementInNonLayoutContext :: Name Position -> Parser (Field Position)
elementInNonLayoutContext :: Name Position -> ParsecT LexState' () Identity (Field Position)
elementInNonLayoutContext Name Position
name =
  (do Parser ()
colon; Name Position -> ParsecT LexState' () Identity (Field Position)
fieldInlineOrBraces Name Position
name)
    ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ( do
            args <- Parser (SectionArg Position)
-> ParsecT LexState' () Identity [SectionArg Position]
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (SectionArg Position)
sectionArg
            openBrace
            elems <- elements zeroIndentLevel
            optional tokIndent
            closeBrace
            return (Section name args elems)
        )

-- The body of a field, using either layout style or braces style.
--
-- fieldLayoutOrBraces   ::= '\\n'? '{' content '}'
--                         | line? ('\\n' line)*
fieldLayoutOrBraces :: IndentLevel -> Name Position -> Parser (Field Position)
fieldLayoutOrBraces :: IndentLevel
-> Name Position -> ParsecT LexState' () Identity (Field Position)
fieldLayoutOrBraces IndentLevel
ilevel Name Position
name = ParsecT LexState' () Identity (Field Position)
braces ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT LexState' () Identity (Field Position)
fieldLayout
  where
    braces :: ParsecT LexState' () Identity (Field Position)
braces = do
      Parser ()
openBrace
      ls <- LexerMode
-> Parser [FieldLine Position] -> Parser [FieldLine Position]
forall p. LexerMode -> Parser p -> Parser p
inLexerMode (Int -> LexerMode
LexerMode Int
in_field_braces) (Parser (FieldLine Position) -> Parser [FieldLine Position]
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (FieldLine Position)
fieldContent)
      closeBrace
      return (Field name ls)
    fieldLayout :: ParsecT LexState' () Identity (Field Position)
fieldLayout = LexerMode
-> ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
forall p. LexerMode -> Parser p -> Parser p
inLexerMode (Int -> LexerMode
LexerMode Int
in_field_layout) (ParsecT LexState' () Identity (Field Position)
 -> ParsecT LexState' () Identity (Field Position))
-> ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
forall a b. (a -> b) -> a -> b
$ do
      l <- Parser (FieldLine Position)
-> ParsecT LexState' () Identity (Maybe (FieldLine Position))
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m (Maybe a)
optionMaybe Parser (FieldLine Position)
fieldContent
      ls <- many (do _ <- indentOfAtLeast ilevel; fieldContent)
      return $ case l of
        Maybe (FieldLine Position)
Nothing -> Name Position -> [FieldLine Position] -> Field Position
forall ann. Name ann -> [FieldLine ann] -> Field ann
Field Name Position
name [FieldLine Position]
ls
        Just FieldLine Position
l' -> Name Position -> [FieldLine Position] -> Field Position
forall ann. Name ann -> [FieldLine ann] -> Field ann
Field Name Position
name (FieldLine Position
l' FieldLine Position -> [FieldLine Position] -> [FieldLine Position]
forall a. a -> [a] -> [a]
: [FieldLine Position]
ls)

-- The body of a section, using either layout style or braces style.
--
-- sectionLayoutOrBraces ::= '\\n'? '{' elements \\n? '}'
--                         | elements
sectionLayoutOrBraces :: IndentLevel -> Parser [Field Position]
sectionLayoutOrBraces :: IndentLevel -> Parser [Field Position]
sectionLayoutOrBraces IndentLevel
ilevel =
  ( do
      Parser ()
openBrace
      elems <- IndentLevel -> Parser [Field Position]
elements IndentLevel
zeroIndentLevel
      optional tokIndent
      closeBrace
      return elems
  )
    Parser [Field Position]
-> Parser [Field Position] -> Parser [Field Position]
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (IndentLevel -> Parser [Field Position]
elements IndentLevel
ilevel)

-- The body of a field, using either inline style or braces.
--
-- fieldInlineOrBraces   ::= '\\n'? '{' content '}'
--                         | content
fieldInlineOrBraces :: Name Position -> Parser (Field Position)
fieldInlineOrBraces :: Name Position -> ParsecT LexState' () Identity (Field Position)
fieldInlineOrBraces Name Position
name =
  ( do
      Parser ()
openBrace
      ls <- LexerMode
-> Parser [FieldLine Position] -> Parser [FieldLine Position]
forall p. LexerMode -> Parser p -> Parser p
inLexerMode (Int -> LexerMode
LexerMode Int
in_field_braces) (Parser (FieldLine Position) -> Parser [FieldLine Position]
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (FieldLine Position)
fieldContent)
      closeBrace
      return (Field name ls)
  )
    ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
-> ParsecT LexState' () Identity (Field Position)
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ( do
            ls <- LexerMode
-> Parser [FieldLine Position] -> Parser [FieldLine Position]
forall p. LexerMode -> Parser p -> Parser p
inLexerMode (Int -> LexerMode
LexerMode Int
in_field_braces) ([FieldLine Position]
-> Parser [FieldLine Position] -> Parser [FieldLine Position]
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option [] ((FieldLine Position -> [FieldLine Position])
-> Parser (FieldLine Position) -> Parser [FieldLine Position]
forall a b.
(a -> b)
-> ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\FieldLine Position
l -> [FieldLine Position
l]) Parser (FieldLine Position)
fieldContent))
            return (Field name ls)
        )

-- | Parse cabal style 'B8.ByteString' into list of 'Field's, i.e. the cabal AST.
--
-- 'readFields' assumes that input 'B8.ByteString' is valid UTF8, specifically it doesn't validate that file is valid UTF8.
-- Therefore bytestrings inside returned 'Field' will be invalid as UTF8 if the input were.
--
-- >>> readFields "foo: \223"
-- Right [Field (Name (Position 1 1) "foo") [FieldLine (Position 1 6) "\223"]]
--
-- 'readFields' won't (necessarily) fail on invalid UTF8 data, but the reported positions may be off.
--
-- __You may get weird errors on non-UTF8 input__, for example 'readFields' will fail on latin1 encoded non-breaking space:
--
-- >>> isLeft (readFields "\xa0 foo: bar")
-- True
--
-- That is rejected because parser thinks @\\xa0@ is a section name,
-- and section arguments may not contain colon.
-- If there are just latin1 non-breaking spaces, they become part of the name:
--
-- >>> readFields "\xa0\&foo: bar"
-- Right [Field (Name (Position 1 1) "\160foo") [FieldLine (Position 1 7) "bar"]]
--
-- The UTF8 non-breaking space is accepted as an indentation character (but warned about by 'readFields'').
--
-- >>> readFields' "\xc2\xa0 foo: bar"
-- Right ([Field (Name (Position 1 3) "foo") [FieldLine (Position 1 8) "bar"]],[LexWarning LexWarningNBSP (Position 1 1)])
readFields :: B8.ByteString -> Either ParseError [Field Position]
readFields :: ByteString -> Either ParseError [Field Position]
readFields ByteString
s = (([Field Position], [LexWarning]) -> [Field Position])
-> Either ParseError ([Field Position], [LexWarning])
-> Either ParseError [Field Position]
forall a b. (a -> b) -> Either ParseError a -> Either ParseError b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ([Field Position], [LexWarning]) -> [Field Position]
forall a b. (a, b) -> a
fst (ByteString -> Either ParseError ([Field Position], [LexWarning])
readFields' ByteString
s)

-- | Like 'readFields' but also return lexer warnings.
readFields' :: B8.ByteString -> Either ParseError ([Field Position], [LexWarning])
readFields' :: ByteString -> Either ParseError ([Field Position], [LexWarning])
readFields' ByteString
s = do
  Parsec LexState' () ([Field Position], [LexWarning])
-> [Char]
-> LexState'
-> Either ParseError ([Field Position], [LexWarning])
forall s t a.
Stream s Identity t =>
Parsec s () a -> [Char] -> s -> Either ParseError a
parse Parsec LexState' () ([Field Position], [LexWarning])
parser [Char]
"the input" LexState'
lexSt
  where
    parser :: Parsec LexState' () ([Field Position], [LexWarning])
parser = do
      fields <- Parser [Field Position]
cabalStyleFile
      ws <- getLexerWarnings -- lexer accumulates warnings in reverse (consing them to the list)
      pure (fields, reverse ws ++ checkIndentation fields [])

    lexSt :: LexState'
lexSt = LexState -> LexState'
mkLexState' (ByteString -> LexState
mkLexState ByteString
s)

-- | Check (recursively) that all fields inside a block are indented the same.
--
-- We have to do this as a post-processing check.
-- As the parser uses indentOfAtLeast approach, we don't know what is the "correct"
-- indentation for following fields.
--
-- To catch during parsing we would need to parse first field/section of a section
-- and then parse the following ones (softly) requiring the exactly the same indentation.
checkIndentation :: [Field Position] -> [LexWarning] -> [LexWarning]
checkIndentation :: [Field Position] -> [LexWarning] -> [LexWarning]
checkIndentation [] = [LexWarning] -> [LexWarning]
forall a. a -> a
id
checkIndentation (Field Name Position
name [FieldLine Position]
_ : [Field Position]
fs') = Position -> [Field Position] -> [LexWarning] -> [LexWarning]
checkIndentation' (Name Position -> Position
forall ann. Name ann -> ann
nameAnn Name Position
name) [Field Position]
fs'
checkIndentation (Section Name Position
name [SectionArg Position]
_ [Field Position]
fs : [Field Position]
fs') = [Field Position] -> [LexWarning] -> [LexWarning]
checkIndentation [Field Position]
fs ([LexWarning] -> [LexWarning])
-> ([LexWarning] -> [LexWarning]) -> [LexWarning] -> [LexWarning]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Position -> [Field Position] -> [LexWarning] -> [LexWarning]
checkIndentation' (Name Position -> Position
forall ann. Name ann -> ann
nameAnn Name Position
name) [Field Position]
fs'

-- | We compare adjacent fields to reduce the amount of reported indentation warnings.
checkIndentation' :: Position -> [Field Position] -> [LexWarning] -> [LexWarning]
checkIndentation' :: Position -> [Field Position] -> [LexWarning] -> [LexWarning]
checkIndentation' Position
_ [] = [LexWarning] -> [LexWarning]
forall a. a -> a
id
checkIndentation' Position
pos (Field Name Position
name [FieldLine Position]
_ : [Field Position]
fs') = Position -> Position -> [LexWarning] -> [LexWarning]
checkIndentation'' Position
pos (Name Position -> Position
forall ann. Name ann -> ann
nameAnn Name Position
name) ([LexWarning] -> [LexWarning])
-> ([LexWarning] -> [LexWarning]) -> [LexWarning] -> [LexWarning]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Position -> [Field Position] -> [LexWarning] -> [LexWarning]
checkIndentation' (Name Position -> Position
forall ann. Name ann -> ann
nameAnn Name Position
name) [Field Position]
fs'
checkIndentation' Position
pos (Section Name Position
name [SectionArg Position]
_ [Field Position]
fs : [Field Position]
fs') = Position -> Position -> [LexWarning] -> [LexWarning]
checkIndentation'' Position
pos (Name Position -> Position
forall ann. Name ann -> ann
nameAnn Name Position
name) ([LexWarning] -> [LexWarning])
-> ([LexWarning] -> [LexWarning]) -> [LexWarning] -> [LexWarning]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Field Position] -> [LexWarning] -> [LexWarning]
checkIndentation [Field Position]
fs ([LexWarning] -> [LexWarning])
-> ([LexWarning] -> [LexWarning]) -> [LexWarning] -> [LexWarning]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Position -> [Field Position] -> [LexWarning] -> [LexWarning]
checkIndentation' (Name Position -> Position
forall ann. Name ann -> ann
nameAnn Name Position
name) [Field Position]
fs'

-- | Check that positions' columns are the same.
checkIndentation'' :: Position -> Position -> [LexWarning] -> [LexWarning]
checkIndentation'' :: Position -> Position -> [LexWarning] -> [LexWarning]
checkIndentation'' Position
a Position
b
  | Position -> Int
positionCol Position
a Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Position -> Int
positionCol Position
b = [LexWarning] -> [LexWarning]
forall a. a -> a
id
  | Bool
otherwise = (LexWarningType -> Position -> LexWarning
LexWarning LexWarningType
LexInconsistentIndentation Position
b LexWarning -> [LexWarning] -> [LexWarning]
forall a. a -> [a] -> [a]
:)

#ifdef CABAL_PARSEC_DEBUG
parseTest' :: Show a => Parsec LexState' () a -> SourceName -> B8.ByteString -> IO ()
parseTest' p fname s =
    case parse p fname (lexSt s) of
      Left err -> putStrLn (formatError s err)

      Right x  -> print x
  where
    lexSt = mkLexState' . mkLexState

parseFile :: Show a => Parser a -> FilePath -> IO ()
parseFile p f = B8.readFile f >>= \s -> parseTest' p f s

parseStr  :: Show a => Parser a -> String -> IO ()
parseStr p = parseBS p . B8.pack

parseBS  :: Show a => Parser a -> B8.ByteString -> IO ()
parseBS p = parseTest' p "<input string>"

formatError :: B8.ByteString -> ParseError -> String
formatError input perr =
    unlines
      [ "Parse error "++ show (errorPos perr) ++ ":"
      , errLine
      , indicator ++ errmsg ]
  where
    pos       = errorPos perr
    ls        = lines' (T.decodeUtf8With T.lenientDecode input)
    errLine   = T.unpack (ls !! (sourceLine pos - 1))
    indicator = replicate (sourceColumn pos) ' ' ++ "^"
    errmsg    = showErrorMessages "or" "unknown parse error"
                                  "expecting" "unexpected" "end of file"
                                  (errorMessages perr)

-- | Handles windows/osx/unix line breaks uniformly
lines' :: T.Text -> [T.Text]
lines' s1
  | T.null s1 = []
  | otherwise = case T.break (\c -> c == '\r' || c == '\n') s1 of
                  (l, s2) | Just (c,s3) <- T.uncons s2
                         -> case T.uncons s3 of
                              Just ('\n', s4) | c == '\r' -> l : lines' s4
                              _               -> l : lines' s3
                          | otherwise -> [l]
#endif

eof :: Parser ()
eof :: Parser ()
eof = Parser LToken -> Parser ()
notFollowedBy Parser LToken
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m t
anyToken Parser () -> [Char] -> Parser ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Char] -> ParsecT s u m a
<?> [Char]
"end of file"
  where
    notFollowedBy :: Parser LToken -> Parser ()
    notFollowedBy :: Parser LToken -> Parser ()
notFollowedBy Parser LToken
p =
      Parser () -> Parser ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try
        ( (do L _ t <- Parser LToken -> Parser LToken
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try Parser LToken
p; unexpected (describeToken t))
            Parser () -> Parser () -> Parser ()
forall a.
ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
-> ParsecT LexState' () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> () -> Parser ()
forall a. a -> ParsecT LexState' () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
        )