{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}

-- |
-- Module      :  Documentation.Haddock.Parser.Monad
-- Copyright   :  (c) Alec Theriault 2018-2019,
-- License     :  BSD-like
--
-- Maintainer  :  haddock@projects.haskell.org
-- Stability   :  experimental
-- Portability :  portable
--
-- Defines the Parsec monad over which all parsing is done and also provides
-- more efficient versions of the usual parsec combinator functions (but
-- specialized to 'Text').
module Documentation.Haddock.Parser.Monad where

import Control.Applicative as App
import Control.Monad (mfilter)
import Data.Bits (Bits (..))
import Data.Char (ord)
import Data.Functor (($>))
import qualified Data.List as List
import Data.String (IsString (..))
import Data.Text (Text)
import qualified Data.Text as T
import Text.Parsec
  ( State (..)
  , getParserState
  , setParserState
  )
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos (updatePosChar)
import Prelude hiding (takeWhile)

import Documentation.Haddock.Types (MetaSince (..))

-- | The only bit of information we really care about trudging along with us
-- through parsing is the version attached to a @\@since@ annotation - if
-- the doc even contained one.
newtype ParserState = ParserState
  { ParserState -> Maybe MetaSince
parserStateSince :: Maybe MetaSince
  }
  deriving (ParserState -> ParserState -> Bool
(ParserState -> ParserState -> Bool)
-> (ParserState -> ParserState -> Bool) -> Eq ParserState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: ParserState -> ParserState -> Bool
== :: ParserState -> ParserState -> Bool
$c/= :: ParserState -> ParserState -> Bool
/= :: ParserState -> ParserState -> Bool
Eq, Int -> ParserState -> ShowS
[ParserState] -> ShowS
ParserState -> String
(Int -> ParserState -> ShowS)
-> (ParserState -> String)
-> ([ParserState] -> ShowS)
-> Show ParserState
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> ParserState -> ShowS
showsPrec :: Int -> ParserState -> ShowS
$cshow :: ParserState -> String
show :: ParserState -> String
$cshowList :: [ParserState] -> ShowS
showList :: [ParserState] -> ShowS
Show)

initialParserState :: ParserState
initialParserState :: ParserState
initialParserState = Maybe MetaSince -> ParserState
ParserState Maybe MetaSince
forall a. Maybe a
Nothing

setSince :: MetaSince -> Parser ()
setSince :: MetaSince -> Parser ()
setSince MetaSince
since = (ParserState -> ParserState) -> Parser ()
forall (m :: Type -> Type) u s.
Monad m =>
(u -> u) -> ParsecT s u m ()
Parsec.modifyState (\ParserState
st -> ParserState
st{parserStateSince = Just since})

type Parser = Parsec.Parsec Text ParserState

instance a ~ Text => IsString (Parser a) where
  fromString :: String -> Parser a
fromString = (String -> a)
-> ParsecT Text ParserState Identity String -> Parser a
forall a b.
(a -> b)
-> ParsecT Text ParserState Identity a
-> ParsecT Text ParserState Identity b
forall (f :: Type -> Type) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> a
String -> Text
T.pack (ParsecT Text ParserState Identity String -> Parser a)
-> (String -> ParsecT Text ParserState Identity String)
-> String
-> Parser a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ParsecT Text ParserState Identity String
forall s (m :: Type -> Type) u.
Stream s m Char =>
String -> ParsecT s u m String
Parsec.string

parseOnly :: Parser a -> Text -> Either String (ParserState, a)
parseOnly :: forall a. Parser a -> Text -> Either String (ParserState, a)
parseOnly Parser a
p Text
t = case Parsec Text ParserState (a, ParserState)
-> ParserState
-> String
-> Text
-> Either ParseError (a, ParserState)
forall s t u a.
Stream s Identity t =>
Parsec s u a -> u -> String -> s -> Either ParseError a
Parsec.runParser Parsec Text ParserState (a, ParserState)
p' ParserState
initialParserState String
"<haddock>" Text
t of
  Left ParseError
e -> String -> Either String (ParserState, a)
forall a b. a -> Either a b
Left (ParseError -> String
forall a. Show a => a -> String
show ParseError
e)
  Right (a
x, ParserState
s) -> (ParserState, a) -> Either String (ParserState, a)
forall a b. b -> Either a b
Right (ParserState
s, a
x)
  where
    p' :: Parsec Text ParserState (a, ParserState)
p' = (,) (a -> ParserState -> (a, ParserState))
-> Parser a
-> ParsecT
     Text ParserState Identity (ParserState -> (a, ParserState))
forall (f :: Type -> Type) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser a
p ParsecT Text ParserState Identity (ParserState -> (a, ParserState))
-> ParsecT Text ParserState Identity ParserState
-> Parsec Text ParserState (a, ParserState)
forall a b.
ParsecT Text ParserState Identity (a -> b)
-> ParsecT Text ParserState Identity a
-> ParsecT Text ParserState Identity b
forall (f :: Type -> Type) a b.
Applicative f =>
f (a -> b) -> f a -> f b
<*> ParsecT Text ParserState Identity ParserState
forall (m :: Type -> Type) s u. Monad m => ParsecT s u m u
Parsec.getState

-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not
-- consume input.
--
-- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but
-- more efficient.
peekChar :: Parser (Maybe Char)
peekChar :: Parser (Maybe Char)
peekChar = Text -> Maybe Char
headOpt (Text -> Maybe Char)
-> (State Text ParserState -> Text)
-> State Text ParserState
-> Maybe Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. State Text ParserState -> Text
forall s u. State s u -> s
stateInput (State Text ParserState -> Maybe Char)
-> ParsecT Text ParserState Identity (State Text ParserState)
-> Parser (Maybe Char)
forall (f :: Type -> Type) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: Type -> Type) s u.
Monad m =>
ParsecT s u m (State s u)
getParserState
  where
    headOpt :: Text -> Maybe Char
headOpt Text
t
      | Text -> Bool
T.null Text
t = Maybe Char
forall a. Maybe a
Nothing
      | Bool
otherwise = Char -> Maybe Char
forall a. a -> Maybe a
Just (HasCallStack => Text -> Char
Text -> Char
T.head Text
t)
{-# INLINE peekChar #-}

-- | Fails if at the end of input. Does not consume input.
--
-- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient.
peekChar' :: Parser Char
peekChar' :: Parser Char
peekChar' = Text -> Parser Char
forall {s} {u} {m :: Type -> Type}. Text -> ParsecT s u m Char
headFail (Text -> Parser Char)
-> (State Text ParserState -> Text)
-> State Text ParserState
-> Parser Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. State Text ParserState -> Text
forall s u. State s u -> s
stateInput (State Text ParserState -> Parser Char)
-> ParsecT Text ParserState Identity (State Text ParserState)
-> Parser Char
forall (m :: Type -> Type) a b. Monad m => (a -> m b) -> m a -> m b
=<< ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: Type -> Type) s u.
Monad m =>
ParsecT s u m (State s u)
getParserState
  where
    headFail :: Text -> ParsecT s u m Char
headFail Text
t
      | Text -> Bool
T.null Text
t = String -> ParsecT s u m Char
forall s u (m :: Type -> Type) a. String -> ParsecT s u m a
Parsec.parserFail String
"peekChar': reached EOF"
      | Bool
otherwise = Char -> ParsecT s u m Char
forall a. a -> ParsecT s u m a
forall (f :: Type -> Type) a. Applicative f => a -> f a
App.pure (HasCallStack => Text -> Char
Text -> Char
T.head Text
t)
{-# INLINE peekChar' #-}

-- | Parses the given string. Returns the parsed string.
--
-- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient.
string :: Text -> Parser Text
string :: Text -> Parser Text
string Text
t = do
  s@State{stateInput = inp, statePos = pos} <- ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: Type -> Type) s u.
Monad m =>
ParsecT s u m (State s u)
getParserState
  case T.stripPrefix t inp of
    Maybe Text
Nothing -> String -> Parser Text
forall s u (m :: Type -> Type) a. String -> ParsecT s u m a
Parsec.parserFail String
"string: Failed to match the input string"
    Just Text
inp' ->
      let pos' :: SourcePos
pos' = (SourcePos -> Char -> SourcePos) -> SourcePos -> Text -> SourcePos
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Text
t
          s' :: State Text ParserState
s' = State Text ParserState
s{stateInput = inp', statePos = pos'}
       in State Text ParserState
-> ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: Type -> Type) s u.
Monad m =>
State s u -> ParsecT s u m (State s u)
setParserState State Text ParserState
s' ParsecT Text ParserState Identity (State Text ParserState)
-> Text -> Parser Text
forall (f :: Type -> Type) a b. Functor f => f a -> b -> f b
$> Text
t

-- | Keep matching characters as long as the predicate function holds (and
-- return them).
--
-- Equivalent to @fmap T.pack . Parsec.many@, but more efficient.
takeWhile :: (Char -> Bool) -> Parser Text
takeWhile :: (Char -> Bool) -> Parser Text
takeWhile Char -> Bool
f = do
  s@State{stateInput = inp, statePos = pos} <- ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: Type -> Type) s u.
Monad m =>
ParsecT s u m (State s u)
getParserState
  let (t, inp') = T.span f inp
      pos' = (SourcePos -> Char -> SourcePos) -> SourcePos -> Text -> SourcePos
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Text
t
      s' = State Text ParserState
s{stateInput = inp', statePos = pos'}
  setParserState s' $> t

-- | Like 'takeWhile', but fails if no characters matched.
--
-- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient.
takeWhile1 :: (Char -> Bool) -> Parser Text
takeWhile1 :: (Char -> Bool) -> Parser Text
takeWhile1 = (Text -> Bool) -> Parser Text -> Parser Text
forall (m :: Type -> Type) a.
MonadPlus m =>
(a -> Bool) -> m a -> m a
mfilter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) (Parser Text -> Parser Text)
-> ((Char -> Bool) -> Parser Text) -> (Char -> Bool) -> Parser Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Parser Text
takeWhile

-- | Scan the input text, accumulating characters as long as the scanning
-- function returns true.
scan
  :: (s -> Char -> Maybe s)
  -- ^ scan function
  -> s
  -- ^ initial state
  -> Parser Text
scan :: forall s. (s -> Char -> Maybe s) -> s -> Parser Text
scan s -> Char -> Maybe s
f s
st = do
  s@State{stateInput = inp, statePos = pos} <- ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: Type -> Type) s u.
Monad m =>
ParsecT s u m (State s u)
getParserState
  go inp st pos 0 $ \Text
inp' SourcePos
pos' Int
n ->
    let s' :: State Text ParserState
s' = State Text ParserState
s{Parsec.stateInput = inp', Parsec.statePos = pos'}
     in State Text ParserState
-> ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: Type -> Type) s u.
Monad m =>
State s u -> ParsecT s u m (State s u)
setParserState State Text ParserState
s' ParsecT Text ParserState Identity (State Text ParserState)
-> Text -> Parser Text
forall (f :: Type -> Type) a b. Functor f => f a -> b -> f b
$> Int -> Text -> Text
T.take Int
n Text
inp
  where
    go :: Text
-> s
-> SourcePos
-> Int
-> (Text -> SourcePos -> Int -> Parser Text)
-> Parser Text
go Text
inp s
s !SourcePos
pos !Int
n Text -> SourcePos -> Int -> Parser Text
cont =
      case Text -> Maybe (Char, Text)
T.uncons Text
inp of
        Maybe (Char, Text)
Nothing -> Text -> SourcePos -> Int -> Parser Text
cont Text
inp SourcePos
pos Int
n -- ran out of input
        Just (Char
c, Text
inp') ->
          case s -> Char -> Maybe s
f s
s Char
c of
            Maybe s
Nothing -> Text -> SourcePos -> Int -> Parser Text
cont Text
inp SourcePos
pos Int
n -- scan function failed
            Just s
s' -> Text
-> s
-> SourcePos
-> Int
-> (Text -> SourcePos -> Int -> Parser Text)
-> Parser Text
go Text
inp' s
s' (SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Char
c) (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Text -> SourcePos -> Int -> Parser Text
cont

-- | Parse a decimal number.
decimal :: Integral a => Parser a
decimal :: forall a. Integral a => Parser a
decimal = (a -> Char -> a) -> a -> String -> a
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: Type -> Type) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
List.foldl' a -> Char -> a
forall {a}. Num a => a -> Char -> a
step a
0 (String -> a)
-> ParsecT Text ParserState Identity String
-> ParsecT Text ParserState Identity a
forall a b.
(a -> b)
-> ParsecT Text ParserState Identity a
-> ParsecT Text ParserState Identity b
forall (f :: Type -> Type) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Parser Char -> ParsecT Text ParserState Identity String
forall s u (m :: Type -> Type) a.
ParsecT s u m a -> ParsecT s u m [a]
Parsec.many1 Parser Char
forall s (m :: Type -> Type) u.
Stream s m Char =>
ParsecT s u m Char
Parsec.digit
  where
    step :: a -> Char -> a
step a
a Char
c = a
a a -> a -> a
forall a. Num a => a -> a -> a
* a
10 a -> a -> a
forall a. Num a => a -> a -> a
+ Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
48)

-- | Parse a hexadecimal number.
hexadecimal :: (Integral a, Bits a) => Parser a
hexadecimal :: forall a. (Integral a, Bits a) => Parser a
hexadecimal = (a -> Char -> a) -> a -> String -> a
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: Type -> Type) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
List.foldl' a -> Char -> a
forall {a}. (Bits a, Num a) => a -> Char -> a
step a
0 (String -> a)
-> ParsecT Text ParserState Identity String
-> ParsecT Text ParserState Identity a
forall a b.
(a -> b)
-> ParsecT Text ParserState Identity a
-> ParsecT Text ParserState Identity b
forall (f :: Type -> Type) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Parser Char -> ParsecT Text ParserState Identity String
forall s u (m :: Type -> Type) a.
ParsecT s u m a -> ParsecT s u m [a]
Parsec.many1 Parser Char
forall s (m :: Type -> Type) u.
Stream s m Char =>
ParsecT s u m Char
Parsec.hexDigit
  where
    step :: a -> Char -> a
step a
a Char
c
      | Int
w Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
48 Bool -> Bool -> Bool
&& Int
w Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
57 = (a
a a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
4) a -> a -> a
forall a. Bits a => a -> a -> a
.|. Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
w Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
48)
      | Int
w Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
97 = (a
a a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
4) a -> a -> a
forall a. Bits a => a -> a -> a
.|. Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
w Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
87)
      | Bool
otherwise = (a
a a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
4) a -> a -> a
forall a. Bits a => a -> a -> a
.|. Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
w Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
55)
      where
        w :: Int
w = Char -> Int
ord Char
c