-----------------------------------------------------------------------------

-- This version is interesting because instead of striping comment lines, it
-- turns them into "-- " style comments. This allows using haddock markup
-- in literate scripts without having to use "> --" prefix.

-- |
-- Module      :  Distribution.Simple.PreProcess.Unlit
-- Copyright   :  ...
--
-- Maintainer  :  cabal-devel@haskell.org
-- Portability :  portable
--
-- Remove the \"literal\" markups from a Haskell source file, including
-- \"@>@\", \"@\\begin{code}@\", \"@\\end{code}@\", and \"@#@\"
module Distribution.Simple.PreProcess.Unlit (unlit, plain) where

import Data.List (mapAccumL)
import Distribution.Compat.Prelude
import Distribution.Simple.Errors
import Distribution.Utils.Generic (safeInit, safeLast, safeTail)
import Prelude ()

data Classified
  = BirdTrack String
  | Blank String
  | Ordinary String
  | Line !Int String
  | CPP String
  | BeginCode
  | EndCode
  | -- output only:
    Error String
  | Comment String

-- | No unliteration.
plain :: String -> String -> String
plain :: [Char] -> [Char] -> [Char]
plain [Char]
_ [Char]
hs = [Char]
hs

classify :: String -> Classified
classify :: [Char] -> Classified
classify (Char
'>' : [Char]
s) = [Char] -> Classified
BirdTrack [Char]
s
classify (Char
'#' : [Char]
s) = case [Char] -> [[Char]]
tokens [Char]
s of
  ([Char]
line : file :: [Char]
file@(Char
'"' : Char
_ : [Char]
_) : [[Char]]
_)
    | (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit [Char]
line
        Bool -> Bool -> Bool
&& [Char] -> Maybe Char
forall a. [a] -> Maybe a
safeLast [Char]
file Maybe Char -> Maybe Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'"' ->
        -- this shouldn't fail as we tested for 'all isDigit'
        Int -> [Char] -> Classified
Line (Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe ([Char] -> Int
forall a. HasCallStack => [Char] -> a
error ([Char] -> Int) -> [Char] -> Int
forall a b. (a -> b) -> a -> b
$ [Char]
"panic! read @Int " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char] -> [Char]
forall a. Show a => a -> [Char]
show [Char]
line) (Maybe Int -> Int) -> Maybe Int -> Int
forall a b. (a -> b) -> a -> b
$ [Char] -> Maybe Int
forall a. Read a => [Char] -> Maybe a
readMaybe [Char]
line) ([Char] -> [Char]
forall a. [a] -> [a]
safeTail ([Char] -> [Char]
forall a. [a] -> [a]
safeInit [Char]
file)) -- TODO:eradicateNoParse
  [[Char]]
_ -> [Char] -> Classified
CPP [Char]
s
  where
    tokens :: [Char] -> [[Char]]
tokens = ([Char] -> Maybe ([Char], [Char])) -> [Char] -> [[Char]]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr (([Char] -> Maybe ([Char], [Char])) -> [Char] -> [[Char]])
-> ([Char] -> Maybe ([Char], [Char])) -> [Char] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ \[Char]
str -> case ReadS [Char]
lex [Char]
str of
      (t :: [Char]
t@(Char
_ : [Char]
_), [Char]
str') : [([Char], [Char])]
_ -> ([Char], [Char]) -> Maybe ([Char], [Char])
forall a. a -> Maybe a
Just ([Char]
t, [Char]
str')
      [([Char], [Char])]
_ -> Maybe ([Char], [Char])
forall a. Maybe a
Nothing
classify (Char
'\\' : [Char]
s)
  | [Char]
"begin{code}" [Char] -> [Char] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` [Char]
s = Classified
BeginCode
  | [Char]
"end{code}" [Char] -> [Char] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` [Char]
s = Classified
EndCode
classify [Char]
s | (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isSpace [Char]
s = [Char] -> Classified
Blank [Char]
s
classify [Char]
s = [Char] -> Classified
Ordinary [Char]
s

-- So the weird exception for comment indenting is to make things work with
-- haddock, see classifyAndCheckForBirdTracks below.
unclassify :: Bool -> Classified -> String
unclassify :: Bool -> Classified -> [Char]
unclassify Bool
_ (BirdTrack [Char]
s) = Char
' ' Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: [Char]
s
unclassify Bool
_ (Blank [Char]
s) = [Char]
s
unclassify Bool
_ (Ordinary [Char]
s) = [Char]
s
unclassify Bool
_ (Line Int
n [Char]
file) = [Char]
"# " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
n [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char] -> [Char]
forall a. Show a => a -> [Char]
show [Char]
file
unclassify Bool
_ (CPP [Char]
s) = Char
'#' Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: [Char]
s
unclassify Bool
True (Comment [Char]
"") = [Char]
"  --"
unclassify Bool
True (Comment [Char]
s) = [Char]
"  -- " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
s
unclassify Bool
False (Comment [Char]
"") = [Char]
"--"
unclassify Bool
False (Comment [Char]
s) = [Char]
"-- " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
s
unclassify Bool
_ Classified
_ = [Char]
forall a. a
internalError

-- | 'unlit' takes a filename (for error reports), and transforms the
--   given string, to eliminate the literate comments from the program text.
unlit :: FilePath -> String -> Either String CabalException
unlit :: [Char] -> [Char] -> Either [Char] CabalException
unlit [Char]
file [Char]
input =
  let (Bool
usesBirdTracks, [Classified]
classified) =
        [[Char]] -> (Bool, [Classified])
classifyAndCheckForBirdTracks
          ([[Char]] -> (Bool, [Classified]))
-> ([Char] -> [[Char]]) -> [Char] -> (Bool, [Classified])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> [[Char]]
inlines
          ([Char] -> (Bool, [Classified])) -> [Char] -> (Bool, [Classified])
forall a b. (a -> b) -> a -> b
$ [Char]
input
   in ([Classified] -> Either [Char] CabalException)
-> (CabalException -> Either [Char] CabalException)
-> Either [Classified] CabalException
-> Either [Char] CabalException
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
        ([Char] -> Either [Char] CabalException
forall a b. a -> Either a b
Left ([Char] -> Either [Char] CabalException)
-> ([Classified] -> [Char])
-> [Classified]
-> Either [Char] CabalException
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[Char]] -> [Char]
unlines ([[Char]] -> [Char])
-> ([Classified] -> [[Char]]) -> [Classified] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Classified -> [Char]) -> [Classified] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map (Bool -> Classified -> [Char]
unclassify Bool
usesBirdTracks))
        CabalException -> Either [Char] CabalException
forall a b. b -> Either a b
Right
        (Either [Classified] CabalException
 -> Either [Char] CabalException)
-> ([Classified] -> Either [Classified] CabalException)
-> [Classified]
-> Either [Char] CabalException
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Classified] -> Either [Classified] CabalException
checkErrors
        ([Classified] -> Either [Classified] CabalException)
-> ([Classified] -> [Classified])
-> [Classified]
-> Either [Classified] CabalException
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Classified] -> [Classified]
reclassify
        ([Classified] -> Either [Char] CabalException)
-> [Classified] -> Either [Char] CabalException
forall a b. (a -> b) -> a -> b
$ [Classified]
classified
  where
    -- So haddock requires comments and code to align, since it treats comments
    -- as following the layout rule. This is a pain for us since bird track
    -- style literate code typically gets indented by two since ">" is replaced
    -- by " " and people usually use one additional space of indent ie
    -- "> then the code". On the other hand we cannot just go and indent all
    -- the comments by two since that does not work for latex style literate
    -- code. So the hacky solution we use here is that if we see any bird track
    -- style code then we'll indent all comments by two, otherwise by none.
    -- Of course this will not work for mixed latex/bird track .lhs files but
    -- nobody does that, it's silly and specifically recommended against in the
    -- H98 unlit spec.
    --
    classifyAndCheckForBirdTracks :: [[Char]] -> (Bool, [Classified])
classifyAndCheckForBirdTracks =
      ((Bool -> [Char] -> (Bool, Classified))
 -> Bool -> [[Char]] -> (Bool, [Classified]))
-> Bool
-> (Bool -> [Char] -> (Bool, Classified))
-> [[Char]]
-> (Bool, [Classified])
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Bool -> [Char] -> (Bool, Classified))
-> Bool -> [[Char]] -> (Bool, [Classified])
forall (t :: * -> *) s a b.
Traversable t =>
(s -> a -> (s, b)) -> s -> t a -> (s, t b)
mapAccumL Bool
False ((Bool -> [Char] -> (Bool, Classified))
 -> [[Char]] -> (Bool, [Classified]))
-> (Bool -> [Char] -> (Bool, Classified))
-> [[Char]]
-> (Bool, [Classified])
forall a b. (a -> b) -> a -> b
$ \Bool
seenBirdTrack [Char]
line ->
        let classification :: Classified
classification = [Char] -> Classified
classify [Char]
line
         in (Bool
seenBirdTrack Bool -> Bool -> Bool
|| Classified -> Bool
isBirdTrack Classified
classification, Classified
classification)

    isBirdTrack :: Classified -> Bool
isBirdTrack (BirdTrack [Char]
_) = Bool
True
    isBirdTrack Classified
_ = Bool
False

    checkErrors :: [Classified] -> Either [Classified] CabalException
checkErrors [Classified]
ls = case [[Char]
e | Error [Char]
e <- [Classified]
ls] of
      [] -> [Classified] -> Either [Classified] CabalException
forall a b. a -> Either a b
Left [Classified]
ls
      ([Char]
message : [[Char]]
_) -> CabalException -> Either [Classified] CabalException
forall a b. b -> Either a b
Right ([Char] -> CabalException
UnlitException ([Char]
f [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
":" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
n [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
": " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
message))
        where
          ([Char]
f, Int
n) = [Char] -> Int -> [Classified] -> ([Char], Int)
errorPos [Char]
file Int
1 [Classified]
ls
    errorPos :: [Char] -> Int -> [Classified] -> ([Char], Int)
errorPos [Char]
f Int
n [] = ([Char]
f, Int
n)
    errorPos [Char]
f Int
n (Error [Char]
_ : [Classified]
_) = ([Char]
f, Int
n)
    errorPos [Char]
_ Int
_ (Line Int
n' [Char]
f' : [Classified]
ls) = [Char] -> Int -> [Classified] -> ([Char], Int)
errorPos [Char]
f' Int
n' [Classified]
ls
    errorPos [Char]
f Int
n (Classified
_ : [Classified]
ls) = [Char] -> Int -> [Classified] -> ([Char], Int)
errorPos [Char]
f (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Classified]
ls

-- Here we model a state machine, with each state represented by
-- a local function. We only have four states (well, five,
-- if you count the error state), but the rules
-- to transition between then are not so simple.
-- Would it be simpler to have more states?
--
-- Each state represents the type of line that was last read
-- i.e. are we in a comment section, or a latex-code section,
-- or a bird-code section, etc?
reclassify :: [Classified] -> [Classified]
reclassify :: [Classified] -> [Classified]
reclassify = [Classified] -> [Classified]
blank -- begin in blank state
  where
    latex :: [Classified] -> [Classified]
latex [] = []
    latex (Classified
EndCode : [Classified]
ls) = [Char] -> Classified
Blank [Char]
"" Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    latex (Classified
BeginCode : [Classified]
_) = [[Char] -> Classified
Error [Char]
"\\begin{code} in code section"]
    latex (BirdTrack [Char]
l : [Classified]
ls) = [Char] -> Classified
Ordinary (Char
'>' Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: [Char]
l) Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    latex (Classified
l : [Classified]
ls) = Classified
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls

    blank :: [Classified] -> [Classified]
blank [] = []
    blank (Classified
EndCode : [Classified]
_) = [[Char] -> Classified
Error [Char]
"\\end{code} without \\begin{code}"]
    blank (Classified
BeginCode : [Classified]
ls) = [Char] -> Classified
Blank [Char]
"" Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    blank (BirdTrack [Char]
l : [Classified]
ls) = [Char] -> Classified
BirdTrack [Char]
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
bird [Classified]
ls
    blank (Ordinary [Char]
l : [Classified]
ls) = [Char] -> Classified
Comment [Char]
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    blank (Classified
l : [Classified]
ls) = Classified
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
blank [Classified]
ls

    bird :: [Classified] -> [Classified]
bird [] = []
    bird (Classified
EndCode : [Classified]
_) = [[Char] -> Classified
Error [Char]
"\\end{code} without \\begin{code}"]
    bird (Classified
BeginCode : [Classified]
ls) = [Char] -> Classified
Blank [Char]
"" Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    bird (Blank [Char]
l : [Classified]
ls) = [Char] -> Classified
Blank [Char]
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
blank [Classified]
ls
    bird (Ordinary [Char]
_ : [Classified]
_) = [[Char] -> Classified
Error [Char]
"program line before comment line"]
    bird (Classified
l : [Classified]
ls) = Classified
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
bird [Classified]
ls

    comment :: [Classified] -> [Classified]
comment [] = []
    comment (Classified
EndCode : [Classified]
_) = [[Char] -> Classified
Error [Char]
"\\end{code} without \\begin{code}"]
    comment (Classified
BeginCode : [Classified]
ls) = [Char] -> Classified
Blank [Char]
"" Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    comment (CPP [Char]
l : [Classified]
ls) = [Char] -> Classified
CPP [Char]
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (BirdTrack [Char]
_ : [Classified]
_) = [[Char] -> Classified
Error [Char]
"comment line before program line"]
    -- a blank line and another ordinary line following a comment
    -- will be treated as continuing the comment. Otherwise it's
    -- then end of the comment, with a blank line.
    comment (Blank [Char]
l : ls :: [Classified]
ls@(Ordinary [Char]
_ : [Classified]
_)) = [Char] -> Classified
Comment [Char]
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (Blank [Char]
l : [Classified]
ls) = [Char] -> Classified
Blank [Char]
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
blank [Classified]
ls
    comment (Line Int
n [Char]
f : [Classified]
ls) = Int -> [Char] -> Classified
Line Int
n [Char]
f Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (Ordinary [Char]
l : [Classified]
ls) = [Char] -> Classified
Comment [Char]
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (Comment [Char]
_ : [Classified]
_) = [Classified]
forall a. a
internalError
    comment (Error [Char]
_ : [Classified]
_) = [Classified]
forall a. a
internalError

-- Re-implementation of 'lines', for better efficiency (but decreased laziness).
-- Also, importantly, accepts non-standard DOS and Mac line ending characters.
inlines :: String -> [String]
inlines :: [Char] -> [[Char]]
inlines [Char]
xs = [Char] -> ([Char] -> [Char]) -> [[Char]]
lines' [Char]
xs [Char] -> [Char]
forall a. a -> a
id
  where
    lines' :: [Char] -> ([Char] -> [Char]) -> [[Char]]
lines' [] [Char] -> [Char]
acc = [[Char] -> [Char]
acc []]
    lines' (Char
'\^M' : Char
'\n' : [Char]
s) [Char] -> [Char]
acc = [Char] -> [Char]
acc [] [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [Char] -> ([Char] -> [Char]) -> [[Char]]
lines' [Char]
s [Char] -> [Char]
forall a. a -> a
id -- DOS
    lines' (Char
'\^M' : [Char]
s) [Char] -> [Char]
acc = [Char] -> [Char]
acc [] [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [Char] -> ([Char] -> [Char]) -> [[Char]]
lines' [Char]
s [Char] -> [Char]
forall a. a -> a
id -- MacOS
    lines' (Char
'\n' : [Char]
s) [Char] -> [Char]
acc = [Char] -> [Char]
acc [] [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [Char] -> ([Char] -> [Char]) -> [[Char]]
lines' [Char]
s [Char] -> [Char]
forall a. a -> a
id -- Unix
    lines' (Char
c : [Char]
s) [Char] -> [Char]
acc = [Char] -> ([Char] -> [Char]) -> [[Char]]
lines' [Char]
s ([Char] -> [Char]
acc ([Char] -> [Char]) -> ([Char] -> [Char]) -> [Char] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char
c Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:))

internalError :: a
internalError :: forall a. a
internalError = [Char] -> a
forall a. HasCallStack => [Char] -> a
error [Char]
"unlit: internal error"