-----------------------------------------------------------------------------
-- |
-- Module      :  Distribution.Simple.PreProcess.Unlit
-- Copyright   :  ...
--
-- Maintainer  :  cabal-devel@haskell.org
-- Portability :  portable
--
-- Remove the \"literal\" markups from a Haskell source file, including
-- \"@>@\", \"@\\begin{code}@\", \"@\\end{code}@\", and \"@#@\"

-- This version is interesting because instead of striping comment lines, it
-- turns them into "-- " style comments. This allows using haddock markup
-- in literate scripts without having to use "> --" prefix.

module Distribution.Simple.PreProcess.Unlit (unlit,plain) where

import Prelude ()
import Distribution.Compat.Prelude

import Data.List (mapAccumL)

data Classified = BirdTrack String | Blank String | Ordinary String
                | Line !Int String | CPP String
                | BeginCode | EndCode
                -- output only:
                | Error String | Comment String

-- | No unliteration.
plain :: String -> String -> String
plain :: String -> String -> String
plain String
_ String
hs = String
hs

classify :: String -> Classified
classify :: String -> Classified
classify (Char
'>':String
s) = String -> Classified
BirdTrack String
s
classify (Char
'#':String
s) = case String -> [String]
tokens String
s of
                     (String
line:String
file:[String]
_) | (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit String
line
                                  Bool -> Bool -> Bool
&& String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
file Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
2
                                  Bool -> Bool -> Bool
&& String -> Char
forall a. [a] -> a
head String
file Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"'
                                  Bool -> Bool -> Bool
&& String -> Char
forall a. [a] -> a
last String
file Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"'
                                -- this shouldn't fail as we tested for 'all isDigit'
                                -> Int -> String -> Classified
Line (Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe (String -> Int
forall a. HasCallStack => String -> a
error (String -> Int) -> String -> Int
forall a b. (a -> b) -> a -> b
$ String
"panic! read @Int " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
line) (Maybe Int -> Int) -> Maybe Int -> Int
forall a b. (a -> b) -> a -> b
$ String -> Maybe Int
forall a. Read a => String -> Maybe a
readMaybe String
line) (String -> String
forall a. [a] -> [a]
tail (String -> String
forall a. [a] -> [a]
init String
file)) -- TODO:eradicateNoParse
                     [String]
_          -> String -> Classified
CPP String
s
  where tokens :: String -> [String]
tokens = (String -> Maybe (String, String)) -> String -> [String]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr ((String -> Maybe (String, String)) -> String -> [String])
-> (String -> Maybe (String, String)) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ \String
str -> case ReadS String
lex String
str of
                                   (t :: String
t@(Char
_:String
_), String
str'):[(String, String)]
_ -> (String, String) -> Maybe (String, String)
forall a. a -> Maybe a
Just (String
t, String
str')
                                   [(String, String)]
_                 -> Maybe (String, String)
forall a. Maybe a
Nothing
classify (Char
'\\':String
s)
  | String
"begin{code}" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
s = Classified
BeginCode
  | String
"end{code}"   String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
s = Classified
EndCode
classify String
s | (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isSpace String
s       = String -> Classified
Blank String
s
classify String
s                       = String -> Classified
Ordinary String
s

-- So the weird exception for comment indenting is to make things work with
-- haddock, see classifyAndCheckForBirdTracks below.
unclassify :: Bool -> Classified -> String
unclassify :: Bool -> Classified -> String
unclassify Bool
_     (BirdTrack String
s) = Char
' 'Char -> String -> String
forall a. a -> [a] -> [a]
:String
s
unclassify Bool
_     (Blank String
s)     = String
s
unclassify Bool
_     (Ordinary String
s)  = String
s
unclassify Bool
_     (Line Int
n String
file) = String
"# " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
n String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
file
unclassify Bool
_     (CPP String
s)       = Char
'#'Char -> String -> String
forall a. a -> [a] -> [a]
:String
s
unclassify Bool
True  (Comment String
"")  = String
"  --"
unclassify Bool
True  (Comment String
s)   = String
"  -- " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s
unclassify Bool
False (Comment String
"")  = String
"--"
unclassify Bool
False (Comment String
s)   = String
"-- " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s
unclassify Bool
_     Classified
_             = String
forall a. a
internalError

-- | 'unlit' takes a filename (for error reports), and transforms the
--   given string, to eliminate the literate comments from the program text.
unlit :: FilePath -> String -> Either String String
unlit :: String -> String -> Either String String
unlit String
file String
input =
  let (Bool
usesBirdTracks, [Classified]
classified) = [String] -> (Bool, [Classified])
classifyAndCheckForBirdTracks
                                   ([String] -> (Bool, [Classified]))
-> (String -> [String]) -> String -> (Bool, [Classified])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
inlines
                                   (String -> (Bool, [Classified])) -> String -> (Bool, [Classified])
forall a b. (a -> b) -> a -> b
$ String
input
   in ([Classified] -> Either String String)
-> (String -> Either String String)
-> Either [Classified] String
-> Either String String
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (String -> Either String String
forall a b. a -> Either a b
Left (String -> Either String String)
-> ([Classified] -> String) -> [Classified] -> Either String String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
unlines ([String] -> String)
-> ([Classified] -> [String]) -> [Classified] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Classified -> String) -> [Classified] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (Bool -> Classified -> String
unclassify Bool
usesBirdTracks))
              String -> Either String String
forall a b. b -> Either a b
Right
    (Either [Classified] String -> Either String String)
-> ([Classified] -> Either [Classified] String)
-> [Classified]
-> Either String String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Classified] -> Either [Classified] String
checkErrors
    ([Classified] -> Either [Classified] String)
-> ([Classified] -> [Classified])
-> [Classified]
-> Either [Classified] String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Classified] -> [Classified]
reclassify
    ([Classified] -> Either String String)
-> [Classified] -> Either String String
forall a b. (a -> b) -> a -> b
$ [Classified]
classified

  where
    -- So haddock requires comments and code to align, since it treats comments
    -- as following the layout rule. This is a pain for us since bird track
    -- style literate code typically gets indented by two since ">" is replaced
    -- by " " and people usually use one additional space of indent ie
    -- "> then the code". On the other hand we cannot just go and indent all
    -- the comments by two since that does not work for latex style literate
    -- code. So the hacky solution we use here is that if we see any bird track
    -- style code then we'll indent all comments by two, otherwise by none.
    -- Of course this will not work for mixed latex/bird track .lhs files but
    -- nobody does that, it's silly and specifically recommended against in the
    -- H98 unlit spec.
    --
    classifyAndCheckForBirdTracks :: [String] -> (Bool, [Classified])
classifyAndCheckForBirdTracks =
      ((Bool -> String -> (Bool, Classified))
 -> Bool -> [String] -> (Bool, [Classified]))
-> Bool
-> (Bool -> String -> (Bool, Classified))
-> [String]
-> (Bool, [Classified])
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Bool -> String -> (Bool, Classified))
-> Bool -> [String] -> (Bool, [Classified])
forall (t :: * -> *) a b c.
Traversable t =>
(a -> b -> (a, c)) -> a -> t b -> (a, t c)
mapAccumL Bool
False ((Bool -> String -> (Bool, Classified))
 -> [String] -> (Bool, [Classified]))
-> (Bool -> String -> (Bool, Classified))
-> [String]
-> (Bool, [Classified])
forall a b. (a -> b) -> a -> b
$ \Bool
seenBirdTrack String
line ->
        let classification :: Classified
classification = String -> Classified
classify String
line
         in (Bool
seenBirdTrack Bool -> Bool -> Bool
|| Classified -> Bool
isBirdTrack Classified
classification, Classified
classification)

    isBirdTrack :: Classified -> Bool
isBirdTrack (BirdTrack String
_) = Bool
True
    isBirdTrack Classified
_             = Bool
False

    checkErrors :: [Classified] -> Either [Classified] String
checkErrors [Classified]
ls = case [ String
e | Error String
e <- [Classified]
ls ] of
      []          -> [Classified] -> Either [Classified] String
forall a b. a -> Either a b
Left  [Classified]
ls
      (String
message:[String]
_) -> String -> Either [Classified] String
forall a b. b -> Either a b
Right (String
f String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
":" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
n String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
": " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
message)
        where (String
f, Int
n) = String -> Int -> [Classified] -> (String, Int)
errorPos String
file Int
1 [Classified]
ls
    errorPos :: String -> Int -> [Classified] -> (String, Int)
errorPos String
f Int
n []              = (String
f, Int
n)
    errorPos String
f Int
n (Error String
_:[Classified]
_)     = (String
f, Int
n)
    errorPos String
_ Int
_ (Line Int
n' String
f':[Classified]
ls) = String -> Int -> [Classified] -> (String, Int)
errorPos String
f' Int
n' [Classified]
ls
    errorPos String
f Int
n (Classified
_         :[Classified]
ls) = String -> Int -> [Classified] -> (String, Int)
errorPos String
f  (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) [Classified]
ls

-- Here we model a state machine, with each state represented by
-- a local function. We only have four states (well, five,
-- if you count the error state), but the rules
-- to transition between then are not so simple.
-- Would it be simpler to have more states?
--
-- Each state represents the type of line that was last read
-- i.e. are we in a comment section, or a latex-code section,
-- or a bird-code section, etc?
reclassify :: [Classified] -> [Classified]
reclassify :: [Classified] -> [Classified]
reclassify = [Classified] -> [Classified]
blank -- begin in blank state
  where
    latex :: [Classified] -> [Classified]
latex []               = []
    latex (Classified
EndCode    :[Classified]
ls) = String -> Classified
Blank String
"" Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    latex (Classified
BeginCode  :[Classified]
_ ) = [String -> Classified
Error String
"\\begin{code} in code section"]
    latex (BirdTrack String
l:[Classified]
ls) = String -> Classified
Ordinary (Char
'>'Char -> String -> String
forall a. a -> [a] -> [a]
:String
l) Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    latex (          Classified
l:[Classified]
ls) = Classified
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls

    blank :: [Classified] -> [Classified]
blank []               = []
    blank (Classified
EndCode    :[Classified]
_ ) = [String -> Classified
Error String
"\\end{code} without \\begin{code}"]
    blank (Classified
BeginCode  :[Classified]
ls) = String -> Classified
Blank String
""    Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    blank (BirdTrack String
l:[Classified]
ls) = String -> Classified
BirdTrack String
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
bird [Classified]
ls
    blank (Ordinary  String
l:[Classified]
ls) = String -> Classified
Comment   String
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    blank (          Classified
l:[Classified]
ls) =           Classified
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
blank [Classified]
ls

    bird :: [Classified] -> [Classified]
bird []              = []
    bird (Classified
EndCode   :[Classified]
_ ) = [String -> Classified
Error String
"\\end{code} without \\begin{code}"]
    bird (Classified
BeginCode :[Classified]
ls) = String -> Classified
Blank String
"" Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    bird (Blank String
l   :[Classified]
ls) = String -> Classified
Blank String
l  Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
blank [Classified]
ls
    bird (Ordinary String
_:[Classified]
_ ) = [String -> Classified
Error String
"program line before comment line"]
    bird (         Classified
l:[Classified]
ls) = Classified
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
bird [Classified]
ls

    comment :: [Classified] -> [Classified]
comment []               = []
    comment (Classified
EndCode    :[Classified]
_ ) = [String -> Classified
Error String
"\\end{code} without \\begin{code}"]
    comment (Classified
BeginCode  :[Classified]
ls) = String -> Classified
Blank String
"" Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
latex [Classified]
ls
    comment (CPP String
l      :[Classified]
ls) = String -> Classified
CPP String
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (BirdTrack String
_:[Classified]
_ ) = [String -> Classified
Error String
"comment line before program line"]
    -- a blank line and another ordinary line following a comment
    -- will be treated as continuing the comment. Otherwise it's
    -- then end of the comment, with a blank line.
    comment (Blank     String
l:ls :: [Classified]
ls@(Ordinary  String
_:[Classified]
_)) = String -> Classified
Comment String
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (Blank     String
l:[Classified]
ls) = String -> Classified
Blank String
l   Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
blank [Classified]
ls
    comment (Line Int
n String
f   :[Classified]
ls) = Int -> String -> Classified
Line Int
n String
f  Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (Ordinary  String
l:[Classified]
ls) = String -> Classified
Comment String
l Classified -> [Classified] -> [Classified]
forall a. a -> [a] -> [a]
: [Classified] -> [Classified]
comment [Classified]
ls
    comment (Comment   String
_: [Classified]
_) = [Classified]
forall a. a
internalError
    comment (Error     String
_: [Classified]
_) = [Classified]
forall a. a
internalError

-- Re-implementation of 'lines', for better efficiency (but decreased laziness).
-- Also, importantly, accepts non-standard DOS and Mac line ending characters.
inlines :: String -> [String]
inlines :: String -> [String]
inlines String
xs = String -> (String -> String) -> [String]
lines' String
xs String -> String
forall a. a -> a
id
  where
  lines' :: String -> (String -> String) -> [String]
lines' []             String -> String
acc = [String -> String
acc []]
  lines' (Char
'\^M':Char
'\n':String
s) String -> String
acc = String -> String
acc [] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> (String -> String) -> [String]
lines' String
s String -> String
forall a. a -> a
id    -- DOS
  lines' (Char
'\^M':String
s)      String -> String
acc = String -> String
acc [] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> (String -> String) -> [String]
lines' String
s String -> String
forall a. a -> a
id    -- MacOS
  lines' (Char
'\n':String
s)       String -> String
acc = String -> String
acc [] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> (String -> String) -> [String]
lines' String
s String -> String
forall a. a -> a
id    -- Unix
  lines' (Char
c:String
s)          String -> String
acc = String -> (String -> String) -> [String]
lines' String
s (String -> String
acc (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:))

internalError :: a
internalError :: a
internalError = String -> a
forall a. HasCallStack => String -> a
error String
"unlit: internal error"