{-# LANGUAGE Safe #-}
{-# LANGUAGE ViewPatterns #-}
module Text.Gigaparsec.Errors.TokenExtractors (
    Token(..), TokenExtractor,
    tillNextWhitespace,
    singleChar,
    matchParserDemand--,
    --lexToken
  ) where

import Text.Gigaparsec (Parsec)

import Data.Char (generalCategory, ord, GeneralCategory(Format, Surrogate, PrivateUse, NotAssigned, Control))
import Data.Char qualified as Char (isSpace)
import Data.List.NonEmpty (NonEmpty((:|)))
import Data.Foldable (maximumBy)
import Numeric (showHex)
import Data.Function (on)

type TokenExtractor :: *
type TokenExtractor = NonEmpty Char -> Word -> Bool -> Token

{-|
This type represents an extracted token returned by 'unexpectedToken' in 'ErrorBuilder'.

There is deliberately no analogue for @EndOfInput@ because we guarantee that non-empty
residual input is provided to token extraction.
-}
type Token :: *
data Token = Raw                   -- ^ This is a token that is directly extracted from the residual input itself.
              !String              -- ^ the input extracted.
           | Named                 -- ^ This is a token that has been given a name, and is treated like a labelled item.
              !String              -- ^ the description of the token.
              {-# UNPACK #-} !Word -- ^ the amount of residual input this token ate.

{-# INLINABLE tillNextWhitespace #-}
-- TillNextWhitespace with matches parser demand
tillNextWhitespace :: Bool -> (Char -> Bool) -> TokenExtractor
tillNextWhitespace :: Bool -> (Char -> Bool) -> TokenExtractor
tillNextWhitespace Bool
_ Char -> Bool
_ (NonEmpty Char -> Maybe Token
whitespaceOrUnprintable -> Just Token
tok) Word
_ Bool
_ = Token
tok
tillNextWhitespace Bool
trimToDemand Char -> Bool
isSpace (Char
c :| [Char]
cs) Word
parserDemanded Bool
_
  | Bool
trimToDemand = [Char] -> Token
Raw (Int -> [Char] -> [Char]
forall a. Int -> [a] -> [a]
take (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
parserDemanded) ([Char] -> [Char]
tillSpace (Char
cChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
cs)))
  | Bool
otherwise    = [Char] -> Token
Raw ([Char] -> [Char]
tillSpace (Char
cChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
cs))
  where tillSpace :: [Char] -> [Char]
tillSpace = (Char -> Bool) -> [Char] -> [Char]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace)

singleChar :: TokenExtractor
singleChar :: TokenExtractor
singleChar (NonEmpty Char -> Maybe Token
whitespaceOrUnprintable -> Just Token
tok) Word
_ Bool
_ = Token
tok
singleChar (Char
c :| [Char]
_) Word
_ Bool
_ = [Char] -> Token
Raw [Char
c]

matchParserDemand :: TokenExtractor
matchParserDemand :: TokenExtractor
matchParserDemand (NonEmpty Char -> Maybe Token
whitespaceOrUnprintable -> Just Token
tok) Word
_ Bool
_ = Token
tok
matchParserDemand (Char
c :| [Char]
cs) Word
parserDemanded Bool
_ = [Char] -> Token
Raw (Int -> [Char] -> [Char]
forall a. Int -> [a] -> [a]
take (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
parserDemanded) (Char
cChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
cs))

whitespaceOrUnprintable :: NonEmpty Char -> Maybe Token
whitespaceOrUnprintable :: NonEmpty Char -> Maybe Token
whitespaceOrUnprintable (Char
'\n' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"newline" Word
1
whitespaceOrUnprintable (Char
'\r' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"carriage return" Word
1
whitespaceOrUnprintable (Char
'\t' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"tab" Word
1
whitespaceOrUnprintable (Char
' ' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"space" Word
1
whitespaceOrUnprintable (Char
c :| [Char]
_)
  | Char -> Bool
Char.isSpace Char
c = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"whitespace character" Word
1
  | Bool
otherwise      = case Char -> GeneralCategory
generalCategory Char
c of
    GeneralCategory
Format -> Maybe Token
unprintable
    GeneralCategory
Surrogate -> Maybe Token
unprintable
    GeneralCategory
PrivateUse -> Maybe Token
unprintable
    GeneralCategory
NotAssigned -> Maybe Token
unprintable
    GeneralCategory
Control -> Maybe Token
unprintable
    GeneralCategory
_ -> Maybe Token
forall a. Maybe a
Nothing
  where unprintable :: Maybe Token
unprintable = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named ([Char]
"non-printable character (\\x" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char] -> [Char]
forall a. Integral a => a -> [Char] -> [Char]
showHex (Char -> Int
ord Char
c) [Char]
")") Word
1

lexToken :: [Parsec String] -> TokenExtractor -> TokenExtractor
lexToken :: [Parsec [Char]] -> TokenExtractor -> TokenExtractor
lexToken = (NonEmpty ([Char], Word) -> ([Char], Word))
-> [Parsec [Char]] -> TokenExtractor -> TokenExtractor
lexTokenWithSelect ((([Char], Word) -> ([Char], Word) -> Ordering)
-> NonEmpty ([Char], Word) -> ([Char], Word)
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
maximumBy (Word -> Word -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Word -> Word -> Ordering)
-> (([Char], Word) -> Word)
-> ([Char], Word)
-> ([Char], Word)
-> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` ([Char], Word) -> Word
forall a b. (a, b) -> b
snd))

lexTokenWithSelect :: (NonEmpty (String, Word) -> (String, Word)) -> [Parsec String] -> TokenExtractor -> TokenExtractor
lexTokenWithSelect :: (NonEmpty ([Char], Word) -> ([Char], Word))
-> [Parsec [Char]] -> TokenExtractor -> TokenExtractor
lexTokenWithSelect = (NonEmpty ([Char], Word) -> ([Char], Word))
-> [Parsec [Char]] -> TokenExtractor -> TokenExtractor
forall a. HasCallStack => a
undefined