{-# LANGUAGE Safe #-}
{-# LANGUAGE ViewPatterns #-}
module Text.Gigaparsec.Errors.TokenExtractors (
Token(..), TokenExtractor,
tillNextWhitespace,
singleChar,
matchParserDemand
) where
import Text.Gigaparsec (Parsec)
import Data.Char (generalCategory, ord, GeneralCategory(Format, Surrogate, PrivateUse, NotAssigned, Control))
import Data.Char qualified as Char (isSpace)
import Data.List.NonEmpty (NonEmpty((:|)))
import Data.Foldable (maximumBy)
import Numeric (showHex)
import Data.Function (on)
type TokenExtractor :: *
type = NonEmpty Char -> Word -> Bool -> Token
type Token :: *
data Token = Raw
!String
| Named
!String
{-# UNPACK #-} !Word
{-# INLINABLE tillNextWhitespace #-}
tillNextWhitespace :: Bool -> (Char -> Bool) -> TokenExtractor
tillNextWhitespace :: Bool -> (Char -> Bool) -> TokenExtractor
tillNextWhitespace Bool
_ Char -> Bool
_ (NonEmpty Char -> Maybe Token
whitespaceOrUnprintable -> Just Token
tok) Word
_ Bool
_ = Token
tok
tillNextWhitespace Bool
trimToDemand Char -> Bool
isSpace (Char
c :| [Char]
cs) Word
parserDemanded Bool
_
| Bool
trimToDemand = [Char] -> Token
Raw (Int -> [Char] -> [Char]
forall a. Int -> [a] -> [a]
take (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
parserDemanded) ([Char] -> [Char]
tillSpace (Char
cChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
cs)))
| Bool
otherwise = [Char] -> Token
Raw ([Char] -> [Char]
tillSpace (Char
cChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
cs))
where tillSpace :: [Char] -> [Char]
tillSpace = (Char -> Bool) -> [Char] -> [Char]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace)
singleChar :: TokenExtractor
singleChar :: TokenExtractor
singleChar (NonEmpty Char -> Maybe Token
whitespaceOrUnprintable -> Just Token
tok) Word
_ Bool
_ = Token
tok
singleChar (Char
c :| [Char]
_) Word
_ Bool
_ = [Char] -> Token
Raw [Char
c]
matchParserDemand :: TokenExtractor
matchParserDemand :: TokenExtractor
matchParserDemand (NonEmpty Char -> Maybe Token
whitespaceOrUnprintable -> Just Token
tok) Word
_ Bool
_ = Token
tok
matchParserDemand (Char
c :| [Char]
cs) Word
parserDemanded Bool
_ = [Char] -> Token
Raw (Int -> [Char] -> [Char]
forall a. Int -> [a] -> [a]
take (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
parserDemanded) (Char
cChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
cs))
whitespaceOrUnprintable :: NonEmpty Char -> Maybe Token
whitespaceOrUnprintable :: NonEmpty Char -> Maybe Token
whitespaceOrUnprintable (Char
'\n' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"newline" Word
1
whitespaceOrUnprintable (Char
'\r' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"carriage return" Word
1
whitespaceOrUnprintable (Char
'\t' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"tab" Word
1
whitespaceOrUnprintable (Char
' ' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"space" Word
1
whitespaceOrUnprintable (Char
c :| [Char]
_)
| Char -> Bool
Char.isSpace Char
c = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"whitespace character" Word
1
| Bool
otherwise = case Char -> GeneralCategory
generalCategory Char
c of
GeneralCategory
Format -> Maybe Token
unprintable
GeneralCategory
Surrogate -> Maybe Token
unprintable
GeneralCategory
PrivateUse -> Maybe Token
unprintable
GeneralCategory
NotAssigned -> Maybe Token
unprintable
GeneralCategory
Control -> Maybe Token
unprintable
GeneralCategory
_ -> Maybe Token
forall a. Maybe a
Nothing
where unprintable :: Maybe Token
unprintable = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named ([Char]
"non-printable character (\\x" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char] -> [Char]
forall a. Integral a => a -> [Char] -> [Char]
showHex (Char -> Int
ord Char
c) [Char]
")") Word
1
lexToken :: [Parsec String] -> TokenExtractor -> TokenExtractor
lexToken :: [Parsec [Char]] -> TokenExtractor -> TokenExtractor
lexToken = (NonEmpty ([Char], Word) -> ([Char], Word))
-> [Parsec [Char]] -> TokenExtractor -> TokenExtractor
lexTokenWithSelect ((([Char], Word) -> ([Char], Word) -> Ordering)
-> NonEmpty ([Char], Word) -> ([Char], Word)
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
maximumBy (Word -> Word -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Word -> Word -> Ordering)
-> (([Char], Word) -> Word)
-> ([Char], Word)
-> ([Char], Word)
-> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` ([Char], Word) -> Word
forall a b. (a, b) -> b
snd))
lexTokenWithSelect :: (NonEmpty (String, Word) -> (String, Word)) -> [Parsec String] -> TokenExtractor -> TokenExtractor
lexTokenWithSelect :: (NonEmpty ([Char], Word) -> ([Char], Word))
-> [Parsec [Char]] -> TokenExtractor -> TokenExtractor
lexTokenWithSelect = (NonEmpty ([Char], Word) -> ([Char], Word))
-> [Parsec [Char]] -> TokenExtractor -> TokenExtractor
forall a. HasCallStack => a
undefined