{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ExplicitForAll #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Trasa.TH.Lexer
( ReservedChar(..)
, ReservedSymbol(..)
, Lexeme(..)
, Stream(..)
, stream ) where
import Data.Proxy (Proxy(..))
import Data.Semigroup ((<>))
import Data.Void (Void)
import Data.Coerce
import qualified Data.List.NonEmpty as NE
import qualified Data.List as L
import qualified Text.Megaparsec as MP
import qualified Text.Megaparsec.Char as MP
import qualified Text.Megaparsec.Stream as MP
type Parser a = MP.Parsec Void String a
instance Ord a => MP.ShowErrorComponent (MP.ErrorFancy a) where
showErrorComponent (MP.ErrorFail e) = e
showErrorComponent (MP.ErrorCustom _) = "undefined error"
data ReservedChar
= ReservedCharNewline
| ReservedCharColon
| ReservedCharSlash
| ReservedCharQuestionMark
| ReservedCharAmpersand
| ReservedCharEqual
| ReservedCharOpenBracket
| ReservedCharCloseBracket
| ReservedCharComma
| ReservedCharTab
deriving (Show,Eq,Ord)
data ReservedSymbol
= ReservedSymbolDataType
deriving (Show,Eq,Ord)
data Lexeme
= LexemeSpace Word
| LexemeChar ReservedChar
| LexemeSymbol ReservedSymbol
| LexemeString Word String
deriving (Show,Eq,Ord)
newtype Stream = Stream [Lexeme]
deriving (Eq, Ord, Show, Monoid, Semigroup)
instance MP.Stream Stream where
type Token Stream = Lexeme
type Tokens Stream = [Lexeme]
tokenToChunk Proxy = pure
tokensToChunk Proxy = id
chunkToTokens Proxy = id
chunkLength Proxy = length
chunkEmpty Proxy = null
take1_ (Stream []) = Nothing
take1_ (Stream (t:ts)) = Just (t, Stream ts)
takeN_ n (Stream s)
| n <= 0 = Just ([], Stream s)
| null s = Nothing
| otherwise = Just $ coerce (splitAt n s)
takeWhile_ = coerce . span
reachOffset o pst = reachOffset' (\n (Stream l) -> coerce $ splitAt n l) L.foldl' (fmap prettyCharToken . coerce) (prettyCharToken . coerce) (coerce $ LexemeChar ReservedCharNewline, coerce $ LexemeChar ReservedCharTab) o (coerce pst)
showTokens _ = L.concatMap prettyToken . NE.toList
prettyCharToken = \case
LexemeSpace _ -> ' '
LexemeChar resChar -> case resChar of
ReservedCharNewline -> '\n'
ReservedCharColon -> ':'
ReservedCharSlash -> '/'
ReservedCharQuestionMark -> '?'
ReservedCharAmpersand -> '&'
ReservedCharEqual -> '='
ReservedCharOpenBracket -> '['
ReservedCharCloseBracket -> ']'
ReservedCharComma -> ','
ReservedCharTab -> '\t'
LexemeSymbol sym -> case sym of
ReservedSymbolDataType -> 'd'
LexemeString _ _ -> 's'
prettyToken = \case
LexemeSpace _ -> "space(s)"
LexemeChar resChar -> case resChar of
ReservedCharNewline -> "newline"
ReservedCharColon -> ":"
ReservedCharSlash -> "/"
ReservedCharQuestionMark -> "?"
ReservedCharAmpersand -> "&"
ReservedCharEqual -> "="
ReservedCharOpenBracket -> "["
ReservedCharCloseBracket -> "]"
ReservedCharComma -> ","
ReservedCharTab -> "tab"
LexemeSymbol sym -> case sym of
ReservedSymbolDataType -> "data-type"
LexemeString _ _ -> "any string"
lexeme :: Parser Lexeme
lexeme = MP.choice
[ LexemeSpace . fromIntegral . length <$> MP.some (MP.char ' ')
, LexemeChar <$> MP.choice
[ ReservedCharNewline <$ MP.char '\n'
, ReservedCharColon <$ MP.char ':'
, ReservedCharSlash <$ MP.char '/'
, ReservedCharQuestionMark <$ MP.char '?'
, ReservedCharAmpersand <$ MP.char '&'
, ReservedCharEqual <$ MP.char '='
, ReservedCharOpenBracket <$ MP.char '['
, ReservedCharCloseBracket <$ MP.char ']'
, ReservedCharComma <$ MP.char ','
]
, LexemeSymbol <$> MP.choice
[ ReservedSymbolDataType <$ MP.string "data-type"
]
, string <$> MP.some (MP.noneOf " \n:/?&=[],")
]
where string str = LexemeString (fromIntegral (length str)) str
data St = St MP.SourcePos ShowS
reachOffset'
:: (Int -> Stream -> (MP.Tokens Stream, Stream))
-> (forall b. (b -> MP.Token Stream -> b) -> b -> MP.Tokens Stream -> b)
-> (MP.Tokens Stream -> String)
-> (MP.Token Stream -> Char)
-> (MP.Token Stream, MP.Token Stream)
-> Int
-> MP.PosState Stream
-> (MP.SourcePos, String, MP.PosState Stream)
reachOffset' splitAt'
foldl''
fromToks
fromTok
(newlineTok, tabTok)
o
MP.PosState {..} =
( spos
, case expandTab pstateTabWidth
. addPrefix
. f
. fromToks
. fst
$ MP.takeWhile_ (/= newlineTok) post of
"" -> "<empty line>"
xs -> xs
, MP.PosState
{ MP.pstateInput = post
, MP.pstateOffset = max pstateOffset o
, MP.pstateSourcePos = spos
, MP.pstateTabWidth = pstateTabWidth
, MP.pstateLinePrefix =
if sameLine
then pstateLinePrefix ++ f ""
else f ""
}
)
where
addPrefix xs =
if sameLine
then pstateLinePrefix ++ xs
else xs
sameLine = MP.sourceLine spos == MP.sourceLine pstateSourcePos
(pre, post) = splitAt' (o - pstateOffset) pstateInput
St spos f = foldl'' go (St pstateSourcePos id) pre
go (St apos g) ch =
let MP.SourcePos n l c = apos
c' = MP.unPos c
w = MP.unPos pstateTabWidth
in if | ch == newlineTok ->
St (MP.SourcePos n (l <> MP.pos1) MP.pos1)
id
| ch == tabTok ->
St (MP.SourcePos n l (MP.mkPos $ c' + w - ((c' - 1) `rem` w)))
(g . (fromTok ch :))
| otherwise ->
St (MP.SourcePos n l (c <> MP.pos1))
(g . (fromTok ch :))
{-# INLINE reachOffset' #-}
expandTab
:: MP.Pos
-> String
-> String
expandTab w' = go 0
where
go 0 [] = []
go 0 ('\t':xs) = go w xs
go 0 (x:xs) = x : go 0 xs
go n xs = ' ' : go (n - 1) xs
w = MP.unPos w'
stream :: Parser Stream
stream = Stream <$> MP.many lexeme