-- -*- haskell -*- File generated by the BNF Converter (bnfc 2.9.4.1). -- Lexer definition for use with Alex 3 { {-# OPTIONS -fno-warn-incomplete-patterns #-} {-# OPTIONS_GHC -w #-} {-# LANGUAGE PatternSynonyms #-} module Language.Rzk.Syntax.Lex where import Prelude import qualified Data.Bits import Data.Char (ord) import Data.Function (on) import Data.Word (Word8) } -- Predefined character classes $c = [A-Z\192-\221] # [\215] -- capital isolatin1 letter (215 = \times) FIXME $s = [a-z\222-\255] # [\247] -- small isolatin1 letter (247 = \div ) FIXME $l = [$c $s] -- letter $d = [0-9] -- digit $i = [$l $d _ '] -- identifier character $u = [. \n] -- universal: any character -- Symbols and non-identifier-like reserved words @rsyms = \Σ | \# "lang" | \; | "rzk" \- "1" | \# "set" \- "option" | \= | \# "unset" \- "option" | \# "check" | \: | \# "compute" | \# "compute" \- "whnf" | \# "compute" \- "nf" | \# "postulate" | \# "assume" | \# "variable" | \# "variables" | \# "section" | \# "end" | \# "define" | \: \= | \# "def" | \( | \) | \_ | \, | \{ | \| | \} | \| \- \> | "1" | \* \_ "1" | "2" | "0" \_ "2" | "1" \_ "2" | \* | \= \= \= | \< \= | \/ \\ | \\ \/ | \- \> | \= \_ \{ | \[ | \] | \< | \> | \\ | "refl" \_ \{ | \→ | \∑ :- -- Line comment "--" "--" [.]* ; -- Block comment "{-" "-}" \{ \- [$u # \-]* \- ([$u # [\- \}]] [$u # \-]* \- | \-)* \} ; -- Whitespace (skipped) $white+ ; -- Symbols @rsyms { tok (eitherResIdent TV) } -- token VarIdentToken [$u # [\t \n \r \ \! \" \# \( \) \, \- \. \; \< \> \? \[ \\ \] \{ \| \}]] [$u # [\t \n \r \ \" \# \( \) \, \; \< \> \[ \\ \] \{ \| \}]] * { tok (eitherResIdent T_VarIdentToken) } -- token HoleIdentToken \? { tok (eitherResIdent T_HoleIdentToken) } -- Keywords and Ident $l $i* { tok (eitherResIdent TV) } -- String \" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t | r | f)))* \" { tok (TL . unescapeInitTail) } { -- | Create a token with position. tok :: (String -> Tok) -> (Posn -> String -> Token) tok f p = PT p . f -- | Token without position. data Tok = TK {-# UNPACK #-} !TokSymbol -- ^ Reserved word or symbol. | TL !String -- ^ String literal. | TI !String -- ^ Integer literal. | TV !String -- ^ Identifier. | TD !String -- ^ Float literal. | TC !String -- ^ Character literal. | T_VarIdentToken !String | T_HoleIdentToken !String deriving (Eq, Show, Ord) -- | Smart constructor for 'Tok' for the sake of backwards compatibility. pattern TS :: String -> Int -> Tok pattern TS t i = TK (TokSymbol t i) -- | Keyword or symbol tokens have a unique ID. data TokSymbol = TokSymbol { tsText :: String -- ^ Keyword or symbol text. , tsID :: !Int -- ^ Unique ID. } deriving (Show) -- | Keyword/symbol equality is determined by the unique ID. instance Eq TokSymbol where (==) = (==) `on` tsID -- | Keyword/symbol ordering is determined by the unique ID. instance Ord TokSymbol where compare = compare `on` tsID -- | Token with position. data Token = PT Posn Tok | Err Posn deriving (Eq, Show, Ord) -- | Pretty print a position. printPosn :: Posn -> String printPosn (Pn _ l c) = "line " ++ show l ++ ", column " ++ show c -- | Pretty print the position of the first token in the list. tokenPos :: [Token] -> String tokenPos (t:_) = printPosn (tokenPosn t) tokenPos [] = "end of file" -- | Get the position of a token. tokenPosn :: Token -> Posn tokenPosn (PT p _) = p tokenPosn (Err p) = p -- | Get line and column of a token. tokenLineCol :: Token -> (Int, Int) tokenLineCol = posLineCol . tokenPosn -- | Get line and column of a position. posLineCol :: Posn -> (Int, Int) posLineCol (Pn _ l c) = (l,c) -- | Convert a token into "position token" form. mkPosToken :: Token -> ((Int, Int), String) mkPosToken t = (tokenLineCol t, tokenText t) -- | Convert a token to its text. tokenText :: Token -> String tokenText t = case t of PT _ (TS s _) -> s PT _ (TL s) -> show s PT _ (TI s) -> s PT _ (TV s) -> s PT _ (TD s) -> s PT _ (TC s) -> s Err _ -> "#error" PT _ (T_VarIdentToken s) -> s PT _ (T_HoleIdentToken s) -> s -- | Convert a token to a string. prToken :: Token -> String prToken t = tokenText t -- | Finite map from text to token organized as binary search tree. data BTree = N -- ^ Nil (leaf). | B String Tok BTree BTree -- ^ Binary node. deriving (Show) -- | Convert potential keyword into token or use fallback conversion. eitherResIdent :: (String -> Tok) -> String -> Tok eitherResIdent tv s = treeFind resWords where treeFind N = tv s treeFind (B a t left right) = case compare s a of LT -> treeFind left GT -> treeFind right EQ -> t -- | The keywords and symbols of the language organized as binary search tree. resWords :: BTree resWords = b "===" 33 (b ")" 17 (b "#lang" 9 (b "#compute-whnf" 5 (b "#compute" 3 (b "#check" 2 (b "#assume" 1 N N) N) (b "#compute-nf" 4 N N)) (b "#define" 7 (b "#def" 6 N N) (b "#end" 8 N N))) (b "#unset-option" 13 (b "#section" 11 (b "#postulate" 10 N N) (b "#set-option" 12 N N)) (b "#variables" 15 (b "#variable" 14 N N) (b "(" 16 N N)))) (b "1_2" 25 (b "->" 21 (b "*_1" 19 (b "*" 18 N N) (b "," 20 N N)) (b "0_2" 23 (b "/\\" 22 N N) (b "1" 24 N N))) (b ";" 29 (b ":" 27 (b "2" 26 N N) (b ":=" 28 N N)) (b "<=" 31 (b "<" 30 N N) (b "=" 32 N N))))) (b "idJ" 50 (b "Unit" 42 (b "Sigma" 38 (b "BOT" 36 (b ">" 35 (b "=_{" 34 N N) N) (b "CUBE" 37 N N)) (b "TOPE" 40 (b "TOP" 39 N N) (b "U" 41 N N))) (b "]" 46 (b "\\" 44 (b "[" 43 N N) (b "\\/" 45 N N)) (b "as" 48 (b "_" 47 N N) (b "first" 49 N N)))) (b "uses" 58 (b "refl_{" 54 (b "recOR" 52 (b "recBOT" 51 N N) (b "refl" 53 N N)) (b "second" 56 (b "rzk-1" 55 N N) (b "unit" 57 N N))) (b "}" 62 (b "|" 60 (b "{" 59 N N) (b "|->" 61 N N)) (b "\8594" 64 (b "\931" 63 N N) (b "\8721" 65 N N))))) where b s n = B bs (TS bs n) where bs = s -- | Unquote string literal. unescapeInitTail :: String -> String unescapeInitTail = id . unesc . tail . id where unesc s = case s of '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs '\\':'n':cs -> '\n' : unesc cs '\\':'t':cs -> '\t' : unesc cs '\\':'r':cs -> '\r' : unesc cs '\\':'f':cs -> '\f' : unesc cs '"':[] -> [] c:cs -> c : unesc cs _ -> [] ------------------------------------------------------------------- -- Alex wrapper code. -- A modified "posn" wrapper. ------------------------------------------------------------------- data Posn = Pn !Int !Int !Int deriving (Eq, Show, Ord) alexStartPos :: Posn alexStartPos = Pn 0 1 1 alexMove :: Posn -> Char -> Posn alexMove (Pn a l c) '\t' = Pn (a+1) l (((c+7) `div` 8)*8+1) alexMove (Pn a l c) '\n' = Pn (a+1) (l+1) 1 alexMove (Pn a l c) _ = Pn (a+1) l (c+1) type Byte = Word8 type AlexInput = (Posn, -- current position, Char, -- previous char [Byte], -- pending bytes on the current char String) -- current input string tokens :: String -> [Token] tokens str = go (alexStartPos, '\n', [], str) where go :: AlexInput -> [Token] go inp@(pos, _, _, str) = case alexScan inp 0 of AlexEOF -> [] AlexError (pos, _, _, _) -> [Err pos] AlexSkip inp' len -> go inp' AlexToken inp' len act -> act pos (take len str) : (go inp') alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s)) alexGetByte (p, _, [], s) = case s of [] -> Nothing (c:s) -> let p' = alexMove p c (b:bs) = utf8Encode c in p' `seq` Just (b, (p', c, bs, s)) alexInputPrevChar :: AlexInput -> Char alexInputPrevChar (p, c, bs, s) = c -- | Encode a Haskell String to a list of Word8 values, in UTF8 format. utf8Encode :: Char -> [Word8] utf8Encode = map fromIntegral . go . ord where go oc | oc <= 0x7f = [oc] | oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6) , 0x80 + oc Data.Bits..&. 0x3f ] | oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12) , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) , 0x80 + oc Data.Bits..&. 0x3f ] | otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18) , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f) , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) , 0x80 + oc Data.Bits..&. 0x3f ] }