-- File generated by the BNF Converter.

-- -*- haskell -*-
-- Lexer definition for use with Alex 3.
{
{-# OPTIONS -fno-warn-incomplete-patterns #-}
{-# OPTIONS_GHC -w #-}

{-# LANGUAGE PatternSynonyms #-}

module LexGF where

import Prelude

import qualified Data.Bits
import           Data.Char     (ord)
import           Data.Function (on)
import           Data.Maybe    (fromMaybe)
import qualified Data.Map      as Map
import           Data.Map      (Map)
import           Data.Word     (Word8)
}

-- Predefined character classes

$c = [A-Z\192-\221] # [\215]  -- capital isolatin1 letter (215 = \times)
$s = [a-z\222-\255] # [\247]  -- small   isolatin1 letter (247 = \div  )
$l = [$c $s]         -- letter
$d = [0-9]           -- digit
$i = [$l $d _ ']     -- identifier character
$u = [. \n]          -- universal: any character

-- Symbols and non-identifier-like reserved words

@rsyms = \! | \$ | \% | \( | \) | \* | \* \* | \+ | \+ \+ | \, | \- | \- \> | \. | \/ | \: | \; | \< | \= | \= \> | \> | \? | \@ | \[ | \\ | \] | \_ | \{ | \| | \}

:-

-- Line comment "--"
"--" [.]* ;

-- Block comment "{-" "-}"
\{ \- [ $u # \- ]* \- ([ $u # [ \} \- ] ] [ $u # \- ]* \- | \-)* \};

-- Whitespace (skipped)
$white+ ;

-- Symbols
@rsyms
    { tok (eitherResIdent TV) }

-- token LString
\' [ $u # \' ]* \'
    { tok (eitherResIdent T_LString) }

-- Keywords and Ident
$l $i*
    { tok (eitherResIdent TV) }

-- String
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t | r | f)))* \"
    { tok (TL . unescapeInitTail) }

-- Integer
$d+
    { tok TI }

{
-- | Create a token with position.
tok :: (String -> Tok) -> (Posn -> String -> Token)
tok f p = PT p . f

-- | Token without position.
data Tok
  = TK {-# UNPACK #-} !TokSymbol  -- ^ Reserved word or symbol.
  | TL !String                    -- ^ String literal.
  | TI !String                    -- ^ Integer literal.
  | TV !String                    -- ^ Identifier.
  | TD !String                    -- ^ Float literal.
  | TC !String                    -- ^ Character literal.
  | T_LString !String
  deriving (Eq, Show, Ord)

-- | Smart constructor for 'Tok' for the sake of backwards compatibility.
pattern TS :: String -> Int -> Tok
pattern TS t i = TK (TokSymbol t i)

-- | Keyword or symbol tokens have a unique ID.
data TokSymbol = TokSymbol
  { tsText :: String
      -- ^ Keyword or symbol text.
  , tsID   :: !Int
      -- ^ Unique ID.
  } deriving (Show)

-- | Keyword/symbol equality is determined by the unique ID.
instance Eq  TokSymbol where (==)    = (==)    `on` tsID

-- | Keyword/symbol ordering is determined by the unique ID.
instance Ord TokSymbol where compare = compare `on` tsID

-- | Token with position.
data Token
  = PT  Posn Tok
  | Err Posn
  deriving (Eq, Show, Ord)

-- | Pretty print a position.
printPosn :: Posn -> String
printPosn (Pn _ l c) = "line " ++ show l ++ ", column " ++ show c

-- | Pretty print the position of the first token in the list.
tokenPos :: [Token] -> String
tokenPos (t:_) = printPosn (tokenPosn t)
tokenPos []    = "end of file"

-- | Get the position of a token.
tokenPosn :: Token -> Posn
tokenPosn (PT p _) = p
tokenPosn (Err p)  = p

-- | Get line and column of a token.
tokenLineCol :: Token -> (Int, Int)
tokenLineCol = posLineCol . tokenPosn

-- | Get line and column of a position.
posLineCol :: Posn -> (Int, Int)
posLineCol (Pn _ l c) = (l,c)

-- | Convert a token into "position token" form.
mkPosToken :: Token -> ((Int, Int), String)
mkPosToken t = (tokenLineCol t, tokenText t)

-- | Convert a token to its text.
tokenText :: Token -> String
tokenText t = case t of
  PT _ (TS s _) -> s
  PT _ (TL s)   -> show s
  PT _ (TI s)   -> s
  PT _ (TV s)   -> s
  PT _ (TD s)   -> s
  PT _ (TC s)   -> s
  Err _         -> "#error"
  PT _ (T_LString s) -> s

-- | Convert a token to a string.
prToken :: Token -> String
prToken t = tokenText t

-- | Convert potential keyword into token or use fallback conversion.
eitherResIdent :: (String -> Tok) -> String -> Tok
eitherResIdent tv s = fromMaybe (tv s) (Map.lookup s resWords)

-- | The keywords and symbols of the language organized as a Map.
resWords :: Map String Tok
resWords = Map.fromAscList
  [ ("!", TS "!" 1)
  , ("$", TS "$" 2)
  , ("%", TS "%" 3)
  , ("(", TS "(" 4)
  , (")", TS ")" 5)
  , ("*", TS "*" 6)
  , ("**", TS "**" 7)
  , ("+", TS "+" 8)
  , ("++", TS "++" 9)
  , (",", TS "," 10)
  , ("-", TS "-" 11)
  , ("->", TS "->" 12)
  , (".", TS "." 13)
  , ("/", TS "/" 14)
  , (":", TS ":" 15)
  , (";", TS ";" 16)
  , ("<", TS "<" 17)
  , ("=", TS "=" 18)
  , ("=>", TS "=>" 19)
  , (">", TS ">" 20)
  , ("?", TS "?" 21)
  , ("@", TS "@" 22)
  , ("Lin", TS "Lin" 23)
  , ("PType", TS "PType" 24)
  , ("Str", TS "Str" 25)
  , ("Strs", TS "Strs" 26)
  , ("Tok", TS "Tok" 27)
  , ("Type", TS "Type" 28)
  , ("[", TS "[" 29)
  , ("\\", TS "\\" 30)
  , ("]", TS "]" 31)
  , ("_", TS "_" 32)
  , ("abstract", TS "abstract" 33)
  , ("case", TS "case" 34)
  , ("cat", TS "cat" 35)
  , ("concrete", TS "concrete" 36)
  , ("data", TS "data" 37)
  , ("def", TS "def" 38)
  , ("flags", TS "flags" 39)
  , ("fn", TS "fn" 40)
  , ("fun", TS "fun" 41)
  , ("grammar", TS "grammar" 42)
  , ("in", TS "in" 43)
  , ("include", TS "include" 44)
  , ("incomplete", TS "incomplete" 45)
  , ("instance", TS "instance" 46)
  , ("interface", TS "interface" 47)
  , ("let", TS "let" 48)
  , ("lin", TS "lin" 49)
  , ("lincat", TS "lincat" 50)
  , ("lindef", TS "lindef" 51)
  , ("lintype", TS "lintype" 52)
  , ("of", TS "of" 53)
  , ("open", TS "open" 54)
  , ("oper", TS "oper" 55)
  , ("out", TS "out" 56)
  , ("package", TS "package" 57)
  , ("param", TS "param" 58)
  , ("pattern", TS "pattern" 59)
  , ("pre", TS "pre" 60)
  , ("printname", TS "printname" 61)
  , ("resource", TS "resource" 62)
  , ("reuse", TS "reuse" 63)
  , ("strs", TS "strs" 64)
  , ("table", TS "table" 65)
  , ("tokenizer", TS "tokenizer" 66)
  , ("transfer", TS "transfer" 67)
  , ("union", TS "union" 68)
  , ("var", TS "var" 69)
  , ("variants", TS "variants" 70)
  , ("where", TS "where" 71)
  , ("with", TS "with" 72)
  , ("{", TS "{" 73)
  , ("|", TS "|" 74)
  , ("}", TS "}" 75)
  ]

-- | Unquote string literal.
unescapeInitTail :: String -> String
unescapeInitTail = id . unesc . tail . id
  where
  unesc s = case s of
    '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
    '\\':'n':cs  -> '\n' : unesc cs
    '\\':'t':cs  -> '\t' : unesc cs
    '\\':'r':cs  -> '\r' : unesc cs
    '\\':'f':cs  -> '\f' : unesc cs
    '"':[]       -> []
    c:cs         -> c : unesc cs
    _            -> []

-------------------------------------------------------------------
-- Alex wrapper code.
-- A modified "posn" wrapper.
-------------------------------------------------------------------

data Posn = Pn !Int !Int !Int
  deriving (Eq, Show, Ord)

alexStartPos :: Posn
alexStartPos = Pn 0 1 1

alexMove :: Posn -> Char -> Posn
alexMove (Pn a l c) '\t' = Pn (a+1)  l     (((c+7) `div` 8)*8+1)
alexMove (Pn a l c) '\n' = Pn (a+1) (l+1)   1
alexMove (Pn a l c) _    = Pn (a+1)  l     (c+1)

type Byte = Word8

type AlexInput =
  ( Posn     -- current position
  , Char     -- previous char
  , [Byte]   -- pending bytes on the current char
  , String ) -- current input string

tokens :: String -> [Token]
tokens str = go (alexStartPos, '\n', [], str)
    where
      go :: AlexInput -> [Token]
      go inp@(pos, _, _, str) =
        case alexScan inp 0 of
          AlexEOF                   -> []
          AlexError (pos, _, _, _)  -> [Err pos]
          AlexSkip  inp' len        -> go inp'
          AlexToken inp' len act    -> act pos ( take len str) : (go inp')

alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s))
alexGetByte (p, _, [], s) =
  case s  of
    [] -> Nothing
    (c:s) ->
      let p'     = alexMove p c
          (b:bs) = utf8Encode c
      in p' `seq` Just (b, (p', c, bs, s))

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (p, c, bs, s) = c

-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
utf8Encode :: Char -> [Word8]
utf8Encode = map fromIntegral . go . ord
  where
  go oc
   | oc <= 0x7f       = [oc]

   | oc <= 0x7ff      = [ 0xc0 + (oc `Data.Bits.shiftR` 6)
                        , 0x80 + oc Data.Bits..&. 0x3f
                        ]

   | oc <= 0xffff     = [ 0xe0 + (oc `Data.Bits.shiftR` 12)
                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
                        , 0x80 + oc Data.Bits..&. 0x3f
                        ]
   | otherwise        = [ 0xf0 + (oc `Data.Bits.shiftR` 18)
                        , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)
                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
                        , 0x80 + oc Data.Bits..&. 0x3f
                        ]
}