-- File generated by the BNF Converter.

-- -*- haskell -*-
-- Lexer definition for use with Alex 3.
{
{-# OPTIONS -fno-warn-incomplete-patterns #-}
{-# OPTIONS_GHC -w #-}

{-# LANGUAGE PatternSynonyms #-}

module LexCore where

import Prelude

import qualified Data.Bits
import           Data.Char     (ord)
import           Data.Function (on)
import           Data.Maybe    (fromMaybe)
import qualified Data.Map      as Map
import           Data.Map      (Map)
import           Data.Word     (Word8)
}

-- Predefined character classes

$c = [A-Z\192-\221] # [\215]  -- capital isolatin1 letter (215 = \times)
$s = [a-z\222-\255] # [\247]  -- small   isolatin1 letter (247 = \div  )
$l = [$c $s]         -- letter
$d = [0-9]           -- digit
$i = [$l $d _ ']     -- identifier character
$u = [. \n]          -- universal: any character

-- Symbols and non-identifier-like reserved words

@rsyms = \# | \% \_ | \% "case" | \% "coerce" | \% "data" | \% "external" | \% "forall" | \% "in" | \% "let" | \% "module" | \% "newtype" | \% "note" | \% "of" | \% "rec" | \( | \) | \* | \- \> | \. | \: \: | \; | \= | \? | \@ | \\ | \{ | \}

:-

-- Whitespace (skipped)
$white+ ;

-- Symbols
@rsyms
    { tok (eitherResIdent TV) }

-- Keywords and Ident
$l $i*
    { tok (eitherResIdent TV) }

-- String
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t | r | f)))* \"
    { tok (TL . unescapeInitTail) }

-- Char
\' ($u # [\' \\] | \\ [\\ \' n t r f]) \'
    { tok TC }

-- Integer
$d+
    { tok TI }

-- Double
$d+ \. $d+ (e (\-)? $d+)?
    { tok TD }

{
-- | Create a token with position.
tok :: (String -> Tok) -> (Posn -> String -> Token)
tok f p = PT p . f

-- | Token without position.
data Tok
  = TK {-# UNPACK #-} !TokSymbol  -- ^ Reserved word or symbol.
  | TL !String                    -- ^ String literal.
  | TI !String                    -- ^ Integer literal.
  | TV !String                    -- ^ Identifier.
  | TD !String                    -- ^ Float literal.
  | TC !String                    -- ^ Character literal.
  deriving (Eq, Show, Ord)

-- | Smart constructor for 'Tok' for the sake of backwards compatibility.
pattern TS :: String -> Int -> Tok
pattern TS t i = TK (TokSymbol t i)

-- | Keyword or symbol tokens have a unique ID.
data TokSymbol = TokSymbol
  { tsText :: String
      -- ^ Keyword or symbol text.
  , tsID   :: !Int
      -- ^ Unique ID.
  } deriving (Show)

-- | Keyword/symbol equality is determined by the unique ID.
instance Eq  TokSymbol where (==)    = (==)    `on` tsID

-- | Keyword/symbol ordering is determined by the unique ID.
instance Ord TokSymbol where compare = compare `on` tsID

-- | Token with position.
data Token
  = PT  Posn Tok
  | Err Posn
  deriving (Eq, Show, Ord)

-- | Pretty print a position.
printPosn :: Posn -> String
printPosn (Pn _ l c) = "line " ++ show l ++ ", column " ++ show c

-- | Pretty print the position of the first token in the list.
tokenPos :: [Token] -> String
tokenPos (t:_) = printPosn (tokenPosn t)
tokenPos []    = "end of file"

-- | Get the position of a token.
tokenPosn :: Token -> Posn
tokenPosn (PT p _) = p
tokenPosn (Err p)  = p

-- | Get line and column of a token.
tokenLineCol :: Token -> (Int, Int)
tokenLineCol = posLineCol . tokenPosn

-- | Get line and column of a position.
posLineCol :: Posn -> (Int, Int)
posLineCol (Pn _ l c) = (l,c)

-- | Convert a token into "position token" form.
mkPosToken :: Token -> ((Int, Int), String)
mkPosToken t = (tokenLineCol t, tokenText t)

-- | Convert a token to its text.
tokenText :: Token -> String
tokenText t = case t of
  PT _ (TS s _) -> s
  PT _ (TL s)   -> show s
  PT _ (TI s)   -> s
  PT _ (TV s)   -> s
  PT _ (TD s)   -> s
  PT _ (TC s)   -> s
  Err _         -> "#error"

-- | Convert a token to a string.
prToken :: Token -> String
prToken t = tokenText t

-- | Convert potential keyword into token or use fallback conversion.
eitherResIdent :: (String -> Tok) -> String -> Tok
eitherResIdent tv s = fromMaybe (tv s) (Map.lookup s resWords)

-- | The keywords and symbols of the language organized as a Map.
resWords :: Map String Tok
resWords = Map.fromAscList
  [ ("#", TS "#" 1)
  , ("%_", TS "%_" 2)
  , ("%case", TS "%case" 3)
  , ("%coerce", TS "%coerce" 4)
  , ("%data", TS "%data" 5)
  , ("%external", TS "%external" 6)
  , ("%forall", TS "%forall" 7)
  , ("%in", TS "%in" 8)
  , ("%let", TS "%let" 9)
  , ("%module", TS "%module" 10)
  , ("%newtype", TS "%newtype" 11)
  , ("%note", TS "%note" 12)
  , ("%of", TS "%of" 13)
  , ("%rec", TS "%rec" 14)
  , ("(", TS "(" 15)
  , (")", TS ")" 16)
  , ("*", TS "*" 17)
  , ("->", TS "->" 18)
  , (".", TS "." 19)
  , ("::", TS "::" 20)
  , (";", TS ";" 21)
  , ("=", TS "=" 22)
  , ("?", TS "?" 23)
  , ("@", TS "@" 24)
  , ("\\", TS "\\" 25)
  , ("{", TS "{" 26)
  , ("}", TS "}" 27)
  ]

-- | Unquote string literal.
unescapeInitTail :: String -> String
unescapeInitTail = id . unesc . tail . id
  where
  unesc s = case s of
    '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
    '\\':'n':cs  -> '\n' : unesc cs
    '\\':'t':cs  -> '\t' : unesc cs
    '\\':'r':cs  -> '\r' : unesc cs
    '\\':'f':cs  -> '\f' : unesc cs
    '"':[]       -> []
    c:cs         -> c : unesc cs
    _            -> []

-------------------------------------------------------------------
-- Alex wrapper code.
-- A modified "posn" wrapper.
-------------------------------------------------------------------

data Posn = Pn !Int !Int !Int
  deriving (Eq, Show, Ord)

alexStartPos :: Posn
alexStartPos = Pn 0 1 1

alexMove :: Posn -> Char -> Posn
alexMove (Pn a l c) '\t' = Pn (a+1)  l     (((c+7) `div` 8)*8+1)
alexMove (Pn a l c) '\n' = Pn (a+1) (l+1)   1
alexMove (Pn a l c) _    = Pn (a+1)  l     (c+1)

type Byte = Word8

type AlexInput =
  ( Posn     -- current position
  , Char     -- previous char
  , [Byte]   -- pending bytes on the current char
  , String ) -- current input string

tokens :: String -> [Token]
tokens str = go (alexStartPos, '\n', [], str)
    where
      go :: AlexInput -> [Token]
      go inp@(pos, _, _, str) =
        case alexScan inp 0 of
          AlexEOF                   -> []
          AlexError (pos, _, _, _)  -> [Err pos]
          AlexSkip  inp' len        -> go inp'
          AlexToken inp' len act    -> act pos ( take len str) : (go inp')

alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s))
alexGetByte (p, _, [], s) =
  case s  of
    [] -> Nothing
    (c:s) ->
      let p'     = alexMove p c
          (b:bs) = utf8Encode c
      in p' `seq` Just (b, (p', c, bs, s))

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (p, c, bs, s) = c

-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
utf8Encode :: Char -> [Word8]
utf8Encode = map fromIntegral . go . ord
  where
  go oc
   | oc <= 0x7f       = [oc]

   | oc <= 0x7ff      = [ 0xc0 + (oc `Data.Bits.shiftR` 6)
                        , 0x80 + oc Data.Bits..&. 0x3f
                        ]

   | oc <= 0xffff     = [ 0xe0 + (oc `Data.Bits.shiftR` 12)
                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
                        , 0x80 + oc Data.Bits..&. 0x3f
                        ]
   | otherwise        = [ 0xf0 + (oc `Data.Bits.shiftR` 18)
                        , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)
                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
                        , 0x80 + oc Data.Bits..&. 0x3f
                        ]
}