src/Database/Sql/Presto/Parser/Token.hs

-- Copyright (c) 2017 Uber Technologies, Inc.
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy
-- of this software and associated documentation files (the "Software"), to deal
-- in the Software without restriction, including without limitation the rights
-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-- copies of the Software, and to permit persons to whom the Software is
-- furnished to do so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in
-- all copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-- THE SOFTWARE.

module Database.Sql.Presto.Parser.Token where

import Database.Sql.Type.Query (CastFailureAction(..))
import Database.Sql.Presto.Token
import Database.Sql.Presto.Parser.Internal

import Database.Sql.Position

import qualified Text.Parsec as P
import qualified Text.Parsec.Pos as P

import           Data.ByteString.Lazy (ByteString)
import           Data.Text.Lazy (Text)
import qualified Data.Text.Lazy as TL

import qualified Data.Map as M
import Data.Semigroup ((<>))


-- helpers

-- Renders a positioned token as a String by showing only the token itself;
-- both positions are ignored. Passed to P.tokenPrim as the token-display
-- function by the parsers in this module.
showTok :: (Token, Position, Position) -> String
showTok positioned = case positioned of
    (tok, _start, _end) -> show tok

-- Position-update function passed to P.tokenPrim.
--
-- The incoming Parsec position and the remaining token stream are both
-- ignored: the result is built from scratch out of the first Position stored
-- with the token just matched, and always carries the source name "-".
posFromTok :: P.SourcePos ->
              (Token, Position, Position) ->
              [(Token, Position, Position)] ->
              P.SourcePos
posFromTok _ (_, start, _) _ = P.newPos "-" line column
  where
    line = fromEnum $ positionLine start
    column = fromEnum $ positionColumn start

-- Consumes one token if it is equal to the given token, yielding the Range
-- built from the two positions stored with the consumed token. Fails (without
-- consuming) on any other token.
tokEqualsP :: Token -> Parser Range
tokEqualsP expected = P.tokenPrim showTok posFromTok matchTok
  where
    matchTok (actual, start, end)
        | expected == actual = Just (Range start end)
        | otherwise = Nothing

-- Consumes one token if it differs from the given token, yielding the Range
-- built from the two positions stored with the consumed token. Fails (without
-- consuming) when the next token equals the given one.
tokNotEqualsP :: Token -> Parser Range
tokNotEqualsP excluded = P.tokenPrim showTok posFromTok matchTok
  where
    matchTok (actual, start, end)
        | excluded /= actual = Just (Range start end)
        | otherwise = Nothing

-- Token test that accepts any TokWord (whatever its Bool flag) and returns
-- the word's text unchanged together with its Range; every other token is
-- rejected.
testNameTok :: (Token, Position, Position) -> Maybe (Text, Range)
testNameTok (TokWord _ name, start, end) = Just (name, Range start end)
testNameTok _ = Nothing

-- Matches the TokSymbol token carrying exactly the given text and yields its
-- Range (see tokEqualsP).
symbolP :: Text -> Parser Range
symbolP = tokEqualsP . TokSymbol

-- Matches a `TokWord False` token whose text is exactly equal to the given
-- keyword (no case folding happens here) and yields its Range. Words flagged
-- `True` never match.
-- NOTE(review): the False flag presumably marks an unquoted word that the
-- tokenizer has already lower-cased -- confirm in Database.Sql.Presto.Token.
keywordP :: Text -> Parser Range
keywordP keyword = P.tokenPrim showTok posFromTok matchKeyword
  where
    matchKeyword (TokWord False name, start, end)
        | name == keyword = Just (Range start end)
    matchKeyword _ = Nothing

-- onto the actual parsers

-- Matches the symbol token "." (see symbolP) and yields the token's Range.
dotP :: Parser Range
dotP = symbolP "."

-- Matches the symbol token "*" (see symbolP) and yields the token's Range.
starP :: Parser Range
starP = symbolP "*"

-- Matches the symbol token "+" (see symbolP) and yields the token's Range.
plusP :: Parser Range
plusP = symbolP "+"

-- Matches the symbol token "-" (see symbolP) and yields the token's Range.
minusP :: Parser Range
minusP = symbolP "-"

-- Matches the symbol token "," (see symbolP) and yields the token's Range.
commaP :: Parser Range
commaP = symbolP ","

-- Matches the opening-parenthesis symbol token "(" (see symbolP) and yields
-- the token's Range.
openP :: Parser Range
openP = symbolP "("

-- Matches the closing-parenthesis symbol token ")" (see symbolP) and yields
-- the token's Range.
closeP :: Parser Range
closeP = symbolP ")"

-- Matches the symbol token "[" (see symbolP) and yields the token's Range.
openBracketP :: Parser Range
openBracketP = symbolP "["

-- Matches the symbol token "]" (see symbolP) and yields the token's Range.
closeBracketP :: Parser Range
closeBracketP = symbolP "]"

-- Matches the symbol token "<" (see symbolP) and yields the token's Range.
openAngleP :: Parser Range
openAngleP = symbolP "<"

-- Matches the symbol token ">" (see symbolP) and yields the token's Range.
closeAngleP :: Parser Range
closeAngleP = symbolP ">"

-- Matches the symbol token "?" (see symbolP) and yields the token's Range.
questionMarkP :: Parser Range
questionMarkP = symbolP "?"

-- Matches a TokString token, yielding its ByteString payload and the token's
-- Range. Any other token is rejected.
stringP :: Parser (ByteString, Range)
stringP = P.tokenPrim showTok posFromTok matchString
  where
    matchString (TokString contents, start, end) = Just (contents, Range start end)
    matchString _ = Nothing

-- Matches a TokBinary token, yielding its ByteString payload and the token's
-- Range. Any other token is rejected.
binaryLiteralP :: Parser (ByteString, Range)
binaryLiteralP = P.tokenPrim showTok posFromTok matchBinary
  where
    matchBinary (TokBinary payload, start, end) = Just (payload, Range start end)
    matchBinary _ = Nothing

-- Matches a TokNumber token, yielding its Text payload (left unparsed) and
-- the token's Range. Any other token is rejected.
numberP :: Parser (Text, Range)
numberP = P.tokenPrim showTok posFromTok matchNumber
  where
    matchNumber (TokNumber digits, start, end) = Just (digits, Range start end)
    matchNumber _ = Nothing

-- Parses a type name, yielding its text and the Range it covers.
--
-- The multi-word spellings `double precision`, `time with time zone` and
-- `timestamp with time zone` are tried first and are returned in exactly that
-- canonical form, with a Range spanning from the first to the last keyword.
-- Otherwise any single word token is accepted and its text returned as-is.
typeNameP :: Parser (Text, Range)
typeNameP = P.choice
    [ P.try multiWordTypeP -- backtrack so plain old `double`, `time`, or `timestamp` can match below
    , singleWordTypeP
    ]
  where
    multiWordTypeP = P.choice [doublePrecisionP, withTimeZoneP]

    doublePrecisionP = do
        start <- doubleP
        end <- precisionP
        pure ("double precision", start <> end)

    withTimeZoneP = do
        -- remember which canonical name goes with the leading keyword
        (typeName, start) <-
            ((,) "time with time zone" <$> timeP)
                P.<|> ((,) "timestamp with time zone" <$> timestampP)
        _ <- withP
        end <- timezoneP
        pure (typeName, start <> end)

    singleWordTypeP = P.tokenPrim showTok posFromTok testNameTok

-- Matches a word token (whatever its Bool flag) whose lower-cased text is one
-- of the interval fields listed below; yields the lower-cased text and the
-- token's Range.
intervalFieldP :: Parser (Text, Range)
intervalFieldP = P.tokenPrim showTok posFromTok matchField
  where
    matchField (TokWord _ name, start, end)
        | lowered `elem` fields = Just (lowered, Range start end)
      where
        lowered = TL.toLower name
    matchField _ = Nothing

    fields = ["year", "month", "day", "hour", "minute", "second"]

-- Matches a symbol token that is one of the comparison operators listed
-- below; yields the operator text and the token's Range.
comparisonOperatorP :: Parser (Text, Range)
comparisonOperatorP = P.tokenPrim showTok posFromTok matchOperator
  where
    matchOperator (tok, start, end) = case tok of
        TokSymbol op | op `elem` operators -> Just (op, Range start end)
        _ -> Nothing

    operators = ["=", "<>", "!=", "<", "<=", ">", ">="]

-- Matches a word token (whatever its Bool flag) whose lower-cased text is
-- `all`, `some` or `any`; yields the lower-cased text and the token's Range.
comparisonQuantifierP :: Parser (Text, Range)
comparisonQuantifierP = P.tokenPrim showTok posFromTok matchQuantifier
  where
    matchQuantifier (TokWord _ name, start, end)
        | lowered `elem` quantifiers = Just (lowered, Range start end)
      where
        lowered = TL.toLower name
    matchQuantifier _ = Nothing

    quantifiers = ["all", "some", "any"]

-- Matches a word token (whatever its Bool flag) whose lower-cased text is one
-- of the units listed below; yields the lower-cased text and the token's
-- Range.
extractUnitP :: Parser (Text, Range)
extractUnitP = P.tokenPrim showTok posFromTok matchUnit
  where
    matchUnit (TokWord _ name, start, end)
        | lowered `elem` units = Just (lowered, Range start end)
      where
        lowered = TL.toLower name
    matchUnit _ = Nothing

    units = [ "year", "quarter", "month", "week"
            , "day", "day_of_month", "day_of_week", "dow", "day_of_year", "doy"
            , "year_of_week", "yow"
            , "hour", "minute", "second"
            , "timezone_hour", "timezone_minute"
            ]

-- Parses the type name of a typed constant, yielding its text and Range.
--
-- `double precision` is tried first and returned in that canonical spelling;
-- otherwise a single word token is accepted when its lower-cased text is in
-- the list of known types below, and the lower-cased text is returned.
typedConstantTypeP :: Parser (Text, Range)
typedConstantTypeP = P.choice
    [ P.try doublePrecisionP -- backtrack so plain old `double` can match below
    , knownTypeP
    ]
  where
    doublePrecisionP = do
        start <- doubleP
        end <- precisionP
        pure ("double precision", start <> end)

    knownTypeP = P.tokenPrim showTok posFromTok matchType

    matchType (TokWord _ name, start, end)
        | lowered `elem` types = Just (lowered, Range start end)
      where
        lowered = TL.toLower name
    matchType _ = Nothing

    -- generated with some trial and error (and guesswork) based on
    -- https://github.com/prestodb/presto/blob/master/presto-main/src/main/java/com/facebook/presto/type/TypeRegistry.java
    types = [ "boolean", "bigint", "integer", "smallint", "tinyint"
            , "double", "real", "varbinary", "date", "time", "timestamp"
            , "hyperloglog", "p4hyperloglog", "joniregexp", "re2jregexp"
            , "likepattern", "jsonpath", "color", "json", "codepoints"
            , "varchar", "char", "decimal"
            ]

-- Matches a word token (whatever its Bool flag) whose lower-cased text is one
-- of `nfc`, `nfd`, `nfkc` or `nfkd`; yields the lower-cased text and the
-- token's Range.
normalFormP :: Parser (Text, Range)
normalFormP = P.tokenPrim showTok posFromTok matchForm
  where
    matchForm (TokWord _ name, start, end)
        | lowered `elem` forms = Just (lowered, Range start end)
      where
        lowered = TL.toLower name
    matchForm _ = Nothing

    forms = [ "nfc", "nfd", "nfkc", "nfkd" ]

-- Matches a word token (whatever its Bool flag) whose lower-cased text is a
-- key of the `funcs` table below.
--
-- Yields the lower-cased name, the token's Range, and the Bool stored for
-- that name in the table: True for `current_date`, False for the others.
-- NOTE(review): the flag was bound as `alwaysBare`, so it presumably means the
-- function can never be followed by an argument list -- confirm at the caller.
possiblyBareFuncP :: Parser (Text, Range, Bool)
possiblyBareFuncP = P.tokenPrim showTok posFromTok testTok
  where
    -- A single total lookup both tests membership and fetches the flag, so no
    -- partial `Just x = ...` pattern is needed.
    testTok (TokWord _ name, s, e) =
        let name' = TL.toLower name
         in (\alwaysBare -> (name', Range s e, alwaysBare)) <$> M.lookup name' funcs
    testTok _ = Nothing

    funcs = M.fromList [ ("current_date",      True)
                       , ("current_time",      False)
                       , ("current_timestamp", False)
                       , ("localtime",         False)
                       , ("localtimestamp",    False)
                       ]

-- Matches a word token (whatever its Bool flag) whose lower-cased text is
-- `cast` or `try_cast`. Yields CastFailureError for `cast` and
-- CastFailureToNull for `try_cast`, together with the token's Range.
castFuncP :: Parser (CastFailureAction, Range)
castFuncP = P.tokenPrim showTok posFromTok matchCast
  where
    matchCast (TokWord _ name, start, end) =
        case TL.toLower name of
            "cast" -> Just (CastFailureError, Range start end)
            "try_cast" -> Just (CastFailureToNull, Range start end)
            _ -> Nothing
    matchCast _ = Nothing


-- In presto, databases are called "catalogs".
-- Notionally they are "one coarser than schema"
--
-- Matches a word token usable as a database name, yielding its text unchanged
-- and its Range. A `TokWord True` word is always accepted; a `TokWord False`
-- word is accepted only when wordCanBeSchemaName holds for its wordInfo (the
-- schema-name rule is the one applied here).
-- NOTE(review): the True flag presumably marks a quoted identifier -- confirm.
databaseNameP :: Parser (Text, Range)
databaseNameP = P.tokenPrim showTok posFromTok matchName
  where
    matchName (TokWord flagged name, start, end)
        | flagged || wordCanBeSchemaName (wordInfo name) = Just (name, Range start end)
    matchName _ = Nothing

-- Matches a word token usable as a schema name, yielding its text unchanged
-- and its Range. A `TokWord True` word is always accepted; a `TokWord False`
-- word is accepted only when wordCanBeSchemaName holds for its wordInfo.
-- NOTE(review): the True flag presumably marks a quoted identifier -- confirm.
schemaNameP :: Parser (Text, Range)
schemaNameP = P.tokenPrim showTok posFromTok matchName
  where
    matchName (TokWord flagged name, start, end)
        | flagged || wordCanBeSchemaName (wordInfo name) = Just (name, Range start end)
    matchName _ = Nothing

-- Matches a word token usable as a table name, yielding its text unchanged
-- and its Range. A `TokWord True` word is always accepted; a `TokWord False`
-- word is accepted only when wordCanBeTableName holds for its wordInfo.
-- NOTE(review): the True flag presumably marks a quoted identifier -- confirm.
tableNameP :: Parser (Text, Range)
tableNameP = P.tokenPrim showTok posFromTok matchName
  where
    matchName (TokWord flagged name, start, end)
        | flagged || wordCanBeTableName (wordInfo name) = Just (name, Range start end)
    matchName _ = Nothing

-- Matches a word token usable as a column name, yielding its text unchanged
-- and its Range. A `TokWord True` word is always accepted; a `TokWord False`
-- word is accepted only when wordCanBeColumnName holds for its wordInfo.
-- NOTE(review): the True flag presumably marks a quoted identifier -- confirm.
columnNameP :: Parser (Text, Range)
columnNameP = P.tokenPrim showTok posFromTok matchName
  where
    matchName (TokWord flagged name, start, end)
        | flagged || wordCanBeColumnName (wordInfo name) = Just (name, Range start end)
    matchName _ = Nothing

-- Matches a word token usable as a struct field name, yielding its text
-- unchanged and its Range. A `TokWord True` word is always accepted; a
-- `TokWord False` word is accepted only when wordCanBeColumnName holds for its
-- wordInfo (the column-name rule is the one applied here).
-- NOTE(review): the True flag presumably marks a quoted identifier -- confirm.
structFieldNameP :: Parser (Text, Range)
structFieldNameP = P.tokenPrim showTok posFromTok matchName
  where
    matchName (TokWord flagged name, start, end)
        | flagged || wordCanBeColumnName (wordInfo name) = Just (name, Range start end)
    matchName _ = Nothing

-- Matches a word token usable as a parameter name, yielding its text
-- unchanged and its Range. A `TokWord True` word is always accepted; a
-- `TokWord False` word is accepted only when wordCanBeColumnName holds for its
-- wordInfo (the column-name rule is the one applied here).
-- NOTE(review): the True flag presumably marks a quoted identifier -- confirm.
paramNameP :: Parser (Text, Range)
paramNameP = P.tokenPrim showTok posFromTok matchName
  where
    matchName (TokWord flagged name, start, end)
        | flagged || wordCanBeColumnName (wordInfo name) = Just (name, Range start end)
    matchName _ = Nothing

-- Matches a word token usable as a function name, yielding its text unchanged
-- and its Range. A `TokWord True` word is always accepted; a `TokWord False`
-- word is accepted only when wordCanBeFunctionName holds for its wordInfo.
-- NOTE(review): the True flag presumably marks a quoted identifier -- confirm.
functionNameP :: Parser (Text, Range)
functionNameP = P.tokenPrim showTok posFromTok matchName
  where
    matchName (TokWord flagged name, start, end)
        | flagged || wordCanBeFunctionName (wordInfo name) = Just (name, Range start end)
    matchName _ = Nothing

-- Matches the keyword `all` (see keywordP) and yields the token's Range.
allP :: Parser Range
allP = keywordP "all"

-- Matches the keyword `and` (see keywordP) and yields the token's Range.
andP :: Parser Range
andP = keywordP "and"

-- Matches the keyword `array` (see keywordP) and yields the token's Range.
arrayP :: Parser Range
arrayP = keywordP "array"

-- Matches the keyword `as` (see keywordP) and yields the token's Range.
asP :: Parser Range
asP = keywordP "as"

-- Matches the keyword `asc` (see keywordP) and yields the token's Range.
ascP :: Parser Range
ascP = keywordP "asc"

-- Matches the keyword `at` (see keywordP) and yields the token's Range.
atP :: Parser Range
atP = keywordP "at"

-- Matches the keyword `bernoulli` (see keywordP) and yields the token's Range.
bernoulliP :: Parser Range
bernoulliP = keywordP "bernoulli"

-- Matches the keyword `between` (see keywordP) and yields the token's Range.
betweenP :: Parser Range
betweenP = keywordP "between"

-- Matches the keyword `by` (see keywordP) and yields the token's Range.
byP :: Parser Range
byP = keywordP "by"

-- Matches the keyword `call` (see keywordP) and yields the token's Range.
callP :: Parser Range
callP = keywordP "call"

-- Matches the keyword `case` (see keywordP) and yields the token's Range.
caseP :: Parser Range
caseP = keywordP "case"

-- Matches the keyword `cross` (see keywordP) and yields the token's Range.
crossP :: Parser Range
crossP = keywordP "cross"

-- Matches the keyword `cube` (see keywordP) and yields the token's Range.
cubeP :: Parser Range
cubeP = keywordP "cube"

-- Matches the keyword `current` (see keywordP) and yields the token's Range.
currentP :: Parser Range
currentP = keywordP "current"

-- Matches the keyword `delete` (see keywordP) and yields the token's Range.
deleteP :: Parser Range
deleteP = keywordP "delete"

-- Matches the keyword `desc` (see keywordP) and yields the token's Range.
descP :: Parser Range
descP = keywordP "desc"

-- Matches either the keyword `describe` or the keyword `desc` (tried in that
-- order) and yields the matched token's Range.
describeP :: Parser Range
describeP = keywordP "describe" P.<|> keywordP "desc"

-- Matches the keyword `distinct` (see keywordP) and yields the token's Range.
distinctP :: Parser Range
distinctP = keywordP "distinct"

-- Matches the keyword `double` (see keywordP) and yields the token's Range.
doubleP :: Parser Range
doubleP = keywordP "double"

-- Matches the keyword `drop` (see keywordP) and yields the token's Range.
dropP :: Parser Range
dropP = keywordP "drop"

-- Matches the keyword `else` (see keywordP) and yields the token's Range.
elseP :: Parser Range
elseP = keywordP "else"

-- Matches the keyword `end` (see keywordP) and yields the token's Range.
endP :: Parser Range
endP = keywordP "end"

-- Matches the keyword `escape` (see keywordP) and yields the token's Range.
escapeP :: Parser Range
escapeP = keywordP "escape"

-- Matches the keyword `except` (see keywordP) and yields the token's Range.
exceptP :: Parser Range
exceptP = keywordP "except"

-- Matches the keyword `exists` (see keywordP) and yields the token's Range.
existsP :: Parser Range
existsP = keywordP "exists"

-- Matches the keyword `explain` (see keywordP) and yields the token's Range.
explainP :: Parser Range
explainP = keywordP "explain"

-- Matches the keyword `extract` (see keywordP) and yields the token's Range.
extractP :: Parser Range
extractP = keywordP "extract"

-- Matches the keyword `false` (see keywordP) and yields the token's Range.
falseP :: Parser Range
falseP = keywordP "false"

-- Matches the keyword `filter` (see keywordP) and yields the token's Range.
filterP :: Parser Range
filterP = keywordP "filter"

-- Matches the keyword `first` (see keywordP) and yields the token's Range.
firstP :: Parser Range
firstP = keywordP "first"

-- Matches the keyword `following` (see keywordP) and yields the token's Range.
followingP :: Parser Range
followingP = keywordP "following"

-- Matches the keyword `for` (see keywordP) and yields the token's Range.
forP :: Parser Range
forP = keywordP "for"

-- Matches the keyword `from` (see keywordP) and yields the token's Range.
fromP :: Parser Range
fromP = keywordP "from"

-- Matches the keyword `full` (see keywordP) and yields the token's Range.
fullP :: Parser Range
fullP = keywordP "full"

-- Matches the keyword `grant` (see keywordP) and yields the token's Range.
grantP :: Parser Range
grantP = keywordP "grant"

-- Matches the keyword `group` (see keywordP) and yields the token's Range.
groupP :: Parser Range
groupP = keywordP "group"

-- Matches the keyword `grouping` (see keywordP) and yields the token's Range.
groupingP :: Parser Range
groupingP = keywordP "grouping"

-- Matches the keyword `having` (see keywordP) and yields the token's Range.
havingP :: Parser Range
havingP = keywordP "having"

-- Matches the keyword `if` (see keywordP) and yields the token's Range.
ifP :: Parser Range
ifP = keywordP "if"

-- Matches the keyword `interval` (see keywordP) and yields the token's Range.
intervalP :: Parser Range
intervalP = keywordP "interval"

-- Matches the keyword `in` (see keywordP) and yields the token's Range.
inP :: Parser Range
inP = keywordP "in"

-- Matches the keyword `inner` (see keywordP) and yields the token's Range.
innerP :: Parser Range
innerP = keywordP "inner"

-- Matches the keyword `insert` (see keywordP) and yields the token's Range.
insertP :: Parser Range
insertP = keywordP "insert"

-- Matches the keyword `intersect` (see keywordP) and yields the token's Range.
intersectP :: Parser Range
intersectP = keywordP "intersect"

-- Matches the keyword `into` (see keywordP) and yields the token's Range.
intoP :: Parser Range
intoP = keywordP "into"

-- Matches the keyword `is` (see keywordP) and yields the token's Range.
isP :: Parser Range
isP = keywordP "is"

-- Matches the keyword `join` (see keywordP) and yields the token's Range.
joinP :: Parser Range
joinP = keywordP "join"

-- Matches the keyword `last` (see keywordP) and yields the token's Range.
lastP :: Parser Range
lastP = keywordP "last"

-- Matches the keyword `left` (see keywordP) and yields the token's Range.
leftP :: Parser Range
leftP = keywordP "left"

-- Matches the keyword `like` (see keywordP) and yields the token's Range.
likeP :: Parser Range
likeP = keywordP "like"

-- Matches the keyword `limit` (see keywordP) and yields the token's Range.
limitP :: Parser Range
limitP = keywordP "limit"

-- Matches the keyword `map` (see keywordP) and yields the token's Range.
mapP :: Parser Range
mapP = keywordP "map"

-- Matches the keyword `natural` (see keywordP) and yields the token's Range.
naturalP :: Parser Range
naturalP = keywordP "natural"

-- Matches the keyword `normalize` (see keywordP) and yields the token's Range.
normalizeP :: Parser Range
normalizeP = keywordP "normalize"

-- Matches the keyword `not` (see keywordP) and yields the token's Range.
notP :: Parser Range
notP = keywordP "not"

-- Matches the keyword `null` (see keywordP) and yields the token's Range.
nullP :: Parser Range
nullP = keywordP "null"

-- Matches the keyword `nulls` (see keywordP) and yields the token's Range.
nullsP :: Parser Range
nullsP = keywordP "nulls"

-- Matches the keyword `on` (see keywordP) and yields the token's Range.
onP :: Parser Range
onP = keywordP "on"

-- Matches the keyword `or` (see keywordP) and yields the token's Range.
orP :: Parser Range
orP = keywordP "or"

-- Matches the keyword `order` (see keywordP) and yields the token's Range.
orderP :: Parser Range
orderP = keywordP "order"

-- Matches the keyword `ordinality` (see keywordP) and yields the token's Range.
ordinalityP :: Parser Range
ordinalityP = keywordP "ordinality"

-- Matches the keyword `outer` (see keywordP) and yields the token's Range.
outerP :: Parser Range
outerP = keywordP "outer"

-- Matches the keyword `over` (see keywordP) and yields the token's Range.
overP :: Parser Range
overP = keywordP "over"

-- Matches the keyword `partition` (see keywordP) and yields the token's Range.
partitionP :: Parser Range
partitionP = keywordP "partition"

-- Matches the keyword `poissonized` (see keywordP) and yields the token's Range.
poissonizedP :: Parser Range
poissonizedP = keywordP "poissonized"

-- Matches the keyword `position` (see keywordP) and yields the token's Range.
positionP :: Parser Range
positionP = keywordP "position"

-- Matches the keyword `preceding` (see keywordP) and yields the token's Range.
precedingP :: Parser Range
precedingP = keywordP "preceding"

-- Matches the keyword `precision` (see keywordP) and yields the token's Range.
precisionP :: Parser Range
precisionP = keywordP "precision"

-- Matches the keyword `range` (see keywordP) and yields the token's Range.
rangeP :: Parser Range
rangeP = keywordP "range"

-- Matches the keyword `revoke` (see keywordP) and yields the token's Range.
revokeP :: Parser Range
revokeP = keywordP "revoke"

-- Matches the keyword `right` (see keywordP) and yields the token's Range.
rightP :: Parser Range
rightP = keywordP "right"

-- Matches the keyword `rollup` (see keywordP) and yields the token's Range.
rollupP :: Parser Range
rollupP = keywordP "rollup"

-- Matches the keyword `row` (see keywordP) and yields the token's Range.
rowP :: Parser Range
rowP = keywordP "row"

-- Matches the keyword `rows` (see keywordP) and yields the token's Range.
rowsP :: Parser Range
rowsP = keywordP "rows"

-- Matches the keyword `select` (see keywordP) and yields the token's Range.
selectP :: Parser Range
selectP = keywordP "select"

-- Matches the symbol token ";" (see symbolP) and yields the token's Range.
semicolonP :: Parser Range
semicolonP = symbolP ";"

-- Matches any single token other than the symbol ";" (see tokNotEqualsP) and
-- yields that token's Range.
notSemicolonP :: Parser Range
notSemicolonP = tokNotEqualsP $ TokSymbol ";"

-- Matches the keyword `sets` (see keywordP) and yields the token's Range.
setsP :: Parser Range
setsP = keywordP "sets"

-- Matches the keyword `show` (see keywordP) and yields the token's Range.
showP :: Parser Range
showP = keywordP "show"

-- Matches the keyword `substring` (see keywordP) and yields the token's Range.
substringP :: Parser Range
substringP = keywordP "substring"

-- Matches the keyword `system` (see keywordP) and yields the token's Range.
systemP :: Parser Range
systemP = keywordP "system"

-- Matches the keyword `table` (see keywordP) and yields the token's Range.
tableP :: Parser Range
tableP = keywordP "table"

-- Matches the single-word keyword `tablesample` (see keywordP) and yields the
-- token's Range.
tableSampleP :: Parser Range
tableSampleP = keywordP "tablesample"

-- Matches the keyword `then` (see keywordP) and yields the token's Range.
thenP :: Parser Range
thenP = keywordP "then"

-- Matches the keyword `time` (see keywordP) and yields the token's Range.
timeP :: Parser Range
timeP = keywordP "time"

-- Matches the keyword `timestamp` (see keywordP) and yields the token's Range.
timestampP :: Parser Range
timestampP = keywordP "timestamp"

-- Matches the two consecutive keywords `time` and `zone`, yielding the two
-- token Ranges combined with (<>).
timezoneP :: Parser Range
timezoneP = (<>) <$> keywordP "time" <*> keywordP "zone"

-- Matches the keyword `to` (see keywordP) and yields the token's Range.
toP :: Parser Range
toP = keywordP "to"

-- Matches the keyword `true` (see keywordP) and yields the token's Range.
trueP :: Parser Range
trueP = keywordP "true"

-- Matches the keyword `unbounded` (see keywordP) and yields the token's Range.
unboundedP :: Parser Range
unboundedP = keywordP "unbounded"

-- Matches the keyword `union` (see keywordP) and yields the token's Range.
unionP :: Parser Range
unionP = keywordP "union"

-- Matches the keyword `unnest` (see keywordP) and yields the token's Range.
unnestP :: Parser Range
unnestP = keywordP "unnest"

-- Matches the keyword `using` (see keywordP) and yields the token's Range.
usingP :: Parser Range
usingP = keywordP "using"

-- Matches the keyword `values` (see keywordP) and yields the token's Range.
valuesP :: Parser Range
valuesP = keywordP "values"

-- Matches the keyword `view` (see keywordP) and yields the token's Range.
viewP :: Parser Range
viewP = keywordP "view"

-- Matches the keyword `when` (see keywordP) and yields the token's Range.
whenP :: Parser Range
whenP = keywordP "when"

-- Matches the keyword `where` (see keywordP) and yields the token's Range.
whereP :: Parser Range
whereP = keywordP "where"

-- Matches the keyword `with` (see keywordP) and yields the token's Range.
withP :: Parser Range
withP = keywordP "with"