] [ WITH TIME ZONE ]
> precision t = try (parens (commaSep integerLiteral)) >>= makeWrap t
> makeWrap (TypeName t) [a] = return $ PrecTypeName t a
> makeWrap (TypeName t) [a,b] = return $ PrecScaleTypeName t a b
> makeWrap _ _ = fail "there must be one or two precision components"
== value expression parens and row ctor
> sparens :: Parser ValueExpr
> sparens =
> ctor <$> parens (commaSep1 valueExpr)
> where
> ctor [a] = Parens a
> ctor as = SpecialOp (Name "rowctor") as
== operator parsing
The 'regular' operators in this parsing and in the abstract syntax are
unary prefix, unary postfix and binary infix operators. The operators
can be symbols (a + b), single keywords (a and b) or multiple keywords
(a is similar to b).
TODO: carefully review the precedences and associativities.
> opTable :: Bool -> [[E.Operator String () Identity ValueExpr]]
> opTable bExpr =
> [[binarySym "." E.AssocLeft]
> ,[prefixSym "+", prefixSym "-"]
> ,[binarySym "^" E.AssocLeft]
> ,[binarySym "*" E.AssocLeft
> ,binarySym "/" E.AssocLeft
> ,binarySym "%" E.AssocLeft]
> ,[binarySym "+" E.AssocLeft
> ,binarySym "-" E.AssocLeft]
> ,[binarySym ">=" E.AssocNone
> ,binarySym "<=" E.AssocNone
> ,binarySym "!=" E.AssocRight
> ,binarySym "<>" E.AssocRight
> ,binarySym "||" E.AssocRight
> ,prefixSym "~"
> ,binarySym "&" E.AssocRight
> ,binarySym "|" E.AssocRight
> ,binaryKeyword "like" E.AssocNone
> ,binaryKeyword "overlaps" E.AssocNone]
> ++ map (`binaryKeywords` E.AssocNone)
> ["not like"
> ,"is similar to"
> ,"is not similar to"
> ,"is distinct from"
> ,"is not distinct from"]
> ++ map postfixKeywords
> ["is null"
> ,"is not null"
> ,"is true"
> ,"is not true"
> ,"is false"
> ,"is not false"
> ,"is unknown"
> ,"is not unknown"]
> ++ [E.Postfix $ try inSuffix,E.Postfix $ try betweenSuffix]
> ]
> ++
> [[binarySym "<" E.AssocNone
> ,binarySym ">" E.AssocNone]
> ,[binarySym "=" E.AssocRight]
> ,[prefixKeyword "not"]]
> ++
> if bExpr then [] else [[binaryKeyword "and" E.AssocLeft]]
> ++
> [[binaryKeyword "or" E.AssocLeft]]
> where
> binarySym nm assoc = binary (try $ symbol_ nm) nm assoc
> binaryKeyword nm assoc = binary (try $ keyword_ nm) nm assoc
> binaryKeywords nm assoc = binary (try $ mapM_ keyword_ (words nm)) nm assoc
> binary p nm assoc =
> E.Infix (p >> return (\a b -> BinOp a (Name nm) b)) assoc
> prefixKeyword nm = prefix (try $ keyword_ nm) nm
> prefixSym nm = prefix (try $ symbol_ nm) nm
> prefix p nm = E.Prefix (p >> return (PrefixOp (Name nm)))
> postfixKeywords nm = postfix (try $ mapM_ keyword_ (words nm)) nm
> postfix p nm = E.Postfix (p >> return (PostfixOp (Name nm)))
== value expressions
TODO:
left factor stuff which starts with identifier
This parses most of the value exprs.The order of the parsers and use
of try is carefully done to make everything work. It is a little
fragile and could at least do with some heavy explanation.
> valueExpr :: Parser ValueExpr
> valueExpr = E.buildExpressionParser (opTable False) term
> term :: Parser ValueExpr
> term = choice [literal
> ,parameter
> ,scase
> ,cast
> ,try specialOpKs
> ,subquery
> ,try app
> ,try star
> ,identifier
> ,sparens]
expose the b expression for window frame clause range between
> valueExprB :: Parser ValueExpr
> valueExprB = E.buildExpressionParser (opTable True) term
-------------------------------------------------
= query expressions
== select lists
> selectItem :: Parser (ValueExpr,Maybe Name)
> selectItem = (,) <$> valueExpr <*> optionMaybe (try als)
> where als = optional (try (keyword_ "as")) *> name
> selectList :: Parser [(ValueExpr,Maybe Name)]
> selectList = commaSep1 selectItem
== from
Here is the rough grammar for joins
tref
(cross | [natural] ([inner] | (left | right | full) [outer])) join
tref
[on expr | using (...)]
> from :: Parser [TableRef]
> from = try (keyword_ "from") *> commaSep1 tref
> where
> tref = nonJoinTref >>= optionSuffix joinTrefSuffix
> nonJoinTref = choice [try (TRQueryExpr <$> parens queryExpr)
> ,TRParens <$> parens tref
> ,TRLateral <$> (try (keyword_ "lateral")
> *> nonJoinTref)
> ,try (TRFunction <$> name
> <*> parens (commaSep valueExpr))
> ,TRSimple <$> name]
> >>= optionSuffix aliasSuffix
> aliasSuffix j = option j (TRAlias j <$> alias)
> joinTrefSuffix t = (do
> nat <- option False $ try (True <$ try (keyword_ "natural"))
> TRJoin t <$> joinType
> <*> nonJoinTref
> <*> optionMaybe (joinCondition nat))
> >>= optionSuffix joinTrefSuffix
> joinType =
> choice [choice
> [JCross <$ try (keyword_ "cross")
> ,JInner <$ try (keyword_ "inner")
> ,choice [JLeft <$ try (keyword_ "left")
> ,JRight <$ try (keyword_ "right")
> ,JFull <$ try (keyword_ "full")]
> <* optional (try $ keyword_ "outer")]
> <* keyword "join"
> ,JInner <$ keyword_ "join"]
> joinCondition nat =
> choice [guard nat >> return JoinNatural
> ,try (keyword_ "on") >>
> JoinOn <$> valueExpr
> ,try (keyword_ "using") >>
> JoinUsing <$> parens (commaSep1 name)
> ]
> alias :: Parser Alias
> alias = Alias <$> try tableAlias <*> try columnAliases
> where
> tableAlias = optional (try $ keyword_ "as") *> name
> columnAliases = optionMaybe $ try $ parens $ commaSep1 name
== simple other parts
Parsers for where, group by, having, order by and limit, which are
pretty trivial.
Here is a helper for parsing a few parts of the query expr (currently
where, having, limit, offset).
> keywordValueExpr :: String -> Parser ValueExpr
> keywordValueExpr k = try (keyword_ k) *> valueExpr
> swhere :: Parser ValueExpr
> swhere = keywordValueExpr "where"
> sgroupBy :: Parser [GroupingExpr]
> sgroupBy = try (keyword_ "group")
> *> keyword_ "by"
> *> commaSep1 groupingExpression
> where
> groupingExpression =
> choice
> [try (keyword_ "cube") >>
> Cube <$> parens (commaSep groupingExpression)
> ,try (keyword_ "rollup") >>
> Rollup <$> parens (commaSep groupingExpression)
> ,GroupingParens <$> parens (commaSep groupingExpression)
> ,try (keyword_ "grouping") >> keyword_ "sets" >>
> GroupingSets <$> parens (commaSep groupingExpression)
> ,SimpleGroup <$> valueExpr
> ]
> having :: Parser ValueExpr
> having = keywordValueExpr "having"
> orderBy :: Parser [SortSpec]
> orderBy = try (keyword_ "order") *> keyword_ "by" *> commaSep1 ob
> where
> ob = SortSpec
> <$> valueExpr
> <*> option Asc (choice [Asc <$ keyword_ "asc"
> ,Desc <$ keyword_ "desc"])
> <*> option NullsOrderDefault
> (try (keyword_ "nulls" >>
> choice [NullsFirst <$ keyword "first"
> ,NullsLast <$ keyword "last"]))
allows offset and fetch in either order
+ postgresql offset without row(s) and limit instead of fetch also
> offsetFetch :: Parser (Maybe ValueExpr, Maybe ValueExpr)
> offsetFetch = permute ((,) <$?> (Nothing, Just <$> offset)
> <|?> (Nothing, Just <$> fetch))
> offset :: Parser ValueExpr
> offset = try (keyword_ "offset") *> valueExpr
> <* option () (try $ choice [try (keyword_ "rows"),keyword_ "row"])
> fetch :: Parser ValueExpr
> fetch = choice [ansiFetch, limit]
> where
> ansiFetch = try (keyword_ "fetch") >>
> choice [keyword_ "first",keyword_ "next"]
> *> valueExpr
> <* choice [keyword_ "rows",keyword_ "row"]
> <* keyword_ "only"
> limit = try (keyword_ "limit") *> valueExpr
== common table expressions
> with :: Parser QueryExpr
> with = try (keyword_ "with") >>
> With <$> option False (try (True <$ keyword_ "recursive"))
> <*> commaSep1 withQuery <*> queryExpr
> where
> withQuery =
> (,) <$> (alias <* optional (try $ keyword_ "as"))
> <*> parens queryExpr
== query expression
This parser parses any query expression variant: normal select, cte,
and union, etc..
> queryExpr :: Parser QueryExpr
> queryExpr =
> choice [with
> ,choice [values,table, select]
> >>= optionSuffix queryExprSuffix]
> where
> select = try (keyword_ "select") >>
> mkSelect
> <$> (fromMaybe All <$> duplicates)
> <*> selectList
> <*> option [] from
> <*> optionMaybe swhere
> <*> option [] sgroupBy
> <*> optionMaybe having
> <*> option [] orderBy
> <*> offsetFetch
> mkSelect d sl f w g h od (ofs,fe) =
> Select d sl f w g h od ofs fe
> values = try (keyword_ "values")
> >> Values <$> commaSep (parens (commaSep valueExpr))
> table = try (keyword_ "table") >> Table <$> name
> queryExprSuffix :: QueryExpr -> Parser QueryExpr
> queryExprSuffix qe =
> (CombineQueryExpr qe
> <$> try (choice
> [Union <$ keyword_ "union"
> ,Intersect <$ keyword_ "intersect"
> ,Except <$ keyword_ "except"])
> <*> (fromMaybe All <$> duplicates)
> <*> option Respectively
> (try (Corresponding <$ keyword_ "corresponding"))
> <*> queryExpr)
> >>= optionSuffix queryExprSuffix
wrapper for query expr which ignores optional trailing semicolon.
> topLevelQueryExpr :: Parser QueryExpr
> topLevelQueryExpr =
> queryExpr >>= optionSuffix ((symbol ";" *>) . return)
wrapper to parse a series of query exprs from a single source. They
must be separated by semicolon, but for the last expression, the
trailing semicolon is optional.
> queryExprs :: Parser [QueryExpr]
> queryExprs =
> (:[]) <$> queryExpr
> >>= optionSuffix ((symbol ";" *>) . return)
> >>= optionSuffix (\p -> (p++) <$> queryExprs)
------------------------------------------------
= lexing parsers
The lexing is a bit 'virtual', in the usual parsec style. The
convention in this file is to put all the parsers which access
characters directly or indirectly here (i.e. ones which use char,
string, digit, etc.), except for the parsers which only indirectly
access them via these functions, if you follow?
> symbol :: String -> Parser String
> symbol s = string s
>
> <* whiteSpace
> symbol_ :: String -> Parser ()
> symbol_ s = symbol s *> return ()
TODO: now that keyword has try in it, a lot of the trys above can be
removed
> keyword :: String -> Parser String
> keyword s = try $ do
> i <- identifierRaw
> guard (map toLower i == map toLower s)
> return i
> keyword_ :: String -> Parser ()
> keyword_ s = keyword s *> return ()
Identifiers are very simple at the moment: start with a letter or
underscore, and continue with letter, underscore or digit. It doesn't
support quoting other other sorts of identifiers yet. There is a
blacklist of keywords which aren't supported as identifiers.
the identifier raw doesn't check the blacklist since it is used by the
keyword parser also
> identifierRaw :: Parser String
> identifierRaw = (:) <$> letterOrUnderscore
> <*> many letterDigitOrUnderscore <* whiteSpace
> where
> letterOrUnderscore = char '_' <|> letter
> letterDigitOrUnderscore = char '_' <|> alphaNum
> identifierString :: Parser String
> identifierString = do
> s <- identifierRaw
> guard (map toLower s `notElem` blacklist)
> return s
> blacklist :: [String]
> blacklist =
> ["select", "as", "from", "where", "having", "group", "order"
> ,"limit", "offset", "fetch"
> ,"inner", "left", "right", "full", "natural", "join"
> ,"cross", "on", "using", "lateral"
> ,"when", "then", "case", "end", "in"
> ,"except", "intersect", "union"]
These blacklisted names are mostly needed when we parse something with
an optional alias, e.g. select a a from t. If we write select a from
t, we have to make sure the from isn't parsed as an alias. I'm not
sure what other places strictly need the blacklist, and in theory it
could be tuned differently for each place the identifierString/
identifier parsers are used to only blacklist the bare minimum.
> quotedIdentifier :: Parser String
> quotedIdentifier = char '"' *> manyTill anyChar (symbol_ "\"")
String literals: limited at the moment, no escaping \' or other
variations.
> stringLiteral :: Parser String
> stringLiteral = (char '\'' *> manyTill anyChar (char '\'')
> >>= optionSuffix moreString) <* whiteSpace
> where
> moreString s0 = try $ do
> void $ char '\''
> s <- manyTill anyChar (char '\'')
> optionSuffix moreString (s0 ++ "'" ++ s)
number literals
here is the rough grammar target:
digits
digits.[digits][e[+-]digits]
[digits].digits[e[+-]digits]
digitse[+-]digits
numbers are parsed to strings, not to a numeric type. This is to avoid
making a decision on how to represent numbers, the client code can
make this choice.
> numberLiteral :: Parser String
> numberLiteral =
> choice [int
> >>= optionSuffix dot
> >>= optionSuffix fracts
> >>= optionSuffix expon
> ,fract "" >>= optionSuffix expon]
> <* whiteSpace
> where
> int = many1 digit
> fract p = dot p >>= fracts
> dot p = (p++) <$> string "."
> fracts p = (p++) <$> int
> expon p = concat <$> sequence
> [return p
> ,string "e"
> ,option "" (string "+" <|> string "-")
> ,int]
lexer for integer literals which appear in some places in SQL
> integerLiteral :: Parser Int
> integerLiteral = read <$> many1 digit <* whiteSpace
whitespace parser which skips comments also
> whiteSpace :: Parser ()
> whiteSpace =
> choice [simpleWhiteSpace *> whiteSpace
> ,lineComment *> whiteSpace
> ,blockComment *> whiteSpace
> ,return ()]
> where
> lineComment = try (string "--")
> *> manyTill anyChar (void (char '\n') <|> eof)
> blockComment =
> try (string "/*")
>
> *> manyTill anyChar (try $ string "*/")
>
> simpleWhiteSpace = void $ many1 (oneOf " \t\n")
= generic parser helpers
a possible issue with the option suffix is that it enforces left
associativity when chaining it recursively. Have to review
all these uses and figure out if any should be right associative
instead, and create an alternative suffix parser
> optionSuffix :: (a -> Parser a) -> a -> Parser a
> optionSuffix p a = option a (p a)
> parens :: Parser a -> Parser a
> parens = between (symbol_ "(") (symbol_ ")")
> commaSep :: Parser a -> Parser [a]
> commaSep = (`sepBy` symbol_ ",")
> commaSep1 :: Parser a -> Parser [a]
> commaSep1 = (`sepBy1` symbol_ ",")
--------------------------------------------
= helper functions
> setPos :: Maybe (Int,Int) -> Parser ()
> setPos Nothing = return ()
> setPos (Just (l,c)) = fmap f getPosition >>= setPosition
> where f = flip setSourceColumn c
> . flip setSourceLine l
> convParseError :: String -> P.ParseError -> ParseError
> convParseError src e =
> ParseError
> {peErrorString = show e
> ,peFilename = sourceName p
> ,pePosition = (sourceLine p, sourceColumn p)
> ,peFormattedError = formatError src e
> }
> where
> p = errorPos e
format the error more nicely: emacs format for positioning, plus
context
> formatError :: String -> P.ParseError -> String
> formatError src e =
> sourceName p ++ ":" ++ show (sourceLine p)
> ++ ":" ++ show (sourceColumn p) ++ ":"
> ++ context
> ++ show e
> where
> context =
> let lns = take 1 $ drop (sourceLine p 1) $ lines src
> in case lns of
> [x] -> "\n" ++ x ++ "\n"
> ++ replicate (sourceColumn p 1) ' ' ++ "^\n"
> _ -> ""
> p = errorPos e