module Hunt.Query.Language.Grammar
(
Query (..)
, BinOp (..)
, TextSearchType (..)
, escapeChar
, notWordChar
, optimize
, checkWith
, extractTerms
, printQuery
)
where
import Control.Applicative
import Control.Monad
import Data.Aeson
import Data.Binary
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Binary ()
import Hunt.Common.BasicTypes as BTy
import Text.Read (readMaybe)
data Query
= QWord TextSearchType Text
| QFullWord TextSearchType Text
| QPhrase TextSearchType Text
| QContext [Context] Query
| QBinary BinOp Query Query
| QSeq BinOp [Query]
| QBoost Weight Query
| QRange Text Text
deriving (Eq, Show)
data TextSearchType
= QCase
| QNoCase
| QFuzzy
deriving (Eq, Show)
data BinOp
= And
| Or
| AndNot
| Phrase
| Follow Int
| Near Int
deriving (Eq, Show)
instance ToJSON Query where
toJSON o = case o of
QWord op w -> object . ty "word" $ [ "op" .= op, "word" .= w ]
QFullWord op w -> object . ty "fullword" $ [ "op" .= op, "word" .= w ]
QPhrase op s -> object . ty "phrase" $ [ "op" .= op, "phrase" .= s ]
QContext c q -> object . ty "context" $ [ "contexts" .= c , "query" .= q ]
QBinary op q1 q2 -> object . ty' op $ [ "query1" .= q1, "query2" .= q2 ]
QSeq op qs -> object . ty "seq" $ [ "op" .= op, "args" .= qs ]
QBoost w q -> object . ty "boost" $ [ "weight" .= w, "query" .= q ]
QRange l u -> object . ty "range" $ [ "lower" .= l, "upper" .= u ]
where
ty' t = (:) ("type" .= t)
ty t = ty' (t :: Text)
instance FromJSON Query where
parseJSON (Object o) = do
t <- o .: "type"
case (t :: Text) of
"word"
-> QWord <$> (o .: "op") <*> (o .: "word")
"fullword"
-> QFullWord <$> (o .: "op") <*> (o .: "word")
"phrase"
-> QPhrase <$> (o .: "op") <*> (o .: "phrase")
"context"
-> QContext <$> (o .: "contexts") <*> (o .: "query")
"boost"
-> QBoost <$> (o .: "weight") <*> (o .: "query")
"range"
-> QRange <$> (o .: "lower") <*> (o .: "upper")
"and"
-> bin And
"or"
-> bin Or
"and not"
-> bin AndNot
"seq"
-> QSeq <$> (o .: "op") <*> (o .: "args")
_ -> mzero
where
bin op
= QBinary op <$> (o .: "query1") <*> (o .: "query2")
parseJSON _ = mzero
instance ToJSON TextSearchType where
toJSON o = case o of
QCase -> "case"
QNoCase -> "nocase"
QFuzzy -> "fuzzy"
instance FromJSON TextSearchType where
parseJSON (String s)
= case s of
"case" -> return QCase
"nocase" -> return QNoCase
"fuzzy" -> return QFuzzy
_ -> mzero
parseJSON _ = mzero
instance ToJSON BinOp where
toJSON o = case o of
And -> "and"
Or -> "or"
AndNot -> "and not"
Phrase -> "phrase"
Follow d -> String $ "follow " <> T.pack (show d)
Near d -> String $ "near " <> T.pack (show d)
instance FromJSON BinOp where
parseJSON (String s)
= case T.words s of
["and"] -> return And
["or"] -> return Or
["and", "not"] -> return AndNot
["phrase"] -> return Phrase
["follow", d] -> maybe mzero (return . Follow) . readMaybe . T.unpack $ d
["near", d] -> maybe mzero (return . Near ) . readMaybe . T.unpack $ d
_ -> mzero
parseJSON _ = mzero
instance Binary Query where
put (QWord op s) = put (0 :: Word8) >> put op >> put s
put (QFullWord op s) = put (7 :: Word8) >> put op >> put s
put (QPhrase op s) = put (1 :: Word8) >> put op >> put s
put (QContext c q) = put (2 :: Word8) >> put c >> put q
put (QBinary o q1 q2) = put (4 :: Word8) >> put o >> put q1 >> put q2
put (QSeq o qs) = put (8 :: Word8) >> put o >> put qs
put (QBoost w q) = put (5 :: Word8) >> put w >> put q
put (QRange l u) = put (6 :: Word8) >> put l >> put u
get = do
tag <- getWord8
case tag of
0 -> QWord <$> get <*> get
7 -> QFullWord <$> get <*> get
1 -> QPhrase <$> get <*> get
2 -> QContext <$> get <*> get
4 -> QBinary <$> get <*> get <*> get
8 -> QSeq <$> get <*> get
5 -> QBoost <$> get <*> get
6 -> QRange <$> get <*> get
_ -> fail "Error while decoding Query"
instance Binary TextSearchType where
put QCase = put (0 :: Word8)
put QNoCase = put (1 :: Word8)
put QFuzzy = put (2 :: Word8)
get = do
tag <- getWord8
case tag of
0 -> return QCase
1 -> return QNoCase
2 -> return QFuzzy
_ -> fail "Error while decoding BinOp"
instance Binary BinOp where
put And = put (0 :: Word8)
put Or = put (1 :: Word8)
put AndNot = put (2 :: Word8)
put Phrase = put (3 :: Word8)
put (Follow d) = put (4 :: Word8) >> put d
put (Near d) = put (5 :: Word8) >> put d
get = do
tag <- getWord8
case tag of
0 -> return And
1 -> return Or
2 -> return AndNot
3 -> return Phrase
4 -> Follow <$> get
5 -> Near <$> get
_ -> fail "Error while decoding BinOp"
notWordChar :: String
notWordChar = escapeChar : "\"')([]^ \n\r\t"
escapeChar :: Char
escapeChar = '\\'
optimize :: Query -> Query
optimize q@(QBinary And (QWord QNoCase q1) (QWord QNoCase q2))
| T.toLower q1 `T.isPrefixOf` T.toLower q2 = QWord QNoCase q2
| T.toLower q2 `T.isPrefixOf` T.toLower q1 = QWord QNoCase q1
| otherwise = q
optimize q@(QBinary And (QWord QCase q1) (QWord QCase q2))
| q1 `T.isPrefixOf` q2 = QWord QCase q2
| q2 `T.isPrefixOf` q1 = QWord QCase q1
| otherwise = q
optimize q@(QBinary Or (QWord QNoCase q1) (QWord QNoCase q2))
| T.toLower q1 `T.isPrefixOf` T.toLower q2 = QWord QNoCase q1
| T.toLower q2 `T.isPrefixOf` T.toLower q1 = QWord QNoCase q2
| otherwise = q
optimize q@(QBinary Or (QWord QCase q1) (QWord QCase q2))
| q1 `T.isPrefixOf` q2 = QWord QCase q1
| q2 `T.isPrefixOf` q1 = QWord QCase q2
| otherwise = q
optimize (QBinary And q1 q2) = QBinary And (optimize q1) (optimize q2)
optimize (QBinary Or q1 q2) = QBinary Or (optimize q1) (optimize q2)
optimize (QBinary AndNot q1 q2) = QBinary AndNot (optimize q1) (optimize q2)
optimize (QContext cs q) = QContext cs (optimize q)
optimize (QBoost w q) = QBoost w (optimize q)
optimize q = q
checkWith :: (Text -> Bool) -> Query -> Bool
checkWith f (QWord _ s) = f s
checkWith f (QFullWord _ s) = f s
checkWith f (QPhrase _ s) = f s
checkWith f (QBinary _ q1 q2) = checkWith f q1 && checkWith f q2
checkWith f (QSeq _ qs) = and $ map (checkWith f) qs
checkWith f (QContext _ q) = checkWith f q
checkWith f (QBoost _ q) = checkWith f q
checkWith f (QRange s1 s2) = f s1 && f s2
extractTerms :: Query -> [Text]
extractTerms (QWord _ s) = [s]
extractTerms (QFullWord _ s) = [s]
extractTerms (QContext _ q) = extractTerms q
extractTerms (QBinary _ q1 q2) = extractTerms q1 ++ extractTerms q2
extractTerms _ = []
printQuery :: Query -> Text
printQuery (QWord QNoCase w)
= printWord w
printQuery (QWord QCase w)
= "!" <> printWord w
printQuery (QWord QFuzzy w)
= "~" <> printWord w
printQuery (QFullWord QNoCase w)
= printPhrase w
printQuery (QFullWord QCase w)
= "!" <> printPhrase w
printQuery (QFullWord QFuzzy w)
= "~" <> printPhrase w
printQuery (QPhrase _ w)
= printPhrase w
printQuery (QContext [] w)
= printQPar w
printQuery (QContext cs' w)
= printCs <> ":(" <> (printQPar w) <> ")"
where
printCs = foldr1 (\l r -> l <> "," <> r) cs'
printQuery (QBinary o l r)
= (printQPar l) <> (printOp o) <> (printQPar r)
printQuery (QSeq _ [])
= ""
printQuery (QSeq _ [q])
= printQuery q
printQuery (QSeq o qs)
= foldr1 (\ res arg -> res <> printOp o <> arg) $
map printQPar qs
printQuery (QBoost w q)
= (printQPar q) <> "^" <> (T.pack $ show $ unScore $ toDefScore $ w)
printQuery (QRange l u)
= "[" <> l <> " TO " <> u <> "]"
printOp :: BinOp -> Text
printOp And = " "
printOp Or = " OR "
printOp AndNot = " AND NOT "
printOp Phrase = " ++ "
printOp (Follow d) = " FOLLOW " <> (T.pack $ show d) <> " "
printOp (Near d) = " NEAR " <> (T.pack $ show d) <> " "
printQPar :: Query -> Text
printQPar q@QWord{} = printQuery q
printQPar q@QFullWord{} = printQuery q
printQPar q@QPhrase{} = printQuery q
printQPar q@QRange{} = printQuery q
printQPar q@QContext{} = printQuery q
printQPar q = "(" <> (printQuery q) <> ")"
printPhrase :: Text -> Text
printPhrase w
= "\"" <> escapeWord toBeQuoted w <> "\""
where
toBeQuoted = (== '\"')
printWord :: Text -> Text
printWord w
| T.any toBeQuoted w = "'" <> escapeWord (== '\'') w <> "'"
| otherwise = w
where
toBeQuoted c = elem c $ notWordChar
escapeWord :: (Char -> Bool) -> Text -> Text
escapeWord p t
= T.concatMap esc t
where
esc c
| p c = T.pack ('\\' : c : [])
| otherwise = T.singleton c