{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE OverloadedStrings #-}
module Snap.Internal.Parsing where
import Control.Applicative (Alternative ((<|>)), Applicative (pure, (*>), (<*)), liftA2, (<$>))
import Control.Arrow (first, second)
import Control.Monad (Monad (return), MonadPlus (mzero), liftM, when)
import Data.Attoparsec.ByteString.Char8 (IResult (Done, Fail, Partial), Parser, Result, anyChar, char, choice, decimal, endOfInput, feed, inClass, isDigit, isSpace, letter_ascii, many', match, option, parse, satisfy, skipSpace, skipWhile, string, take, takeTill, takeWhile, sepBy')
import qualified Data.Attoparsec.ByteString.Char8 as AP
import Data.Bits (Bits (unsafeShiftL, (.&.), (.|.)))
import Data.ByteString.Builder (Builder, byteString, char8, toLazyByteString, word8)
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as S
import Data.ByteString.Internal (c2w, w2c)
import qualified Data.ByteString.Lazy.Char8 as L
import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI (mk)
import Data.Char (Char, intToDigit, isAlpha, isAlphaNum, isAscii, isControl, isHexDigit, ord)
import Data.Int (Int64)
import Data.List (concat, intercalate, intersperse)
import Data.Map (Map)
import qualified Data.Map as Map (empty, insertWith, toList)
import Data.Maybe (Maybe (..), maybe)
import Data.Monoid (Monoid (mconcat, mempty), (<>))
import Data.Word (Word8)
import GHC.Exts (Int (I#), uncheckedShiftRL#, word2Int#)
import GHC.Word (Word8 (..))
import Prelude (Bool (..), Either (..), Enum (fromEnum, toEnum), Eq (..), Num (..), Ord (..), String, and, any, concatMap, elem, error, filter, flip, foldr, fst, id, map, not, otherwise, show, snd, ($), ($!), (&&), (++), (.), (||))
import Snap.Internal.Http.Types (Cookie (Cookie))
{-# INLINE fullyParse #-}
fullyParse :: ByteString -> Parser a -> Either String a
fullyParse = fullyParse' parse feed
{-# INLINE (<?>) #-}
(<?>) :: Parser a -> String -> Parser a
(<?>) a !b = (AP.<?>) a b
infix 0 <?>
{-# INLINE fullyParse' #-}
fullyParse' :: (Parser a -> ByteString -> Result a)
-> (Result a -> ByteString -> Result a)
-> ByteString
-> Parser a
-> Either String a
fullyParse' parseFunc feedFunc s p =
case r' of
(Fail _ context e) -> Left $ concat [ "Parsing "
, intercalate "/" context
, ": "
, e
, "."
]
(Partial _) -> Left "parse failed"
(Done _ x) -> Right x
where
r = parseFunc p s
r' = feedFunc r ""
parseNum :: Parser Int64
parseNum = decimal
untilEOL :: Parser ByteString
untilEOL = takeWhile notend <?> "untilEOL"
where
notend c = not $ c == '\r' || c == '\n'
crlf :: Parser ByteString
crlf = string "\r\n" <?> "crlf"
toTableList :: (Char -> Bool) -> [Char]
toTableList f = l
where
g c = c /= '-' && f c
!l1 = filter g $ map w2c [0..255]
!l0 = if f '-' then ['-'] else []
!l = l0 ++ l1
{-# INLINE toTableList #-}
toTable :: (Char -> Bool) -> (Char -> Bool)
toTable = inClass . toTableList
{-# INLINE toTable #-}
skipFieldChars :: Parser ()
skipFieldChars = skipWhile isFieldChar
isFieldChar :: Char -> Bool
isFieldChar = toTable f
where
f c = (isDigit c) || (isAlpha c) || c == '-' || c == '_'
pHeaders :: Parser [(ByteString, ByteString)]
pHeaders = many' header <?> "headers"
where
slurp p = fst <$> match p
header = {-# SCC "pHeaders/header" #-}
liftA2 (,)
fieldName
(char ':' *> skipSpace *> contents)
fieldName = {-# SCC "pHeaders/fieldName" #-}
slurp (letter_ascii *> skipFieldChars)
contents = {-# SCC "pHeaders/contents" #-}
liftA2 S.append
(untilEOL <* crlf)
(continuation <|> pure S.empty)
isLeadingWS w = {-# SCC "pHeaders/isLeadingWS" #-}
w == ' ' || w == '\t'
leadingWhiteSpace = {-# SCC "pHeaders/leadingWhiteSpace" #-}
skipWhile1 isLeadingWS
continuation = {-# SCC "pHeaders/continuation" #-}
liftA2 S.cons
(leadingWhiteSpace *> pure ' ')
contents
skipWhile1 f = satisfy f *> skipWhile f
pWord :: Parser ByteString
pWord = pWord' isRFCText
pWord' :: (Char -> Bool) -> Parser ByteString
pWord' charPred = pQuotedString' charPred <|> (takeWhile (/= ';'))
pQuotedString :: Parser ByteString
pQuotedString = pQuotedString' isRFCText
pQuotedString' :: (Char -> Bool) -> Parser ByteString
pQuotedString' charPred = q *> quotedText <* q
where
quotedText = (S.concat . L.toChunks . toLazyByteString) <$> f mempty
f soFar = do
t <- takeWhile qdtext
let soFar' = soFar <> byteString t
choice [ string "\\\"" *> f (soFar' <> char8 '"')
, pure soFar' ]
q = char '"'
qdtext = matchAll [ charPred, (/= '"'), (/= '\\') ]
{-# INLINE isRFCText #-}
isRFCText :: Char -> Bool
isRFCText = not . isControl
{-# INLINE matchAll #-}
matchAll :: [ Char -> Bool ] -> Char -> Bool
matchAll x c = and $ map ($ c) x
pAvPairs :: Parser [(ByteString, ByteString)]
pAvPairs = do
a <- pAvPair
b <- many' (skipSpace *> char ';' *> skipSpace *> pAvPair)
return $! a:b
{-# INLINE pAvPair #-}
pAvPair :: Parser (ByteString, ByteString)
pAvPair = do
key <- pToken <* skipSpace
val <- liftM trim (option "" $ char '=' *> skipSpace *> pWord)
return $! (key, val)
pParameter :: Parser (ByteString, ByteString)
pParameter = pParameter' isRFCText
pParameter' :: (Char -> Bool) -> Parser (ByteString, ByteString)
pParameter' valueCharPred = parser <?> "pParameter'"
where
parser = do
key <- pToken <* skipSpace
val <- liftM trim (char '=' *> skipSpace *> pWord' valueCharPred)
return $! (trim key, val)
{-# INLINE trim #-}
trim :: ByteString -> ByteString
trim = snd . S.span isSpace . fst . S.spanEnd isSpace
pValueWithParameters :: Parser (ByteString, [(CI ByteString, ByteString)])
pValueWithParameters = pValueWithParameters' isRFCText
pValueWithParameters' :: (Char -> Bool) -> Parser (ByteString, [(CI ByteString, ByteString)])
pValueWithParameters' valueCharPred = parser <?> "pValueWithParameters'"
where
parser = do
value <- liftM trim (skipSpace *> takeWhile (/= ';'))
params <- many' pParam
endOfInput
return (value, map (first CI.mk) params)
pParam = skipSpace *> char ';' *> skipSpace *> pParameter' valueCharPred
pContentTypeWithParameters :: Parser ( ByteString
, [(CI ByteString, ByteString)] )
pContentTypeWithParameters = parser <?> "pContentTypeWithParameters"
where
parser = do
value <- liftM trim (skipSpace *> takeWhile (not . isSep))
params <- many' (skipSpace *> satisfy isSep *> skipSpace *> pParameter)
endOfInput
return $! (value, map (first CI.mk) params)
isSep c = c == ';' || c == ','
{-# INLINE pToken #-}
pToken :: Parser ByteString
pToken = takeWhile isToken
{-# INLINE isToken #-}
isToken :: Char -> Bool
isToken = toTable f
where
f = matchAll [ isAscii
, not . isControl
, not . isSpace
, not . flip elem [ '(', ')', '<', '>', '@', ',', ';'
, ':', '\\', '\"', '/', '[', ']'
, '?', '=', '{', '}' ]
]
{-# INLINE pTokens #-}
pTokens :: Parser [ByteString]
pTokens = (skipSpace *> pToken <* skipSpace) `sepBy'` char ','
{-# INLINE parseToCompletion #-}
parseToCompletion :: Parser a -> ByteString -> Maybe a
parseToCompletion p s = toResult $ finish r
where
r = parse p s
toResult (Done _ c) = Just c
toResult _ = Nothing
type DList a = [a] -> [a]
pUrlEscaped :: Parser ByteString
pUrlEscaped = do
sq <- nextChunk id
return $! S.concat $ sq []
where
nextChunk :: DList ByteString -> Parser (DList ByteString)
nextChunk !s = (endOfInput *> pure s) <|> do
c <- anyChar
case c of
'+' -> plusSpace s
'%' -> percentEncoded s
_ -> unEncoded c s
percentEncoded :: DList ByteString -> Parser (DList ByteString)
percentEncoded !l = do
hx <- take 2
when (S.length hx /= 2 || (not $ S.all isHexDigit hx)) $
mzero
let code = w2c ((unsafeFromHex hx) :: Word8)
nextChunk $ l . ((S.singleton code) :)
unEncoded :: Char -> DList ByteString -> Parser (DList ByteString)
unEncoded !c !l' = do
let l = l' . ((S.singleton c) :)
bs <- takeTill (flip elem ['%', '+'])
if S.null bs
then nextChunk l
else nextChunk $ l . (bs :)
plusSpace :: DList ByteString -> Parser (DList ByteString)
plusSpace l = nextChunk (l . ((S.singleton ' ') :))
urlDecode :: ByteString -> Maybe ByteString
urlDecode = parseToCompletion pUrlEscaped
{-# INLINE urlDecode #-}
urlEncode :: ByteString -> ByteString
urlEncode = S.concat . L.toChunks . toLazyByteString . urlEncodeBuilder
{-# INLINE urlEncode #-}
urlEncodeBuilder :: ByteString -> Builder
urlEncodeBuilder = go mempty
where
go !b !s = maybe b' esc (S.uncons y)
where
(x,y) = S.span urlEncodeClean s
b' = b <> byteString x
esc (c,r) = let b'' = if c == ' '
then b' <> char8 '+'
else b' <> hexd c
in go b'' r
urlEncodeClean :: Char -> Bool
urlEncodeClean = toTable f
where
f c = any ($ c) [\c' -> isAscii c' && isAlphaNum c'
, flip elem [ '$', '_', '-', '.', '!'
, '*' , '\'', '(', ')', ',' ]]
hexd :: Char -> Builder
hexd c0 = char8 '%' <> word8 hi <> word8 low
where
!c = c2w c0
toDigit = c2w . intToDigit
!low = toDigit $ fromEnum $ c .&. 0xf
!hi = toDigit $ (c .&. 0xf0) `shiftr` 4
shiftr (W8# a#) (I# b#) = I# (word2Int# (uncheckedShiftRL# a# b#))
finish :: Result a -> Result a
finish (Partial f) = flip feed "" $ f ""
finish x = x
parseUrlEncoded :: ByteString -> Map ByteString [ByteString]
parseUrlEncoded s = foldr ins Map.empty decoded
where
ins (!k,v) !m = Map.insertWith (++) k [v] m
parts :: [(ByteString,ByteString)]
parts = map breakApart $
S.splitWith (\c -> c == '&' || c == ';') s
breakApart = (second (S.drop 1)) . S.break (== '=')
urldecode = parseToCompletion pUrlEscaped
decodeOne (a,b) = do
!a' <- urldecode a
!b' <- urldecode b
return $! (a',b')
decoded = go id parts
where
go !dl [] = dl []
go !dl (x:xs) = maybe (go dl xs)
(\p -> go (dl . (p:)) xs)
(decodeOne x)
buildUrlEncoded :: Map ByteString [ByteString] -> Builder
buildUrlEncoded m = mconcat builders
where
builders = intersperse (char8 '&') $
concatMap encodeVS $ Map.toList m
encodeVS (k,vs) = map (encodeOne k) vs
encodeOne k v = mconcat [ urlEncodeBuilder k
, char8 '='
, urlEncodeBuilder v ]
printUrlEncoded :: Map ByteString [ByteString] -> ByteString
printUrlEncoded = S.concat . L.toChunks . toLazyByteString . buildUrlEncoded
pCookies :: Parser [Cookie]
pCookies = do
kvps <- pAvPairs
return $! map toCookie $ filter (not . S.isPrefixOf "$" . fst) kvps
where
toCookie (nm,val) = Cookie nm val Nothing Nothing Nothing False False
parseCookie :: ByteString -> Maybe [Cookie]
parseCookie = parseToCompletion pCookies
unsafeFromHex :: (Enum a, Num a, Bits a) => ByteString -> a
unsafeFromHex = S.foldl' f 0
where
#if MIN_VERSION_base(4,5,0)
sl = unsafeShiftL
#else
sl = shiftL
#endif
f !cnt !i = sl cnt 4 .|. nybble i
nybble c | c >= '0' && c <= '9' = toEnum $! fromEnum c - fromEnum '0'
| c >= 'a' && c <= 'f' = toEnum $! 10 + fromEnum c - fromEnum 'a'
| c >= 'A' && c <= 'F' = toEnum $! 10 + fromEnum c - fromEnum 'A'
| otherwise = error $ "bad hex digit: " ++ show c
{-# INLINE unsafeFromHex #-}
unsafeFromNat :: (Enum a, Num a, Bits a) => ByteString -> a
unsafeFromNat = S.foldl' f 0
where
zero = ord '0'
f !cnt !i = cnt * 10 + toEnum (digitToInt i)
digitToInt c = if d >= 0 && d <= 9
then d
else error $ "bad digit: '" ++ [c] ++ "'"
where
!d = ord c - zero
{-# INLINE unsafeFromNat #-}