{-# LANGUAGE LambdaCase #-} module Http.Header ( Header (..) , decodeMany , parser , parserSmallArray , builder , builderSmallArray ) where import Data.Bytes (Bytes) import Data.Bytes.Builder (Builder) import Data.Bytes.Parser (Parser) import Data.Bytes.Types (Bytes (Bytes)) import Data.Primitive (ByteArray (ByteArray), SmallArray, SmallMutableArray) import Data.Text (Text) import Data.Bytes qualified as Bytes import Data.Bytes.Builder qualified as Builder import Data.Bytes.Parser qualified as Parser import Data.Bytes.Parser.Latin qualified as Latin import Data.Bytes.Text.Utf8 qualified as Utf8 import Data.Primitive qualified as PM import Data.Text.Array qualified import Data.Text.Internal qualified as Text {- | An HTTP header. This type does not enforce a restricted character set. If, for example, the user creates a header whose key has a colon character, the resulting request will be malformed. -} data Header = Header { name :: {-# UNPACK #-} !Text , value :: {-# UNPACK #-} !Text } deriving (Eq, Show) uninitializedHeader :: Header {-# NOINLINE uninitializedHeader #-} uninitializedHeader = errorWithoutStackTrace "parserHeaders: uninitialized header" {- | Parse headers. Expects two CRLF sequences in a row at the end. Fails if leftovers are encountered. -} decodeMany :: Int -> Bytes -> Maybe (SmallArray Header) decodeMany !n !b = Parser.parseBytesMaybe (parserSmallArray n <* Parser.endOfInput ()) b -- Parse headers. Stops after encountering two CRLF sequences in -- a row. parserSmallArray :: Int -> -- maximum number of headers allowed, recommended 128 Parser () s (SmallArray Header) parserSmallArray !n = do dst <- Parser.effect (PM.newSmallArray n uninitializedHeader) parserHeaderStep 0 n dst parserHeaderStep :: Int -> -- index Int -> -- remaining length SmallMutableArray s Header -> Parser () s (SmallArray Header) parserHeaderStep !ix !n !dst = Latin.trySatisfy (== '\r') >>= \case True -> do Latin.char () '\n' Parser.effect $ do PM.shrinkSmallMutableArray dst ix PM.unsafeFreezeSmallArray dst False -> if n > 0 then do header <- parser Parser.effect (PM.writeSmallArray dst ix header) parserHeaderStep (ix + 1) (n - 1) dst else Parser.fail () {- | Parse a single HTTP header including the trailing CRLF sequence. From RFC 7230: > header-field = field-name ":" OWS field-value OWS > field-name = token > field-value = *( field-content / obs-fold ) > field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] > field-vchar = VCHAR / obs-text -} parser :: Parser () s Header parser = do -- Header name may contain: a-z, A-Z, 0-9, underscore, hyphen !name <- Parser.takeWhile $ \c -> (c >= 0x41 && c <= 0x5A) || (c >= 0x61 && c <= 0x7A) || (c >= 0x30 && c <= 0x39) || c == 0x2D || c == 0x5F Latin.char () ':' Latin.skipWhile (\c -> c == ' ' || c == '\t') -- Header name allows vchar, space, and tab. value0 <- Parser.takeWhile $ \c -> (c >= 0x20 && c <= 0x7e) || (c == 0x09) Latin.char2 () '\r' '\n' -- We only need to trim the end because the leading spaces and tab -- were already skipped. let !value = Bytes.dropWhileEnd (\c -> c == 0x20 || c == 0x09) value0 pure Header {name = unsafeBytesToText name, value = unsafeBytesToText value} unsafeBytesToText :: Bytes -> Text {-# INLINE unsafeBytesToText #-} unsafeBytesToText (Bytes (ByteArray arr) off len) = Text.Text (Data.Text.Array.ByteArray arr) off len -- | Encode a header. Includes the trailing CRLF sequence. builder :: Header -> Builder builder Header {name, value} = Builder.copy (Utf8.fromText name) <> Builder.ascii2 ':' ' ' <> Builder.copy (Utf8.fromText value) <> Builder.ascii2 '\r' '\n' builderSmallArray :: SmallArray Header -> Builder builderSmallArray = foldMap builder