{-# LANGUAGE BangPatterns #-} {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE UnboxedTuples #-} {- | Parse RFC 5424 messages. For example (from the spec itself): > <165>1 2003-10-11T22:14:15.003Z mymachine.example.com > evntslog - ID47 [exampleSDID@32473 iut="3" eventSource="Application" > eventID="1011"] BOMAn application event log entry... -} module Syslog.Ietf ( -- * Types Message (..) , Element (..) , Parameter (..) -- * Full Decode , decode , parser ) where import Prelude hiding (id) import Control.Monad (when) import Control.Monad.ST.Run (runIntByteArrayST) import Data.Bytes.Parser (Parser) import Data.Bytes.Types (Bytes (Bytes)) import Data.Int (Int64) import Data.Primitive (SmallArray) import Data.Word (Word32, Word64, Word8) import qualified Chronos import qualified Data.Bytes.Parser as Parser import qualified Data.Bytes.Parser.Latin as Latin import qualified Data.Bytes.Parser.Unsafe as Unsafe import qualified Data.Bytes.Types import qualified Data.Maybe.Unpacked.Numeric.Word32 as Word32 import qualified Data.Primitive as PM import qualified Data.Primitive.Contiguous as C data Message = Message { priority :: !Word32 , version :: !Word32 , timestamp :: !Chronos.OffsetDatetime , hostname :: {-# UNPACK #-} !Bytes , application :: {-# UNPACK #-} !Bytes , processId :: {-# UNPACK #-} !Word32.Maybe , messageType :: {-# UNPACK #-} !Bytes -- ^ A missing message type, represented as a hyphen in IETF-flavor -- syslog, is represented by the empty byte sequence. , structuredData :: {-# UNPACK #-} !(SmallArray Element) , message :: {-# UNPACK #-} !Bytes } deriving (Show) data Element = Element { id :: {-# UNPACK #-} !Bytes , parameters :: {-# UNPACK #-} !(SmallArray Parameter) } deriving (Show) data Parameter = Parameter { name :: {-# UNPACK #-} !Bytes , value :: {-# UNPACK #-} !Bytes } deriving (Show) -- | Run the RFC 5424 parser. See 'parser'. decode :: Bytes -> Maybe Message decode = Parser.parseBytesMaybe parser -- | Parse a RFC 5424 message. parser :: Parser () s Message parser = do priority <- takePriority () version <- Latin.decWord32 () Latin.char () ' ' timestamp <- takeTimestamp Latin.char () ' ' hostname <- takeKeywordAndSpace () application <- takeKeywordAndSpace () processId <- Latin.trySatisfy (== '-') >>= \case True -> do Latin.char () ' ' pure Word32.nothing False -> Parser.orElse -- This is a hack to smooth over a mistake. The process id -- can actually be things other than a decimal-encoded number. -- Sometimes it is 128-bit or 256-bit hexadecimal number. In -- these cases, we just ignore it. ( do w <- Latin.decWord32 () Latin.char () ' ' pure (Word32.just w) ) ( do Latin.skipWhile (\c -> (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9')) Latin.char () ' ' pure Word32.nothing ) messageType <- Latin.trySatisfy (== '-') >>= \case True -> do Latin.char () ' ' array <- Unsafe.expose pure Bytes {array, offset = 0, length = 0} False -> takeKeywordAndSpace () structuredData <- Latin.trySatisfy (== '-') >>= \case True -> pure mempty False -> takeStructuredData message <- Parser.isEndOfInput >>= \case True -> do arr <- Unsafe.expose pure Bytes {array = arr, offset = 0, length = 0} False -> do Latin.char () ' ' Parser.remaining pure Message { priority , version , timestamp , hostname , application , processId , messageType , structuredData , message } takeStructuredData :: Parser () s (SmallArray Element) takeStructuredData = go 0 [] where go :: Int -> [Element] -> Parser () s (SmallArray Element) go !n !acc = Latin.trySatisfy (== '[') >>= \case True -> do id <- takeKeyword parameters <- takeParameters let !e = Element {id, parameters} go (n + 1) (e : acc) False -> pure $! C.unsafeFromListReverseN n acc takeParameters :: Parser () s (SmallArray Parameter) takeParameters = go 0 [] where go :: Int -> [Parameter] -> Parser () s (SmallArray Parameter) go !n !acc = Latin.trySatisfy (== ']') >>= \case True -> pure $! C.unsafeFromListReverseN n acc False -> do Latin.char () ' ' name <- takeKeywordAndEquals value <- takeParameterValue let !p = Parameter {name, value} go (n + 1) (p : acc) -- This handles escape sequences correctly. takeParameterValue :: Parser () s Bytes takeParameterValue = do Latin.char () '"' start <- Unsafe.cursor Parser.skipTrailedBy2 () 0x22 0x5C >>= \case False -> do -- no backslashes, went all the way to a double quote end <- Unsafe.cursor let !len = (end - start) - 1 arr <- Unsafe.expose pure Bytes {array = arr, offset = start, length = len} True -> do -- found a backslash, we will need to escape quotes c <- Latin.any () if c == '"' || c == '\\' then pure () else Parser.fail () consumeThroughUnescapedQuote end <- Unsafe.cursor let !len = (end - start) - 1 arr <- Unsafe.expose let bs = Bytes {array = arr, offset = start, length = len} pure $! removeEscapeSequences bs consumeThroughUnescapedQuote :: Parser () s () consumeThroughUnescapedQuote = Parser.skipTrailedBy2 () 0x22 0x5C >>= \case False -> pure () True -> do c <- Latin.any () if c == '"' || c == '\\' then consumeThroughUnescapedQuote else Parser.fail () {- | Precondition: Every backslash is followed by a double quote or by a backslash. -} removeEscapeSequences :: Bytes -> Bytes removeEscapeSequences Bytes {array, offset = off0, length = len0} = let (lengthX, arrayX) = runIntByteArrayST $ do dst <- PM.newByteArray len0 let go !ixSrc !ixDst !len = case len of 0 -> pure ixDst _ -> do let w :: Word8 = PM.indexByteArray array ixSrc case w of 0x5C -> case PM.indexByteArray array (ixSrc + 1) :: Word8 of 0x5C -> do PM.writeByteArray dst ixDst (0x5C :: Word8) go (ixSrc + 2) (ixDst + 1) (len - 2) 0x22 -> do PM.writeByteArray dst ixDst (0x22 :: Word8) go (ixSrc + 2) (ixDst + 1) (len - 2) _ -> errorWithoutStackTrace "Syslog.Ietf.removeEscapeSequences: invariant violated" _ -> do PM.writeByteArray dst ixDst w go (ixSrc + 1) (ixDst + 1) (len - 1) lenDst <- go off0 0 len0 PM.shrinkMutableByteArray dst lenDst dst' <- PM.unsafeFreezeByteArray dst pure (lenDst, dst') in Bytes {array = arrayX, length = lengthX, offset = 0} {- | Consume the angle-bracketed priority. RFC 5424 does not allow a space to follow the priority, so this does not consume a trailing space. -} takePriority :: e -> Parser e s Word32 takePriority e = do Latin.char e '<' priority <- Latin.decWord32 e Latin.char e '>' pure priority {- | Consume the keyword and the space that follows it. Returns the hostname. -} takeKeywordAndSpace :: e -> Parser e s Bytes takeKeywordAndSpace e = -- TODO: This should actually use a takeWhile1. Latin.takeTrailedBy e ' ' -- | Consume the keyword. Returns the keyword. takeKeyword :: Parser e s Bytes takeKeyword = -- TODO: Should use takeWhile1 Parser.takeWhile (\c -> c /= 0x20) {- | Consume the keyword and the equals sign that follows it. Returns the keyword. -} takeKeywordAndEquals :: Parser () s Bytes takeKeywordAndEquals = -- TODO: This should actually use a takeWhile1. Latin.takeTrailedBy () '=' -- | Consume the timestamp. takeTimestamp :: Parser () s Chronos.OffsetDatetime takeTimestamp = do year <- Latin.decWord () Latin.char () '-' month' <- Latin.decWord () let !month = month' - 1 when (month >= 12) (Parser.fail ()) Latin.char () '-' day <- Latin.decWord () Latin.char () 'T' hour <- Latin.decWord () Latin.char () ':' minute <- Latin.decWord () Latin.char () ':' sec <- Latin.decWord () let date = Chronos.Date (Chronos.Year (fromIntegral year)) (Chronos.Month (fromIntegral month)) (Chronos.DayOfMonth (fromIntegral day)) !nanos <- Latin.trySatisfy (== '.') >>= \case True -> do (n, w) <- Parser.measure (Latin.decWord64 ()) when (n > 9) (Parser.fail ()) let go !acc !b = case b of 0 -> acc _ -> go (acc * 10) (b - 1) !ns = go w (9 - n) pure ns False -> pure 0 off <- Latin.any () >>= \case 'Z' -> pure 0 '+' -> parserOffset '-' -> do !off <- parserOffset pure (negate off) _ -> Parser.fail () pure $! Chronos.OffsetDatetime ( Chronos.Datetime date $ Chronos.TimeOfDay (fromIntegral hour) (fromIntegral minute) (fromIntegral @Word64 @Int64 (fromIntegral sec * 1000000000 + nanos)) ) (Chronos.Offset off) -- Should consume exactly five characters: HH:MM. However, the implementation -- is more generous. parserOffset :: Parser () s Int parserOffset = do h <- Latin.decWord8 () Latin.char () ':' m <- Latin.decWord8 () let !r = ((fromIntegral @Word8 @Int h) * 60) + fromIntegral @Word8 @Int m pure r