{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Irc.Format
( UserInfo(..)
, RawIrcMsg(..)
, parseRawIrcMsg
, renderRawIrcMsg
, parseUserInfo
, renderUserInfo
, Identifier
, mkId
, idBytes
, idDenote
, asUtf8
, ircFoldCase
) where
import Control.Applicative
import Control.Monad (when)
import Data.Array
import Data.Attoparsec.ByteString.Char8 as P
import Data.ByteString (ByteString)
import Data.ByteString.Builder (Builder)
import Data.Functor
import Data.Monoid
import Data.String
import Data.Text (Text)
import Data.Time (UTCTime)
import Data.Word (Word8)
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as Builder
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import Irc.Time (myParseTime)
data UserInfo = UserInfo
{ userNick :: Identifier
, userName :: Maybe ByteString
, userHost :: Maybe ByteString
}
deriving (Read, Show)
data RawIrcMsg = RawIrcMsg
{ msgTime :: Maybe UTCTime
, msgPrefix :: Maybe UserInfo
, msgCommand :: ByteString
, msgParams :: [ByteString]
}
deriving (Read, Show)
data Identifier = Identifier ByteString ByteString
deriving (Read, Show)
instance Eq Identifier where
x == y = idDenote x == idDenote y
instance Ord Identifier where
compare x y = compare (idDenote x) (idDenote y)
instance IsString Identifier where
fromString = mkId . fromString
mkId :: ByteString -> Identifier
mkId x = Identifier x (ircFoldCase x)
idBytes :: Identifier -> ByteString
idBytes (Identifier x _) = x
idDenote :: Identifier -> ByteString
idDenote (Identifier _ x) = x
parseRawIrcMsg :: ByteString -> Maybe RawIrcMsg
parseRawIrcMsg x =
case parseOnly rawIrcMsgParser x of
Left{} -> Nothing
Right r -> Just r
maxMiddleParams :: Int
maxMiddleParams = 14
rawIrcMsgParser :: Parser RawIrcMsg
rawIrcMsgParser =
do time <- guarded (string "@time=") timeParser
prefix <- guarded (char ':') prefixParser
cmd <- simpleTokenParser
params <- paramsParser maxMiddleParams
return RawIrcMsg
{ msgTime = time
, msgPrefix = prefix
, msgCommand = cmd
, msgParams = params
}
paramsParser :: Int -> Parser [ByteString]
paramsParser n =
do _ <- skipMany (char ' ')
endOfInput $> [] <|> more
where
more
| n == 0 =
do _ <- optional (char ':')
finalParam
| otherwise =
do mbColon <- optional (char ':')
case mbColon of
Just{} -> finalParam
Nothing -> middleParam
finalParam =
do x <- takeByteString
let !x' = B.copy x
return [x']
middleParam =
do x <- P.takeWhile (/= ' ')
when (B8.null x) (fail "Empty middle parameter")
let !x' = B.copy x
xs <- paramsParser (n-1)
return (x':xs)
timeParser :: Parser UTCTime
timeParser =
do timeBytes <- simpleTokenParser
_ <- char ' '
case parseIrcTime (B8.unpack timeBytes) of
Nothing -> fail "Bad server-time format"
Just t -> return t
parseIrcTime :: String -> Maybe UTCTime
parseIrcTime = myParseTime "%Y-%m-%dT%H:%M:%S%Q%Z"
prefixParser :: Parser UserInfo
prefixParser =
do tok <- simpleTokenParser
_ <- char ' '
return (parseUserInfo tok)
simpleTokenParser :: Parser ByteString
simpleTokenParser =
do xs <- P.takeWhile (/= ' ')
when (B8.null xs) (fail "Empty token")
return $! B8.copy xs
renderUserInfo :: UserInfo -> ByteString
renderUserInfo u = idBytes (userNick u)
<> maybe B.empty ("!" <>) (userName u)
<> maybe B.empty ("@" <>) (userHost u)
parseUserInfo :: ByteString -> UserInfo
parseUserInfo x = UserInfo
{ userNick = mkId nick
, userName = if B.null user then Nothing else Just (B.drop 1 user)
, userHost = if B.null host then Nothing else Just (B.drop 1 host)
}
where
(nickuser,host) = B8.break (=='@') x
(nick,user) = B8.break (=='!') nickuser
renderRawIrcMsg :: RawIrcMsg -> ByteString
renderRawIrcMsg m = L.toStrict $ Builder.toLazyByteString $
maybe mempty renderPrefix (msgPrefix m)
<> Builder.byteString (msgCommand m)
<> buildParams (msgParams m)
<> Builder.word8 13
<> Builder.word8 10
renderPrefix :: UserInfo -> Builder
renderPrefix u = Builder.char8 ':'
<> Builder.byteString (renderUserInfo u)
<> Builder.char8 ' '
buildParams :: [ByteString] -> Builder
buildParams [x]
| B.elem 32 x || B.elem 58 x
= Builder.word8 32 <> Builder.word8 58 <> Builder.byteString x
buildParams (x:xs)
= Builder.word8 32 <> Builder.byteString x <> buildParams xs
buildParams [] = mempty
guarded :: Parser a -> Parser b -> Parser (Maybe b)
guarded pa pb =
do mb <- optional pa
case mb of
Nothing -> return Nothing
Just{} -> fmap Just pb
ircFoldCase :: ByteString -> ByteString
ircFoldCase = B.map (B.index casemap . fromIntegral)
casemap :: ByteString
casemap = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\
\\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\
\ !\"#$%&'()*+,-./0123456789:;<=>?\
\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_\
\`ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^\x7f\
\\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\
\\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\
\\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\
\\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\
\\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\
\\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\
\\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\
\\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
asUtf8 :: ByteString -> Text
asUtf8 x = case Text.decodeUtf8' x of
Right txt -> txt
Left{} -> decodeCP1252 x
decodeCP1252 :: ByteString -> Text
decodeCP1252 = Text.pack . map (cp1252!) . B.unpack
cp1252 :: Array Word8 Char
cp1252 = listArray (0,255)
['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK','\a','\b','\t','\n','\v','\f','\r','\SO','\SI',
'\DLE','\DC1','\DC2','\DC3','\DC4','\NAK','\SYN','\ETB','\CAN','\EM','\SUB','\ESC','\FS','\GS','\RS','\US',
' ','!','\"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
'0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
'@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_',
'`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~','\DEL',
'\8364','\129','\8218','\402','\8222','\8230','\8224','\8225','\710','\8240','\352','\8249','\338','\141','\381','\143',
'\144','\8216','\8217','\8220','\8221','\8226','\8211','\8212','\732','\8482','\353','\8250','\339','\157','\382','\376',
'\160','\161','\162','\163','\164','\165','\166','\167','\168','\169','\170','\171','\172','\173','\174','\175',
'\176','\177','\178','\179','\180','\181','\182','\183','\184','\185','\186','\187','\188','\189','\190','\191',
'\192','\193','\194','\195','\196','\197','\198','\199','\200','\201','\202','\203','\204','\205','\206','\207',
'\208','\209','\210','\211','\212','\213','\214','\215','\216','\217','\218','\219','\220','\221','\222','\223',
'\224','\225','\226','\227','\228','\229','\230','\231','\232','\233','\234','\235','\236','\237','\238','\239',
'\240','\241','\242','\243','\244','\245','\246','\247','\248','\249','\250','\251','\252','\253','\254','\255']