module Codec.MIME.Parse
( parseMIMEBody
, parseMIMEType
, parseMIMEMessage
, parseHeaders
, parseMultipart
, parseContentType
, splitMulti
, normalizeCRLF
) where
import Codec.MIME.Type
import Codec.MIME.Decode
import Control.Arrow(second)
import Data.Char
import Data.Maybe
import qualified Data.List as L
import Debug.Trace ( trace )
import qualified Data.Text as T
import Data.Monoid(Monoid(..), (<>))
enableTrace :: Bool
enableTrace = False
doTrace :: String -> b -> b
doTrace | enableTrace = trace
| otherwise = \_ x -> x
parseMIMEBody :: [MIMEParam] -> T.Text -> MIMEValue
parseMIMEBody headers_in body = result { mime_val_headers = headers }
where
result = case mimeType mty of
Multipart{} -> fst (parseMultipart mty body)
Message{} -> fst (parseMultipart mty body)
_ -> nullMIMEValue { mime_val_type = mty
, mime_val_disp = parseContentDisp headers
, mime_val_content = Single (processBody headers body)
}
headers = [ MIMEParam (T.toLower k) v | (MIMEParam k v) <- headers_in ]
mty = fromMaybe defaultType
(parseContentType =<< lookupField "content-type" (paramPairs headers))
defaultType :: Type
defaultType = Type { mimeType = Text "plain"
, mimeParams = [MIMEParam "charset" "us-ascii"]
}
parseContentDisp :: [MIMEParam] -> Maybe Disposition
parseContentDisp headers =
(processDisp . dropFoldingWSP) =<< lookupField "content-disposition" (paramPairs headers)
where
processDisp t | T.null t = Nothing
| T.null bs = Just $ Disposition { dispType = toDispType (T.toLower as)
, dispParams = []
}
| otherwise = Just $ Disposition { dispType = toDispType (T.toLower as)
, dispParams = processParams (parseParams bs)
}
where (as,bs) = T.break (\ch -> isSpace ch || ch == ';') t
processParams = map procP
where
procP (MIMEParam as val)
| "name" == asl = Name val
| "filename" == asl = Filename val
| "creation-date" == asl = CreationDate val
| "modification-date" == asl = ModDate val
| "read-date" == asl = ReadDate val
| "size" == asl = Size val
| otherwise = OtherParam asl val
where asl = T.toLower as
toDispType t = if t == "inline" then DispInline
else if t == "attachment" then DispAttachment
else if t == "form-data" then DispFormData
else DispOther t
paramPairs :: [MIMEParam] -> [(T.Text, T.Text)]
paramPairs = map paramPair
where
paramPair (MIMEParam a b) = (a,b)
processBody :: [MIMEParam] -> T.Text -> T.Text
processBody headers body =
case lookupField "content-transfer-encoding" $ paramPairs headers of
Nothing -> body
Just v -> T.pack $ decodeBody (T.unpack v) $ T.unpack body
normalizeCRLF :: T.Text -> T.Text
normalizeCRLF t
| T.null t = ""
| "\r\n" `T.isPrefixOf` t = "\r\n" <> normalizeCRLF (T.drop 2 t)
| any (`T.isPrefixOf` t) ["\r", "\n"] = "\r\n" <> normalizeCRLF (T.drop 1 t)
| otherwise = let (a,b) = T.break (`elem` ['\r','\n']) t in a <> normalizeCRLF b
parseMIMEMessage :: T.Text -> MIMEValue
parseMIMEMessage entity =
case parseHeaders (normalizeCRLF entity) of
(as,bs) -> parseMIMEBody as bs
parseHeaders :: T.Text -> ([MIMEParam], T.Text)
parseHeaders str =
case findFieldName "" str of
Left (nm, rs) -> parseFieldValue nm (dropFoldingWSP rs)
Right body -> ([],body)
where
findFieldName acc t
| T.null t = Right ""
| "\r\n" `T.isPrefixOf` t = Right $ T.drop 2 t
| ":" `T.isPrefixOf` t = Left (T.reverse $ T.dropWhile isHSpace acc, T.drop 1 t)
| otherwise = findFieldName (T.take 1 t <> acc) $ T.drop 1 t
parseFieldValue nm xs
| T.null bs = ([MIMEParam nm as], "")
| otherwise = let (zs,ys) = parseHeaders bs in (MIMEParam nm as :zs, ys)
where
(as,bs) = takeUntilCRLF xs
parseMultipart :: Type -> T.Text -> (MIMEValue, T.Text)
parseMultipart mty body =
case lookupField "boundary" (paramPairs $ mimeParams mty) of
Nothing -> doTrace ("Multipart mime type, " ++ T.unpack (showType mty) ++
", has no required boundary parameter. Defaulting to text/plain") $
(nullMIMEValue{ mime_val_type = defaultType
, mime_val_disp = Nothing
, mime_val_content = Single body
}, "")
Just bnd -> (nullMIMEValue { mime_val_type = mty
, mime_val_disp = Nothing
, mime_val_content = Multi vals
}, rs)
where (vals,rs) = splitMulti bnd body
splitMulti :: T.Text -> T.Text -> ([MIMEValue], T.Text)
splitMulti bnd body_in =
let body | "--" `T.isPrefixOf` body_in = "\r\n" <> body_in
| otherwise = body_in
in case untilMatch dashBoundary body of
Nothing -> mempty
Just xs | "--" `T.isPrefixOf` xs -> ([], T.drop 2 xs)
| otherwise -> splitMulti1 (dropTrailer xs)
where
dashBoundary = ("\r\n--" <> bnd)
splitMulti1 xs
| T.null as && T.null bs = ([], "")
| T.null bs = ([parseMIMEMessage as],"")
| T.isPrefixOf "--" bs = ([parseMIMEMessage as], dropTrailer bs)
| otherwise = let (zs,ys) = splitMulti1 (dropTrailer bs)
in ((parseMIMEMessage as) : zs,ys)
where
(as,bs) = matchUntil dashBoundary xs
dropTrailer xs
| "\r\n" `T.isPrefixOf` xs1 = T.drop 2 xs1
| otherwise = xs1
where
xs1 = T.dropWhile isHSpace xs
parseMIMEType :: T.Text -> Maybe Type
parseMIMEType = parseContentType
parseContentType :: T.Text -> Maybe Type
parseContentType str
| T.null minor0 = doTrace ("unable to parse content-type: " ++ show str) $ Nothing
| otherwise = Just Type { mimeType = toType maj as
, mimeParams = parseParams (T.dropWhile isHSpace bs)
}
where
(maj, minor0) = T.break (=='/') (dropFoldingWSP str)
minor = T.drop 1 minor0
(as, bs) = T.break (\ ch -> isHSpace ch || isTSpecial ch) minor
toType a b = case lookupField (T.toLower a) mediaTypes of
Just ctor -> ctor b
_ -> Other a b
parseParams :: T.Text -> [MIMEParam]
parseParams t | T.null t = []
| ';' == T.head t = let (nm_raw, vs0) = T.break (=='=') (dropFoldingWSP $ T.tail t)
nm = T.toLower nm_raw in
if T.null vs0
then []
else let vs = T.tail vs0 in
if not (T.null vs) && T.head vs == '"'
then let vs1 = T.tail vs
(val, zs0) = T.break (=='"') vs1 in
if T.null zs0
then [MIMEParam nm val]
else MIMEParam nm val : parseParams (T.dropWhile isHSpace $ T.tail zs0)
else let (val, zs) = T.break (\ch -> isHSpace ch || isTSpecial ch) vs in
MIMEParam nm val : parseParams (T.dropWhile isHSpace zs)
| otherwise = doTrace ("Codec.MIME.Parse.parseParams: curious param value -- " ++ show t) []
mediaTypes :: [(T.Text, T.Text -> MIMEType)]
mediaTypes =
[ ("multipart", (Multipart . toMultipart))
, ("application", Application)
, ("audio", Audio)
, ("image", Image)
, ("message", Message)
, ("model", Model)
, ("text", Text)
, ("video", Video)
]
where toMultipart b = fromMaybe other (lookupField (T.toLower b) multipartTypes)
where other | T.isPrefixOf "x-" b = Extension b
| otherwise = OtherMulti b
multipartTypes :: [(T.Text, Multipart)]
multipartTypes =
[ ("alternative", Alternative)
, ("byteranges", Byteranges)
, ("digest", Digest)
, ("encrypted", Encrypted)
, ("form-data", FormData)
, ("mixed", Mixed)
, ("parallel", Parallel)
, ("related", Related)
, ("signed", Signed)
]
untilMatch :: T.Text -> T.Text -> Maybe T.Text
untilMatch a b | T.null a = Just b
| T.null b = Nothing
| a `T.isPrefixOf` b = Just $ T.drop (T.length a) b
| otherwise = untilMatch a $ T.tail b
matchUntil :: T.Text -> T.Text -> (T.Text, T.Text)
matchUntil str = second (T.drop $ T.length str) . T.breakOn str
isHSpace :: Char -> Bool
isHSpace c = c == ' ' || c == '\t'
isTSpecial :: Char -> Bool
isTSpecial x = x `elem` ("()<>@,;:\\\"/[]?="::String)
dropFoldingWSP :: T.Text -> T.Text
dropFoldingWSP t | T.null t = ""
| isHSpace (T.head t) = dropFoldingWSP $ T.tail t
| "\r\n" `T.isPrefixOf` t && not (T.null $ T.drop 2 t) && isHSpace (T.head $ T.drop 2 t)
= dropFoldingWSP $ T.drop 3 t
| otherwise = t
takeUntilCRLF :: T.Text -> (T.Text, T.Text)
takeUntilCRLF str = go "" str
where
go acc t | T.null t = (T.reverse (T.dropWhile isHSpace acc), "")
| "\r\n" `T.isPrefixOf` t && not (T.null $ T.drop 2 t) && isHSpace (T.head $ T.drop 2 t)
= go (" " <> acc) (T.drop 3 t)
| "\r\n" `T.isPrefixOf` t && not (T.null $ T.drop 2 t)
= (T.reverse (T.dropWhile isHSpace acc), T.drop 2 t)
| otherwise = go (T.take 1 t <> acc) $ T.tail t
lookupField :: T.Text -> [(T.Text,a)] -> Maybe a
lookupField n ns =
case lookup n ns of
x@Just{} -> x
Nothing ->
let nl = T.toLower n in
fmap snd $ L.find ((nl==) . T.toLower . fst) ns