module Codec.MIME.String.Parse where
import qualified Codec.Binary.Base64.String as Base64 (decode)
import Codec.MIME.String.ContentDisposition
(
ContentDisposition(ContentDisposition), get_content_disposition,
DispositionType(Inline, Attachment),
DispositionParameter(..),
)
import Codec.MIME.String.Date (get_date)
import Codec.MIME.String.Headers
(
ContentTransferEncoding(ContentTransferEncoding),
get_content_transfer_encoding,
ContentType(ContentType), get_content_type,
get_content_description, get_boundary,
Parameter(Parameter),
MIMEVersion(MIMEVersion), get_mime_version,
Subject(Subject),
get_subject, get_from, get_to,
)
import qualified Codec.MIME.String.QuotedPrintable as QuotedPrintable (decode)
import Codec.MIME.String.Types
(
ParseM,
Header(Header, h_raw_header, h_raw_name, h_name, h_body),
Headers,
Message(Message),
MessageInfo(..),
Multipart(Multipart),
MessageContent(NoContent, Mixed, Alternative,
Parallel, Digest, RFC822),
mkData, mkBody,
digest_content_type, ascii_text_content_type,
)
import Codec.MIME.String.Internal.Utils
import Codec.Text.IConv
import Control.Monad (liftM)
import Control.Monad.State (evalState, get, put)
import qualified Data.ByteString.Lazy.Char8 as BS
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Maybe (fromMaybe)
mkMessage :: MessageInfo -> ParseM MessageContent -> ParseM Message
mkMessage mi f_mc
= do pn <- get
put (pn + 1)
mc <- f_mc
return $ Message pn mi mc
parse :: String -> Message
parse msg = evalState (parse_message msg) 1
parse_message :: String -> ParseM Message
parse_message msg
= let (headers, m_body) = parse_headers msg
in case get_header headers "mime-version:" get_mime_version of
Just (MIMEVersion 1 0) ->
parse_mime_message ascii_text_content_type msg
_ ->
do let m_from = get_header headers "from:" get_from
m_subject = get_header headers "subject:" (Just . Subject)
m_to = get_header headers "to:" get_to
m_date = get_header headers "date:" get_date
mi = MessageInfo {
mi_headers = headers,
mi_from = m_from,
mi_subject = m_subject,
mi_to = m_to,
mi_date = m_date,
mi_content_description = Nothing
}
mc = case m_body of
Just body ->
mkBody ascii_text_content_type "unknown"
(convertAsciiToUtf8 body)
Nothing ->
return $ NoContent ascii_text_content_type
mkMessage mi mc
convertAsciiToUtf8 :: String -> String
convertAsciiToUtf8 xs
= BS.unpack $ convertFuzzy Transliterate "US-ASCII" "utf8" $ BS.pack xs
parse_mime_message :: ContentType -> String -> ParseM Message
parse_mime_message def_content_type msg
= let (headers, m_body) = parse_headers msg
mi = make_mime_message_info headers
mc = case m_body of
Nothing ->
do let m_ct = get_header headers "content-type:"
get_content_type
content_type = fromMaybe def_content_type m_ct
return $ NoContent content_type
Just body ->
make_mime_message_content def_content_type headers body
in mkMessage mi mc
make_mime_message_info :: Headers -> MessageInfo
make_mime_message_info headers
= let m_content_description = get_header headers "content-description:"
get_content_description
m_from = get_header headers "from:" get_from
m_subject = get_header headers "subject:" get_subject
m_to = get_header headers "to:" get_to
m_date = get_header headers "date:" get_date
in MessageInfo {
mi_headers = headers,
mi_from = m_from,
mi_subject = m_subject,
mi_to = m_to,
mi_date = m_date,
mi_content_description = m_content_description
}
make_mime_message_content :: ContentType -> Headers -> String
-> ParseM MessageContent
make_mime_message_content def_content_type headers body
=
case m_decoded_body of
Nothing ->
mkData (Just cte) content_type filename body
Just decoded_body ->
case content_disposition of
ContentDisposition Attachment _ ->
mkData Nothing content_type filename decoded_body
ContentDisposition Inline _ ->
case content_type of
ContentType "text" _ ps ->
let charset = fromMaybe "US-ASCII"
$ lookup_param "charset" ps
decoded_body' = BS.pack decoded_body
in case tryConvertFuzzy Transliterate charset "utf8" decoded_body' of
Just xs ->
mkBody content_type filename $ BS.unpack xs
Nothing ->
mkData Nothing content_type filename decoded_body
ContentType "multipart" st ps
| Just raw_b <- lookup_param "boundary" ps,
Just b <- get_boundary raw_b
-> do let (preamble, parts, epilogue)
= get_parts b decoded_body
ct = case st of
"digest" -> digest_content_type
_ -> ascii_text_content_type
constr = case st of
"alternative" -> Alternative
"parallel" -> Parallel
"digest" -> Digest
_ -> Mixed
ms <- mapM (parse_mime_message ct) parts
return $ constr (Multipart preamble ms epilogue)
ContentType "message" "rfc822" _ ->
liftM (RFC822 decoded_body filename)
(parse_message decoded_body)
_ -> mkData Nothing content_type filename decoded_body
where m_cd = get_header headers "content-disposition:"
get_content_disposition
content_disposition = fromMaybe (ContentDisposition Inline []) m_cd
m_ct = get_header headers "content-type:"
get_content_type
content_type = fromMaybe def_content_type m_ct
filename = get_filename content_disposition content_type
m_ce = get_header headers "content-transfer-encoding:"
get_content_transfer_encoding
cte@(ContentTransferEncoding content_transfer_encoding)
= fromMaybe (ContentTransferEncoding "7bit") m_ce
m_decoded_body
| content_transfer_encoding == "base64" = Just (Base64.decode body)
| content_transfer_encoding == "quoted-printable" = Just (QuotedPrintable.decode $ my_lines body)
| content_transfer_encoding `elem` ["7bit", "8bit", "binary"] = Just body
| otherwise = Nothing
tryConvertFuzzy :: Fuzzy -> EncodingName -> EncodingName -> ByteString
-> Maybe ByteString
tryConvertFuzzy fuzzy from_charset to_charset decoded_body =
case convertStrictly from_charset to_charset decoded_body of
Right (UnsuportedConversion {}) -> Nothing
_ -> Just $ convertFuzzy fuzzy from_charset to_charset decoded_body
get_filename :: ContentDisposition -> ContentType -> FilePath
get_filename (ContentDisposition _ params) (ContentType _ _ params')
= loop params
where loop [] =
case lookup_param "name" params' of
Nothing -> default_filename
Just f -> sanitise f
loop (Filename f:_) = sanitise f
loop (_:ps) = loop ps
sanitise f = case reverse $ takeWhile ('/' /=) $ reverse f of
"" -> default_filename
f' -> f'
default_filename = "unknown"
get_parts :: String -> String -> (String, [String], String)
get_parts boundary body
= case gps [] body of
(pre:parts, epi) -> (pre, parts, epi)
_ -> error "Parse.get_parts: Can't happen XXX"
where gps acc "" = ([from_acc acc], "")
gps acc xs
= case fmap (dropWhile isWSP) $ after dd_boundary
$ snd $ read_line_ending xs of
Just "--" -> ([from_acc acc], "")
Just ('-':'-':cs@(c:_))
| is_cr_or_lf c -> ([from_acc acc], cs)
Just "" -> ([from_acc acc], "")
Just (c1:cs)
| is_cr_or_lf c1
-> let cs' = case cs of
c2:cs''
| is_cr_or_lf c2 && c1 /= c2
-> cs''
_ -> cs
in case gps [] cs' of
(ps, ep) -> (from_acc acc:ps, ep)
_ -> case read_line_ending xs of
(le, xs') ->
case break is_cr_or_lf xs' of
(ys, zs) -> gps (ys:le:acc) zs
from_acc acc = concat $ reverse acc
read_line_ending cs = case cs of
(c1:cs1)
| is_cr_or_lf c1 ->
case cs1 of
(c2:cs2)
| is_cr_or_lf c2 && c1 /= c2 ->
([c1, c2], cs2)
_ -> ([c1], cs1)
_ -> ("", cs)
after "" ys = Just ys
after (x:xs) (y:ys)
| x == y = after xs ys
| otherwise = Nothing
after (_:_) "" = Nothing
dd_boundary = "--" ++ boundary
isWSP ' ' = True
isWSP '\t' = True
isWSP _ = False
lookup_param :: String -> [Parameter] -> Maybe String
lookup_param _ [] = Nothing
lookup_param name (Parameter n v:ps)
| name == n = Just v
| otherwise = lookup_param name ps
get_header :: Headers -> String -> (String -> Maybe a) -> Maybe a
get_header hs name getter
= case filter ((name ==) . h_name) hs of
[h] -> getter (h_body h)
_ -> Nothing
parse_headers :: String -> (Headers, Maybe String)
parse_headers msg = skip_whitespace (Just msg)
where skip_whitespace :: Maybe String -> ([Header], Maybe String)
skip_whitespace Nothing = ([], Nothing)
skip_whitespace (Just xs) = case get_line xs of
([], m_zs) -> ([], m_zs)
(ys, m_zs) ->
case dropWhile isWhite ys of
[] -> skip_whitespace m_zs
ys' -> gather [ys'] m_zs
gather :: [String] -> Maybe String -> ([Header], Maybe String)
gather acc Nothing = ([mk_rev_header acc], Nothing)
gather acc (Just xs)
= case get_line xs of
([], m_zs) -> ([mk_rev_header acc], m_zs)
(ys, m_zs)
| starts_with_white ys -> gather (ys:acc) m_zs
| otherwise -> let (hs, m_rest) = gather [ys] m_zs
in (mk_rev_header acc:hs, m_rest)
starts_with_white (c:_) = isWhite c
starts_with_white [] = False
isWhite ' ' = True
isWhite '\t' = True
isWhite _ = False
mk_rev_header :: [String] -> Header
mk_rev_header = mk_header . reverse
mk_header :: [String] -> Header
mk_header xs = let unfolded = concat xs
(raw_name, body) = case break ends_header unfolded of
(name, ':':val) -> (name ++ ":", val)
(name, val) -> (name, val)
in Header {
h_raw_header = xs,
h_raw_name = raw_name,
h_name = map asciiToLower raw_name,
h_body = body
}
where ends_header ':' = True
ends_header ' ' = True
ends_header '\t' = True
ends_header _ = False