module Codec.MIME.String.Parse where

import qualified Codec.Binary.Base64.String as Base64 (decode)
import Codec.MIME.String.ContentDisposition
      (
       ContentDisposition(ContentDisposition), get_content_disposition,
       DispositionType(Inline, Attachment),
       DispositionParameter(..),
      )
import Codec.MIME.String.Date (get_date)
import Codec.MIME.String.Headers
      (
       ContentTransferEncoding(ContentTransferEncoding),
       get_content_transfer_encoding,
       ContentType(ContentType), get_content_type,
       get_content_description, get_boundary,
       Parameter(Parameter),
       MIMEVersion(MIMEVersion), get_mime_version,
       Subject(Subject),
       get_subject, get_from, get_to,
      )
import qualified Codec.MIME.String.QuotedPrintable as QuotedPrintable (decode)
import Codec.MIME.String.Types
      (
       ParseM,
       Header(Header, h_raw_header, h_raw_name, h_name, h_body),
       Headers,
       Message(Message),
       MessageInfo(..),
       Multipart(Multipart),
       MessageContent(NoContent, Mixed, Alternative,
                      Parallel, Digest, RFC822),
       mkData, mkBody,
       digest_content_type, ascii_text_content_type,
      )
import Codec.MIME.String.Internal.Utils

import Codec.Text.IConv
import Control.Monad (liftM)
import Control.Monad.State (evalState, get, put)
import qualified Data.ByteString.Lazy.Char8 as BS
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Maybe (fromMaybe)

mkMessage :: MessageInfo -> ParseM MessageContent -> ParseM Message
mkMessage mi f_mc
 = do pn <- get
      put (pn + 1)
      -- This is done slightly oddly so we get numbering in the natural
      -- order, which in turn is important when looking for a part number
      mc <- f_mc
      return $ Message pn mi mc

parse :: String -> Message
parse msg = evalState (parse_message msg) 1

parse_message :: String -> ParseM Message
parse_message msg
 = let (headers, m_body) = parse_headers msg
   in case get_header headers "mime-version:" get_mime_version of
          -- We only try and be clever if the MIME version is 1.0
          Just (MIMEVersion 1 0) ->
              parse_mime_message ascii_text_content_type msg
          _ ->
              do let m_from = get_header headers "from:" get_from
                     m_subject = get_header headers "subject:" (Just . Subject)
                     m_to = get_header headers "to:" get_to
                     m_date = get_header headers "date:" get_date
                     mi = MessageInfo {
                              mi_headers = headers,
                              mi_from = m_from,
                              mi_subject = m_subject,
                              mi_to = m_to,
                              mi_date = m_date,
                              mi_content_description = Nothing
                          }
                     mc = case m_body of
                              Just body ->
                                  mkBody ascii_text_content_type "unknown"
                                         (convertAsciiToUtf8 body)
                              Nothing ->
                                  return $ NoContent ascii_text_content_type
                 mkMessage mi mc

convertAsciiToUtf8 :: String -> String
convertAsciiToUtf8 xs
    = BS.unpack $ convertFuzzy Transliterate "US-ASCII" "utf8" $ BS.pack xs

parse_mime_message :: ContentType -> String -> ParseM Message
parse_mime_message def_content_type msg
 = let (headers, m_body) = parse_headers msg
       mi = make_mime_message_info headers
       mc = case m_body of
                Nothing ->
                    do let m_ct = get_header headers "content-type:"
                                                  get_content_type
                           content_type = fromMaybe def_content_type m_ct
                       return $ NoContent content_type
                Just body ->
                    make_mime_message_content def_content_type headers body
   in mkMessage mi mc

make_mime_message_info :: Headers -> MessageInfo
make_mime_message_info headers
 = let m_content_description = get_header headers "content-description:"
                                               get_content_description
       m_from = get_header headers "from:" get_from
       m_subject = get_header headers "subject:" get_subject
       m_to = get_header headers "to:" get_to
       m_date = get_header headers "date:" get_date
   in MessageInfo {
          mi_headers = headers,
          mi_from = m_from,
          mi_subject = m_subject,
          mi_to = m_to,
          mi_date = m_date,
          mi_content_description = m_content_description
      }

make_mime_message_content :: ContentType -> Headers -> String
                          -> ParseM MessageContent
make_mime_message_content def_content_type headers body
 = -- We accept illegal combinations of content type and
   -- encoding, so just decode whatever it says
   case m_decoded_body of
       Nothing ->
           mkData (Just cte) content_type filename body
       Just decoded_body ->
           case content_disposition of
               ContentDisposition Attachment _ ->
                   mkData Nothing content_type filename decoded_body
               ContentDisposition Inline _ ->
                   case content_type of
                   ContentType "text" _ ps ->
                       let charset = fromMaybe "US-ASCII"
                                   $ lookup_param "charset" ps
                           decoded_body' = BS.pack decoded_body
                       in case tryConvertFuzzy Transliterate charset "utf8" decoded_body' of
                          Just xs ->
                              mkBody content_type filename $ BS.unpack xs
                          Nothing ->
                              mkData Nothing content_type filename decoded_body
                   ContentType "multipart" st ps
                    | Just raw_b <- lookup_param "boundary" ps,
                      Just b <- get_boundary raw_b
                       -> do let (preamble, parts, epilogue)
                                     = get_parts b decoded_body
                                 ct = case st of
                                          "digest" -> digest_content_type
                                          _ -> ascii_text_content_type
                                 constr = case st of
                                              "alternative" -> Alternative
                                              "parallel" -> Parallel
                                              "digest" -> Digest
                                              _ -> Mixed
                             ms <- mapM (parse_mime_message ct) parts
                             return $ constr (Multipart preamble ms epilogue)
                   ContentType "message" "rfc822" _ ->
                       liftM (RFC822 decoded_body filename)
                             (parse_message decoded_body)
                   -- Anything else is treated like an
                   -- application/octet-stream
                   _ -> mkData Nothing content_type filename decoded_body
 where m_cd = get_header headers "content-disposition:"
                              get_content_disposition
       content_disposition = fromMaybe (ContentDisposition Inline []) m_cd
       m_ct = get_header headers "content-type:"
                              get_content_type
       content_type = fromMaybe def_content_type m_ct
       filename = get_filename content_disposition content_type
       m_ce = get_header headers "content-transfer-encoding:"
                              get_content_transfer_encoding
       cte@(ContentTransferEncoding content_transfer_encoding)
           = fromMaybe (ContentTransferEncoding "7bit") m_ce
       m_decoded_body
        | content_transfer_encoding == "base64" = Just (Base64.decode body)
        | content_transfer_encoding == "quoted-printable" = Just (QuotedPrintable.decode $ my_lines body)
        | content_transfer_encoding `elem` ["7bit", "8bit", "binary"] = Just body
              -- Don't worry if 8-bit data is in 7-bit transfer-encoded data
        | otherwise = Nothing

-- XXX This is rather hacky. We really want convertFuzzy to tell us if
-- the conversion is supported itself
tryConvertFuzzy :: Fuzzy -> EncodingName -> EncodingName -> ByteString
                -> Maybe ByteString
tryConvertFuzzy fuzzy from_charset to_charset decoded_body =
    case convertStrictly from_charset to_charset decoded_body of
    Right (UnsuportedConversion {}) -> Nothing
    _ -> Just $ convertFuzzy fuzzy from_charset to_charset decoded_body

get_filename :: ContentDisposition -> ContentType -> FilePath
get_filename (ContentDisposition _ params) (ContentType _ _ params')
 = loop params
    where loop [] = -- Look up legacy name parameter in content type
                    case lookup_param "name" params' of
                        Nothing -> default_filename
                        Just f -> sanitise f
          loop (Filename f:_) = sanitise f
          loop (_:ps) = loop ps
          sanitise f = case reverse $ takeWhile ('/' /=) $ reverse f of
                           "" -> default_filename
                           f' -> f'
          default_filename = "unknown"

get_parts :: String -> String -> (String, [String], String)
get_parts boundary body
 = case gps [] body of
       (pre:parts, epi) -> (pre, parts, epi)
       _ -> error "Parse.get_parts: Can't happen XXX"
    where gps acc "" = ([from_acc acc], "")
          gps acc xs
           = case fmap (dropWhile isWSP) $ after dd_boundary
                                         $ snd $ read_line_ending xs of
                 Just "--" -> ([from_acc acc], "")
                 Just ('-':'-':cs@(c:_))
                  | is_cr_or_lf c -> ([from_acc acc], cs)
                 Just "" -> ([from_acc acc], "")
                 Just (c1:cs)
                       | is_cr_or_lf c1
                          -> let cs' = case cs of
                                           c2:cs''
                                            | is_cr_or_lf c2 && c1 /= c2
                                               -> cs''
                                           _ -> cs
                             in case gps [] cs' of
                                    (ps, ep) -> (from_acc acc:ps, ep)
                 _ -> case read_line_ending xs of
                          (le, xs') ->
                              case break is_cr_or_lf xs' of
                                  (ys, zs) -> gps (ys:le:acc) zs
          from_acc acc = concat $ reverse acc
          read_line_ending cs = case cs of
                                    (c1:cs1)
                                     | is_cr_or_lf c1 ->
                                        case cs1 of
                                            (c2:cs2)
                                             | is_cr_or_lf c2 && c1 /= c2 ->
                                                ([c1, c2], cs2)
                                            _ -> ([c1], cs1)
                                    _ -> ("", cs)
          after "" ys = Just ys
          after (x:xs) (y:ys)
           | x == y = after xs ys
           | otherwise = Nothing
          after (_:_) "" = Nothing
          dd_boundary = "--" ++ boundary
          isWSP ' ' = True
          isWSP '\t' = True
          isWSP _ = False

-- Returns the value of the first parameter of the name (must be lower cased)
lookup_param :: String -> [Parameter] -> Maybe String
lookup_param _ [] = Nothing
lookup_param name (Parameter n v:ps)
 | name == n = Just v
 | otherwise = lookup_param name ps

get_header :: Headers -> String -> (String -> Maybe a) -> Maybe a
get_header hs name getter
 = case filter ((name ==) . h_name) hs of
       [h] -> getter (h_body h)
       _ -> Nothing

-- We skip over leading white space lines, both for resilience and
-- because this is allowed for MIME part headers.
parse_headers :: String -> (Headers, Maybe String)
parse_headers msg = skip_whitespace (Just msg)
    where skip_whitespace :: Maybe String -> ([Header], Maybe String)
          skip_whitespace Nothing = ([], Nothing)
          skip_whitespace (Just xs) = case get_line xs of
                                          ([], m_zs) -> ([], m_zs)
                                          (ys, m_zs) ->
                                              case dropWhile isWhite ys of
                                                  [] -> skip_whitespace m_zs
                                                  ys' -> gather [ys'] m_zs
          gather :: [String] -> Maybe String -> ([Header], Maybe String)
          gather acc Nothing = ([mk_rev_header acc], Nothing)
          gather acc (Just xs)
              = case get_line xs of
                    ([], m_zs) -> ([mk_rev_header acc], m_zs)
                    (ys, m_zs)
                     | starts_with_white ys -> gather (ys:acc) m_zs
                     | otherwise -> let (hs, m_rest) = gather [ys] m_zs
                                    in (mk_rev_header acc:hs, m_rest)
          starts_with_white (c:_) = isWhite c
          starts_with_white [] = False
          isWhite ' ' = True
          isWhite '\t' = True
          isWhite _ = False

-- Takes the lines comprising a header (in reverse order) and constructs
-- the corresponding Header
mk_rev_header :: [String] -> Header
mk_rev_header = mk_header . reverse

-- Takes the lines comprising a header and constructs
-- the corresponding Header
mk_header :: [String] -> Header
mk_header xs = let unfolded = concat xs
                   (raw_name, body) = case break ends_header unfolded of
                                          (name, ':':val) -> (name ++ ":", val)
                                          (name, val) -> (name, val)
               in Header {
                      h_raw_header = xs,
                      h_raw_name = raw_name,
                      h_name = map asciiToLower raw_name,
                      h_body = body
                  }
    where ends_header ':' = True
          ends_header ' ' = True
          ends_header '\t' = True
          ends_header _ = False