{-# LANGUAGE ForeignFunctionInterface, EmptyDataDecls #-}
{-# OPTIONS_GHC -fno-cse -fno-full-laziness #-}


-- | Low-level interface to Expat. Unless speed is paramount, this should
-- normally be avoided in favour of the interfaces provided by
-- 'Text.XML.Expat.SAX' and 'Text.XML.Expat.Tree', etc.
module Text.XML.Expat.Internal.IO (
  HParser,
  hexpatNewParser,
  encodingToString,
  Encoding(..),
  XMLParseError(..),
  XMLParseLocation(..)
  ) where

import Control.Applicative
import Control.DeepSeq
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as I
import Data.Int
import Data.Word
import Foreign
import Foreign.C


data Parser_struct
type ParserPtr = Ptr Parser_struct

data Encoding = ASCII | UTF8 | UTF16 | ISO88591
encodingToString :: Encoding -> String
encodingToString ASCII    = "US-ASCII"
encodingToString UTF8     = "UTF-8"
encodingToString UTF16    = "UTF-16"
encodingToString ISO88591 = "ISO-8859-1"

withOptEncoding :: Maybe Encoding -> (CString -> IO a) -> IO a
withOptEncoding Nothing    f = f nullPtr
withOptEncoding (Just enc) f = withCString (encodingToString enc) f

-- ByteString.useAsCStringLen is almost what we need, but C2HS wants a CInt
-- instead of an Int.
withBStringLen :: B.ByteString -> ((CString, CInt) -> IO a) -> IO a
withBStringLen bs f = do
  B.useAsCStringLen bs $ \(str, len) -> f (str, fromIntegral len)

unStatus :: CInt -> Bool
unStatus 0 = False
unStatus _ = True

getError :: ParserPtr -> IO XMLParseError
getError pp = do
    code <- xmlGetErrorCode pp
    cerr <- xmlErrorString code
    err <- peekCString cerr
    loc <- getParseLocation pp
    return $ XMLParseError err loc

-- |Obtain C value from Haskell 'Bool'.
--
cFromBool :: Num a => Bool -> a
cFromBool = fromBool

-- | Parse error, consisting of message text and error location
data XMLParseError = XMLParseError String XMLParseLocation deriving (Eq, Show)

instance NFData XMLParseError where
    rnf (XMLParseError msg loc) = rnf (msg, loc)

-- | Specifies a location of an event within the input text
data XMLParseLocation = XMLParseLocation {
        xmlLineNumber   :: Int64,  -- ^ Line number of the event
        xmlColumnNumber :: Int64,  -- ^ Column number of the event
        xmlByteIndex    :: Int64,  -- ^ Byte index of event from start of document
        xmlByteCount    :: Int64   -- ^ The number of bytes in the event
    }
    deriving (Eq, Show)

instance NFData XMLParseLocation where
    rnf (XMLParseLocation lin col ind cou) = rnf (lin, col, ind, cou)

getParseLocation :: ParserPtr -> IO XMLParseLocation
getParseLocation pp = do
    line <- xmlGetCurrentLineNumber pp
    col <- xmlGetCurrentColumnNumber pp
    index <- xmlGetCurrentByteIndex pp
    count <- xmlGetCurrentByteCount pp
    return $ XMLParseLocation {
            xmlLineNumber = fromIntegral line,
            xmlColumnNumber = fromIntegral col,
            xmlByteIndex = fromIntegral index,
            xmlByteCount = fromIntegral count
        }

-- Note on word sizes:
--
-- on expat 2.0:
-- XML_GetCurrentLineNumber returns XML_Size
-- XML_GetCurrentColumnNumber returns XML_Size
-- XML_GetCurrentByteIndex returns XML_Index
-- These are defined in expat_external.h
--
-- debian-i386 says XML_Size and XML_Index are 4 bytes.
-- ubuntu-amd64 says XML_Size and XML_Index are 8 bytes.
-- These two systems do NOT define XML_LARGE_SIZE, which would force these types
-- to be 64-bit.
--
-- If we guess the word size too small, it shouldn't matter: We will just discard
-- the most significant part.  If we get the word size too large, we will get
-- garbage (very bad).
--
-- So - what I will do is use CLong and CULong, which correspond to what expat
-- is using when XML_LARGE_SIZE is disabled, and give the correct sizes on the
-- two machines mentioned above.  At the absolute worst the word size will be too
-- short.

foreign import ccall unsafe "expat.h XML_GetErrorCode" xmlGetErrorCode
    :: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_GetCurrentLineNumber" xmlGetCurrentLineNumber
    :: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentColumnNumber" xmlGetCurrentColumnNumber
    :: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentByteIndex" xmlGetCurrentByteIndex
    :: ParserPtr -> IO CLong
foreign import ccall unsafe "expat.h XML_GetCurrentByteCount" xmlGetCurrentByteCount
    :: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_ErrorString" xmlErrorString
    :: CInt -> IO CString

type HParser = B.ByteString -> Bool -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)

foreign import ccall unsafe "hexpatNewParser"
  _hexpatNewParser :: Ptr CChar -> CInt -> IO MyParserPtr

foreign import ccall unsafe "hexpatGetParser"
  _hexpatGetParser :: MyParserPtr -> ParserPtr

data MyParser_struct
type MyParserPtr = Ptr MyParser_struct

foreign import ccall "&hexpatFreeParser" hexpatFreeParser :: FunPtr (MyParserPtr -> IO ())

hexpatNewParser :: Maybe Encoding
                -> Maybe (B.ByteString -> Maybe B.ByteString)  -- ^ Entity decoder
                -> Bool        -- ^ Whether to include input locations
                -> IO (HParser, IO XMLParseLocation)
hexpatNewParser enc mDecoder locations =
    withOptEncoding enc $ \cEnc -> do
        parser <- newForeignPtr hexpatFreeParser =<< _hexpatNewParser cEnc (cFromBool locations)
        return (parse parser, withForeignPtr parser $ \mp -> getParseLocation $ _hexpatGetParser mp)
  where
    parse parser = case mDecoder of
        Nothing -> \text final ->
            alloca $ \ppData ->
            alloca $ \pLen ->
            withBStringLen text $ \(textBuf, textLen) ->
            withForeignPtr parser $ \pp -> do
                ok <- unStatus <$> _hexpatParseUnsafe pp textBuf textLen (cFromBool final) ppData pLen
                pData <- peek ppData
                len <- peek pLen
                err <- if ok
                    then return Nothing
                    else Just <$> getError (_hexpatGetParser pp)
                fpData <- newForeignPtr funPtrFree pData
                return (fpData, len, err)
        Just decoder -> \text final ->
            alloca $ \ppData ->
            alloca $ \pLen ->
            withBStringLen text $ \(textBuf, textLen) ->
            withForeignPtr parser $ \pp -> do
                eh <- mkCEntityHandler . wrapCEntityHandler $ decoder
                _hexpatSetEntityHandler pp eh
                ok <- unStatus <$> _hexpatParseSafe pp textBuf textLen (cFromBool final) ppData pLen
                freeHaskellFunPtr eh
                pData <- peek ppData
                len <- peek pLen
                err <- if ok
                    then return Nothing
                    else Just <$> getError (_hexpatGetParser pp)
                fpData <- newForeignPtr funPtrFree pData
                return (fpData, len, err)

foreign import ccall unsafe "hexpatParse"
  _hexpatParseUnsafe :: MyParserPtr -> Ptr CChar -> CInt -> CInt -> Ptr (Ptr Word8) -> Ptr CInt -> IO CInt

foreign import ccall safe "hexpatParse"
  _hexpatParseSafe :: MyParserPtr -> Ptr CChar -> CInt -> CInt -> Ptr (Ptr Word8) -> Ptr CInt -> IO CInt

type CEntityHandler = Ptr CChar -> IO (Ptr CChar)

foreign import ccall safe "wrapper"
  mkCEntityHandler :: CEntityHandler
                   -> IO (FunPtr CEntityHandler)

peekByteStringLen :: CStringLen -> IO B.ByteString
{-# INLINE peekByteStringLen #-}
peekByteStringLen (cstr, len) =
    I.create (fromIntegral len) $ \ptr ->
        I.memcpy ptr (castPtr cstr) (fromIntegral len)

wrapCEntityHandler :: (B.ByteString -> Maybe B.ByteString) -> CEntityHandler
wrapCEntityHandler handler = h
  where
    h cname = do
        sz <- fromIntegral <$> I.c_strlen cname
        name <- peekByteStringLen (cname, sz)
        case handler name of
            Just text -> do
                let (fp, offset, len) = I.toForeignPtr text
                withForeignPtr fp $ \ctextBS -> do
                    ctext <- mallocBytes (len + 1) :: IO CString
                    I.memcpy (castPtr ctext) (ctextBS `plusPtr` offset) (fromIntegral len)
                    poke (ctext `plusPtr` len) (0 :: CChar)
                    return ctext
            Nothing -> return nullPtr

foreign import ccall unsafe "hexpatSetEntityHandler"
  _hexpatSetEntityHandler :: MyParserPtr -> FunPtr CEntityHandler -> IO ()

foreign import ccall "&free" funPtrFree :: FunPtr (Ptr Word8 -> IO ())