{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DoAndIfThenElse #-} {-# LANGUAGE ForeignFunctionInterface #-} {-# LANGUAGE EmptyDataDecls #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE NoImplicitPrelude #-} ------------------------------------------------------------------------------- -- | -- Module : GHC.Event.Windows.FFI -- Copyright : (c) Tamar Christina 2019 -- License : BSD-style (see the file libraries/base/LICENSE) -- -- Maintainer : libraries@haskell.org -- Stability : experimental -- Portability : non-portable -- -- WinIO Windows API Foreign Function imports -- ------------------------------------------------------------------------------- module GHC.Event.Windows.FFI ( -- * IOCP IOCP(..), CompletionKey, newIOCP, associateHandleWithIOCP, getQueuedCompletionStatusEx, postQueuedCompletionStatus, getOverlappedResult, -- * Completion Data CompletionData(..), CompletionCallback, withRequest, -- * Overlapped OVERLAPPED, LPOVERLAPPED, OVERLAPPED_ENTRY(..), LPOVERLAPPED_ENTRY, HASKELL_OVERLAPPED, LPHASKELL_OVERLAPPED, allocOverlapped, zeroOverlapped, pokeOffsetOverlapped, overlappedIOStatus, overlappedIONumBytes, -- * Cancel pending I/O cancelIoEx, cancelIoEx', -- * Monotonic time -- ** GetTickCount getTickCount64, -- ** QueryPerformanceCounter queryPerformanceCounter, queryPerformanceFrequency, -- ** Miscellaneous throwWinErr, setLastError ) where #include #include #include "winio_structs.h" ##include "windows_cconv.h" import Data.Maybe import Foreign import GHC.Base import GHC.Num ((*)) import GHC.Real (fromIntegral) import GHC.Show import GHC.Windows import qualified GHC.Event.Array as A import qualified GHC.Windows as Win32 import GHC.IO.Handle.Internals (debugIO) ------------------------------------------------------------------------ -- IOCP -- | An I/O completion port. newtype IOCP = IOCP HANDLE deriving (Eq, Ord, Show) type CompletionKey = ULONG_PTR -- | This function has two distinct purposes depending on the value of -- The completion port handle: -- -- - When the IOCP port is NULL then the function creates a new I/O completion -- port. See `newIOCP`. -- -- - When The port contains a valid handle then the given handle is -- associated with he given completion port handle. Once associated it -- cannot be easily changed. Associating a Handle with a Completion Port -- allows the I/O manager's worker threads to handle requests to the given -- handle. foreign import WINDOWS_CCONV unsafe "windows.h CreateIoCompletionPort" c_CreateIoCompletionPort :: HANDLE -> IOCP -> ULONG_PTR -> DWORD -> IO IOCP -- | Create a new I/O completion port. newIOCP :: IO IOCP newIOCP = failIf (== IOCP nullPtr) "newIOCP" $ c_CreateIoCompletionPort iNVALID_HANDLE_VALUE (IOCP nullPtr) 0 0 -- | Associate a HANDLE with an I/O completion port. associateHandleWithIOCP :: IOCP -> HANDLE -> CompletionKey -> IO () associateHandleWithIOCP iocp handle completionKey = failIf_ (/= iocp) "associateHandleWithIOCP" $ c_CreateIoCompletionPort handle iocp completionKey 0 foreign import WINDOWS_CCONV safe "windows.h GetOverlappedResult" c_GetOverlappedResult :: HANDLE -> LPOVERLAPPED -> Ptr DWORD -> BOOL -> IO BOOL -- | Get the result of a single overlap operation without the IO manager getOverlappedResult :: HANDLE -> Ptr OVERLAPPED -> BOOL -> IO (Maybe DWORD) getOverlappedResult handle lp block = alloca $ \bytes -> do res <- c_GetOverlappedResult handle lp bytes block if res then fmap Just $ peek bytes else return Nothing foreign import WINDOWS_CCONV safe "windows.h GetQueuedCompletionStatusEx" c_GetQueuedCompletionStatusEx :: IOCP -> LPOVERLAPPED_ENTRY -> Word32 -> Ptr ULONG -> DWORD -> BOOL -> IO BOOL -- | Note [Completion Ports] -- When an I/O operation has been queued by an operation -- (ReadFile/WriteFile/etc) it is placed in a queue that the driver uses when -- servicing IRQs. This queue has some important properties: -- -- 1.) It is not an ordered queue. Requests may be performed out of order as -- as the OS's native I/O manager may try to re-order requests such that as -- few random seeks as possible are needed to complete the pending -- operations. As such do not assume a fixed order between something being -- queued and dequeued. -- -- 2.) Operations may skip the queue entirely. In which case they do not end in -- in this function. (This is an optimization flag we have turned on. See -- `openFile`.) -- -- 3.) Across this call the specified OVERLAPPED_ENTRY buffer MUST remain live, -- and the buffer for an I/O operation cannot be freed or moved until -- `getOverlappedResult` says it's done. The reason is the kernel may not -- have fully released the buffer, or finished writing to it when this -- operation returns. Failure to adhere to this will cause your IRQs to be -- silently dropped and your program will never receive a completion for it. -- This means that the OVERLAPPED buffer must also remain valid for the -- duration of the call and as such must be allocated on the unmanaged heap. -- -- 4.) When a thread calls this method it is associated with the I/O manager's -- worker threads pool. You should always use dedicated threads for this -- since the OS I/O manager will now monitor the threads. If the thread -- becomes blocked for whatever reason, the Haskell I/O manager will wake up -- another threads from it's pool to service the remaining results. -- A new thread will also be woken up from the pool when the previous thread -- is busy servicing requests and new requests have finished. For this -- reason the Haskell I/O manager multiplexes I/O operations from N haskell -- threads into 1 completion port, which is serviced by M native threads in -- an asynchronous method. This allows it to scale efficiently. getQueuedCompletionStatusEx :: IOCP -> A.Array OVERLAPPED_ENTRY -> DWORD -- ^ Timeout in milliseconds (or -- 'GHC.Windows.iNFINITE') -> IO Int getQueuedCompletionStatusEx iocp arr timeout = alloca $ \num_removed_ptr ->do A.unsafeLoad arr $ \oes cap -> do -- TODO: remove after debugging fillBytes oes 0 (cap * (sizeOf (undefined :: OVERLAPPED_ENTRY))) debugIO $ "-- call getQueuedCompletionStatusEx " -- don't block the call if the rts is not supporting threads. -- this would block the entire program. let alertable = False -- not rtsSupportsBoundThreads ok <- c_GetQueuedCompletionStatusEx iocp oes (fromIntegral cap) num_removed_ptr timeout alertable debugIO $ "-- call getQueuedCompletionStatusEx: " ++ show ok err <- getLastError nc <- (peek num_removed_ptr) debugIO $ "-- getQueuedCompletionStatusEx: n=" ++ show nc ++ " ,err=" ++ show err if ok then fromIntegral `fmap` peek num_removed_ptr else do debugIO $ "failed getQueuedCompletionStatusEx: " ++ show err if err == #{const WAIT_TIMEOUT} || alertable then return 0 else failWith "GetQueuedCompletionStatusEx" err overlappedIOStatus :: LPOVERLAPPED -> IO NTSTATUS overlappedIOStatus lpol = do status <- #{peek OVERLAPPED, Internal} lpol -- TODO: Map NTSTATUS to ErrCode? -- See https://github.com/libuv/libuv/blob/b12624c13693c4d29ca84b3556eadc9e9c0936a4/src/win/winsock.c#L153 return status {-# INLINE overlappedIOStatus #-} overlappedIONumBytes :: LPOVERLAPPED -> IO ULONG_PTR overlappedIONumBytes lpol = do bytes <- #{peek OVERLAPPED, InternalHigh} lpol return bytes {-# INLINE overlappedIONumBytes #-} foreign import WINDOWS_CCONV unsafe "windows.h PostQueuedCompletionStatus" c_PostQueuedCompletionStatus :: IOCP -> DWORD -> ULONG_PTR -> LPOVERLAPPED -> IO BOOL -- | Manually post a completion to the specified I/O port. This will wake up -- a thread waiting `GetQueuedCompletionStatusEx`. postQueuedCompletionStatus :: IOCP -> DWORD -> CompletionKey -> LPOVERLAPPED -> IO () postQueuedCompletionStatus iocp numBytes completionKey lpol = failIfFalse_ "PostQueuedCompletionStatus" $ c_PostQueuedCompletionStatus iocp numBytes completionKey lpol ------------------------------------------------------------------------ -- Completion Data -- | Called when the completion is delivered. type CompletionCallback a = ErrCode -- ^ 0 indicates success -> DWORD -- ^ Number of bytes transferred -> IO a -- | Callback type that will be called when an I/O operation completes. type IOCallback = CompletionCallback () -- | Structure that the I/O manager uses to associate callbacks with -- additional payload such as their OVERLAPPED structure and Win32 handle -- etc. *Must* be kept in sync with that in `winio_structs.h` or horrible things -- happen. -- -- We keep the handle around for the benefit of ghc-external libraries making -- use of the manager. data CompletionData = CompletionData { cdHandle :: !HANDLE , cdCallback :: !(StablePtr IOCallback) } instance Storable CompletionData where sizeOf _ = #{size CompletionData} alignment _ = #{alignment CompletionData} peek ptr = do cdCallback <- #{peek CompletionData, cdCallback} ptr cdHandle <- #{peek CompletionData, cdHandle} ptr let !cd = CompletionData{..} return cd poke ptr CompletionData{..} = do #{poke CompletionData, cdCallback} ptr cdCallback #{poke CompletionData, cdHandle} ptr cdHandle ------------------------------------------------------------------------ -- Overlapped -- | Tag type for @LPOVERLAPPED@. data OVERLAPPED -- | Tag type for the extended version of @OVERLAPPED@ containg some book -- keeping information. data HASKELL_OVERLAPPED -- | Identifies an I/O operation. Used as the @LPOVERLAPPED@ parameter -- for overlapped I/O functions (e.g. @ReadFile@, @WSASend@). type LPOVERLAPPED = Ptr OVERLAPPED -- | Pointer to the extended HASKELL_OVERLAPPED function. type LPHASKELL_OVERLAPPED = Ptr HASKELL_OVERLAPPED -- | An array of these is passed to GetQueuedCompletionStatusEx as an output -- argument. data OVERLAPPED_ENTRY = OVERLAPPED_ENTRY { lpCompletionKey :: ULONG_PTR, lpOverlapped :: LPOVERLAPPED, dwNumberOfBytesTransferred :: DWORD } type LPOVERLAPPED_ENTRY = Ptr OVERLAPPED_ENTRY instance Storable OVERLAPPED_ENTRY where sizeOf _ = #{size OVERLAPPED_ENTRY} alignment _ = #{alignment OVERLAPPED_ENTRY} peek ptr = do lpCompletionKey <- #{peek OVERLAPPED_ENTRY, lpCompletionKey} ptr lpOverlapped <- #{peek OVERLAPPED_ENTRY, lpOverlapped} ptr dwNumberOfBytesTransferred <- #{peek OVERLAPPED_ENTRY, dwNumberOfBytesTransferred} ptr let !oe = OVERLAPPED_ENTRY{..} return oe poke ptr OVERLAPPED_ENTRY{..} = do #{poke OVERLAPPED_ENTRY, lpCompletionKey} ptr lpCompletionKey #{poke OVERLAPPED_ENTRY, lpOverlapped} ptr lpOverlapped #{poke OVERLAPPED_ENTRY, dwNumberOfBytesTransferred} ptr dwNumberOfBytesTransferred -- | Allocate a new -- structure on the unmanaged heap. This also zeros the memory to -- prevent the values inside the struct to be incorrectlt interpreted as data -- payload. -- -- We extend the overlapped structure with some extra book keeping information -- such that we don't have to do a lookup on the Haskell side. -- -- Future: We can gain some performance here by using a pool instead of calling -- malloc for each request. A simple block allocator would be very -- useful here, especially when we implement sockets support. allocOverlapped :: Word64 -- ^ Offset/OffsetHigh -> IO (Ptr HASKELL_OVERLAPPED) allocOverlapped offset = do lpol <- mallocBytes #{size HASKELL_OVERLAPPED} zeroOverlapped lpol pokeOffsetOverlapped (castPtr lpol) offset return lpol -- | Zero-fill an HASKELL_OVERLAPPED structure. zeroOverlapped :: LPHASKELL_OVERLAPPED -> IO () zeroOverlapped lpol = fillBytes lpol 0 #{size HASKELL_OVERLAPPED} {-# INLINE zeroOverlapped #-} -- | Set the offset field in an OVERLAPPED structure. pokeOffsetOverlapped :: LPOVERLAPPED -> Word64 -> IO () pokeOffsetOverlapped lpol offset = do let (offsetHigh, offsetLow) = Win32.ddwordToDwords offset #{poke OVERLAPPED, Offset} lpol offsetLow #{poke OVERLAPPED, OffsetHigh} lpol offsetHigh {-# INLINE pokeOffsetOverlapped #-} -- | Set the event field in an OVERLAPPED structure. pokeEventOverlapped :: LPOVERLAPPED -> HANDLE -> IO () pokeEventOverlapped lpol event = do #{poke OVERLAPPED, hEvent} lpol event {-# INLINE pokeEventOverlapped #-} ------------------------------------------------------------------------ -- Request management -- [Note AsyncHandles] -- In `winio` we have designed it to work in asynchronous mode always. -- According to the MSDN documentation[1][2], when a handle is not opened -- in asynchronous mode then the operation would simply work but operate -- synchronously. -- -- This seems to happen as documented for `File` handles, but `pipes` don't -- seem to follow this documented behavior and so are a problem. -- Under `msys2` your standard handles are actually pipes, not console -- handles or files. As such running under an msys2 console causes a hang -- as the pipe read never returns. -- -- [1] https://docs.microsoft.com/en-us/windows/win32/fileio/synchronous-and-asynchronous-i-o -- [2] https://docs.microsoft.com/en-us/windows/win32/sync/synchronization-and-overlapped-input-and-output -- -- As such we need to annotate all NativeHandles with a Boolean to indicate -- wether it's an asynchronous handle or not. -- This allows us to manually wait for the completion instead of relying -- on the I/O system to do the right thing. As we have been using the -- buffers in async mode we may not have moved the file pointer on the kernel -- object, as such we still need to give an `OVERLAPPED` structure, but we -- instead create an event object that we can wait on. -- -- As documented in MSDN this even object must be in manual reset mode. This -- approach gives us the flexibility, with minimum impact to support both -- synchronous and asynchronous access. -- -- Additional approaches explored -- -- Normally the I/O system is in full control of all Handles it creates, with -- one big exception: inheritance. -- -- For any `HANDLE` we inherit we don't know how it's been open. A different -- solution I have explored was to try to detect the `HANDLE` mode. -- But this approach would never work for a few reasons: -- -- 1. The presence of an asynchronous flag does not indicate that we are able -- to handle the operation asynchronously. In particular, just because a -- `HANDLE` is open in async mode, it may not be associated with our -- completion port. -- 2. One can only associate a `HANDLE` to a *single* completion port. As -- such, if the handle is opened in async mode but already registered to a -- completion port then we can't use it asynchronously. -- 3. You can only associate a completion port once, even if it's the same -- port. This means were we to strap a `HANDLE` of it's `NativeHandle` -- wrapper and then wrap it again, we can't retest as the result would be -- invalid. This is an issue because to pass `HANDLE`s we have to pass -- the native OS Handle not the Haskell one. i.e. remote-iserv. -- See [Note AsyncHandles] withRequest :: Bool -> Word64 -> HANDLE -> IOCallback -> (Ptr HASKELL_OVERLAPPED -> Ptr CompletionData -> IO a) -> IO a withRequest async offset hdl cb f = do -- Create the completion record and store it. -- We only need the record when we enqueue a request, however if we -- delay creating it then we will run into a race condition where the -- driver may have finished servicing the request before we were ready -- and so the request won't have the book keeping information to know -- what to do. So because of that we always create the payload, If we -- need it ok, if we don't that's no problem. This approach prevents -- expensive lookups in hash-tables. -- -- Todo: Use a memory pool for this so we don't have to hit malloc every -- time. This would allow us to scale better. cb_sptr <- newStablePtr cb let cbData :: CompletionData cbData = CompletionData hdl cb_sptr r <- allocaBytes #{size HASKELL_OVERLAPPED} $ \hs_lpol -> with cbData $ \cdData -> do zeroOverlapped hs_lpol let lpol = castPtr hs_lpol pokeOffsetOverlapped lpol offset -- If doing a synchronous request then register an event object. -- This event object MUST be manual reset per MSDN. case async of True -> f hs_lpol cdData False -> do event <- failIfNull "withRequest (create)" $ c_CreateEvent nullPtr True False nullPtr debugIO $ "{{ event " ++ show event ++ " for " ++ show hs_lpol pokeEventOverlapped lpol event res <- f hs_lpol cdData -- Once the request has finished, close the object and free it. failIfFalse_ "withRequest (free)" $ c_CloseHandle event return res freeStablePtr cb_sptr return r -- | Create an event object for use when the HANDLE isn't asynchronous foreign import WINDOWS_CCONV unsafe "windows.h CreateEventW" c_CreateEvent :: Ptr () -> Bool -> Bool -> LPCWSTR -> IO HANDLE -- | Close a handle object foreign import WINDOWS_CCONV unsafe "windows.h CloseHandle" c_CloseHandle :: HANDLE -> IO Bool ------------------------------------------------------------------------ -- Cancel pending I/O -- | CancelIo shouldn't block, but cancellation happens infrequently, -- so we might as well be on the safe side. foreign import WINDOWS_CCONV unsafe "windows.h CancelIoEx" c_CancelIoEx :: HANDLE -> LPOVERLAPPED -> IO BOOL -- | Cancel all pending overlapped I/O for the given file that was initiated by -- the current OS thread. Cancelling is just a request for cancellation and -- before the OVERLAPPED struct is freed we must make sure that the IRQ has been -- removed from the queue. See `getOverlappedResult`. cancelIoEx :: HANDLE -> LPOVERLAPPED -> IO () cancelIoEx h o = failIfFalse_ "CancelIoEx" . c_CancelIoEx h $ o cancelIoEx' :: HANDLE -> LPOVERLAPPED -> IO Bool cancelIoEx' = c_CancelIoEx ------------------------------------------------------------------------ -- Monotonic time foreign import WINDOWS_CCONV "windows.h GetTickCount64" c_GetTickCount64 :: IO #{type ULONGLONG} -- | Call the @GetTickCount64@ function, which returns a monotonic time in -- milliseconds. -- -- Problems: -- -- * Low resolution (10 to 16 milliseconds). -- -- getTickCount64 :: IO Word64 getTickCount64 = c_GetTickCount64 -- | Call the @QueryPerformanceCounter@ function. -- -- Problems: -- -- * Might not be available on some hardware. Use 'queryPerformanceFrequency' -- to test for availability before calling this function. -- -- * On a multiprocessor computer, may produce different results on -- different processors due to hardware bugs. -- -- To get a monotonic time in seconds, divide the result of -- 'queryPerformanceCounter' by that of 'queryPerformanceFrequency'. -- -- queryPerformanceCounter :: IO Int64 queryPerformanceCounter = callQP c_QueryPerformanceCounter >>= maybe (throwGetLastError "QueryPerformanceCounter") return -- | Call the @QueryPerformanceFrequency@ function. Return 'Nothing' if the -- hardware does not provide a high-resolution performance counter. -- -- queryPerformanceFrequency :: IO (Maybe Int64) queryPerformanceFrequency = do m <- callQP c_QueryPerformanceFrequency case m of Nothing -> return Nothing Just 0 -> return Nothing -- Shouldn't happen; just a safeguard to -- avoid a zero denominator. Just freq -> return (Just freq) type QPFunc = Ptr Int64 -> IO BOOL foreign import WINDOWS_CCONV "Windows.h QueryPerformanceCounter" c_QueryPerformanceCounter :: QPFunc foreign import WINDOWS_CCONV "Windows.h QueryPerformanceFrequency" c_QueryPerformanceFrequency :: QPFunc callQP :: QPFunc -> IO (Maybe Int64) callQP qpfunc = allocaBytes #{size LARGE_INTEGER} $ \ptr -> do ok <- qpfunc ptr if ok then do n <- #{peek LARGE_INTEGER, QuadPart} ptr return (Just n) else return Nothing ------------------------------------------------------------------------ -- Miscellaneous type ULONG_PTR = #type ULONG_PTR throwWinErr :: String -> ErrCode -> IO a throwWinErr loc err = do c_SetLastError err Win32.failWith loc err setLastError :: ErrCode -> IO () setLastError = c_SetLastError foreign import WINDOWS_CCONV unsafe "windows.h SetLastError" c_SetLastError :: ErrCode -> IO ()