{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# OPTIONS_HADDOCK hide #-}
module Language.Hanspell.PnuSpellChecker
( PnuSpellChecker
, spellCheckByPnu
, pnuSpellCheckerMaxWords
) where
import qualified Data.ByteString.UTF8 as BU
import qualified Data.ByteString.Lazy.UTF8 as BLU
import Data.List
import Data.List.Split
import Network.HTTP.Types.Status
import Text.Regex
import Debug.Trace
import Control.Monad.Trans.Maybe
import Control.Monad.Trans.Class
import Network.HTTP.Client
import Network.HTTP.Client.TLS
import Data.Aeson
import GHC.Generics
import Language.Hanspell.Typo
import Language.Hanspell.Decoder
class Monad m => PnuSpellChecker m where
spellCheckByPnu :: String -> m [Typo]
instance PnuSpellChecker (MaybeT IO) where
spellCheckByPnu text = htmlToTypos <$> requestToPnu text
instance PnuSpellChecker IO where
spellCheckByPnu text = do
maybe <- runMaybeT $ htmlToTypos <$> requestToPnu text
case maybe of
Nothing -> return []
Just typos -> return typos
pnuSpellCheckerMaxWords :: Int
pnuSpellCheckerMaxWords = 295
pnuConnectError :: String
pnuConnectError =
"-- 한스펠 오류: 부산대 서버의 접속 오류로 일부 문장 교정에 실패했습니다."
invalidResponseFromPnu :: String
invalidResponseFromPnu =
"-- 한스펠 오류: 부산대 서비스가 유효하지 않은 양식을 반환했습니다. ("
++ pnuSpellCheckUrl ++ ")"
pnuSpellCheckUrl :: String
pnuSpellCheckUrl = "http://speller.cs.pusan.ac.kr/results"
gsub :: String -> String -> String -> String
gsub from to text = subRegex (mkRegex from) text to
requestToPnu :: String -> MaybeT IO String
requestToPnu text = if null (words text) then return "" else do
let text' = intercalate "\n " . splitOn "\n" $ text
manager <- lift $ newManager tlsManagerSettings
let pair = [("text1",BU.fromString text')]
initialRequest <- lift $ parseRequest pnuSpellCheckUrl
let request = (urlEncodedBody pair initialRequest) { method = "POST" }
response <- lift $ httpLbs request manager
let errCode = statusCode (responseStatus response)
let pnuResponseInfix = "<title>한국어 맞춤법/문법 검사기</title>"
if errCode == 200
then let body = BLU.toString (responseBody response)
in if pnuResponseInfix `isInfixOf` body
then return body
else trace invalidResponseFromPnu (MaybeT $ return Nothing)
else trace (pnuConnectError ++ " ("++ show errCode ++ ")")
(MaybeT $ return Nothing)
data PnuTypo = PnuTypo
{ help :: String
, errorIdx :: Int
, correctMethod :: Int
, start :: Int
, end :: Int
, orgStr :: String
, candWord :: String
} deriving (Show, Generic, ToJSON, FromJSON)
data PnuTypos = PnuTypos
{ str :: String
, errInfo :: [PnuTypo]
, idx :: Int
} deriving (Show, Generic, ToJSON, FromJSON)
htmlToTypos :: String -> [Typo]
htmlToTypos body =
case matchRegex (mkRegex "^\tdata = (.*);$") body of
Nothing -> []
Just [jsonText] -> map pnuTypoToTypo pnuTypos
where
Just pnuTyopsList = decode . BLU.fromString $ jsonText
pnuTypos = mconcat . map errInfo $ pnuTyopsList
pnuTypoToTypo :: PnuTypo -> Typo
pnuTypoToTypo pnuTypo =
Typo { errorType = ""
, token = decodeEntity . orgStr $ pnuTypo
, suggestions = splitOn "|" . decodeEntity $ suggestions'
, context = ""
, info = decodeEntity . gsub "\n\\(예\\) " "(예)\n"
. gsub " *<br/> *" "\n" . (++ "\n") . help $ pnuTypo
} where
suggestions' = if null . candWord $ pnuTypo
then orgStr pnuTypo
else candWord pnuTypo