{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-}
{-# OPTIONS_HADDOCK hide #-}
module Language.Hanspell.DaumSpellChecker
( DaumSpellChecker
, spellCheckByDaum
, daumSpellCheckerMaxChars
) where
import qualified Data.ByteString.UTF8 as BU
import qualified Data.ByteString.Lazy.UTF8 as BLU
import Data.List
import Data.List.Split
import Network.HTTP.Types.Status
import Text.Regex
import Debug.Trace
import Control.Monad.Trans.Maybe
import Control.Monad.Trans.Class
import Network.HTTP.Client
import Network.HTTP.Client.TLS
import Language.Hanspell.Typo
import Language.Hanspell.Decoder
class Monad m => DaumSpellChecker m where
spellCheckByDaum :: String -> m [Typo]
instance DaumSpellChecker (MaybeT IO) where
spellCheckByDaum :: String -> MaybeT IO [Typo]
spellCheckByDaum String
text = String -> [Typo]
htmlToTypos (String -> [Typo]) -> MaybeT IO String -> MaybeT IO [Typo]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> MaybeT IO String
requestToDaum String
text
instance DaumSpellChecker IO where
spellCheckByDaum :: String -> IO [Typo]
spellCheckByDaum String
text = do
Maybe [Typo]
maybe <- MaybeT IO [Typo] -> IO (Maybe [Typo])
forall (m :: * -> *) a. MaybeT m a -> m (Maybe a)
runMaybeT (MaybeT IO [Typo] -> IO (Maybe [Typo]))
-> MaybeT IO [Typo] -> IO (Maybe [Typo])
forall a b. (a -> b) -> a -> b
$ String -> [Typo]
htmlToTypos (String -> [Typo]) -> MaybeT IO String -> MaybeT IO [Typo]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> MaybeT IO String
requestToDaum String
text
case Maybe [Typo]
maybe of
Maybe [Typo]
Nothing -> [Typo] -> IO [Typo]
forall (m :: * -> *) a. Monad m => a -> m a
return []
Just [Typo]
typos -> [Typo] -> IO [Typo]
forall (m :: * -> *) a. Monad m => a -> m a
return [Typo]
typos
daumSpellCheckerMaxChars :: Int
daumSpellCheckerMaxChars :: Int
daumSpellCheckerMaxChars = Int
1000
daumConnectError :: String
daumConnectError :: String
daumConnectError =
String
"-- 한스펠 오류: 다음 서버의 접속 오류로 일부 문장 교정에 실패했습니다."
invalidResponseFromDaum :: String
invalidResponseFromDaum :: String
invalidResponseFromDaum =
String
"-- 한스펠 오류: 다음 서비스가 유효하지 않은 양식을 반환했습니다. ("
String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
daumSpellCheckUrl String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
")"
daumSpellCheckUrl :: String
daumSpellCheckUrl :: String
daumSpellCheckUrl = String
"https://dic.daum.net/grammar_checker.do"
requestToDaum :: String -> MaybeT IO String
requestToDaum :: String -> MaybeT IO String
requestToDaum String
text = do
Manager
manager <- IO Manager -> MaybeT IO Manager
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO Manager -> MaybeT IO Manager)
-> IO Manager -> MaybeT IO Manager
forall a b. (a -> b) -> a -> b
$ ManagerSettings -> IO Manager
newManager ManagerSettings
tlsManagerSettings
let pair :: [(ByteString, ByteString)]
pair = [(ByteString
"sentence",String -> ByteString
BU.fromString String
text)]
Request
initialRequest <- IO Request -> MaybeT IO Request
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO Request -> MaybeT IO Request)
-> IO Request -> MaybeT IO Request
forall a b. (a -> b) -> a -> b
$ String -> IO Request
forall (m :: * -> *). MonadThrow m => String -> m Request
parseRequest String
daumSpellCheckUrl
let request :: Request
request = ([(ByteString, ByteString)] -> Request -> Request
urlEncodedBody [(ByteString, ByteString)]
pair Request
initialRequest) { method :: ByteString
method = ByteString
"POST" }
Response ByteString
response <- IO (Response ByteString) -> MaybeT IO (Response ByteString)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO (Response ByteString) -> MaybeT IO (Response ByteString))
-> IO (Response ByteString) -> MaybeT IO (Response ByteString)
forall a b. (a -> b) -> a -> b
$ Request -> Manager -> IO (Response ByteString)
httpLbs Request
request Manager
manager
let errCode :: Int
errCode = Status -> Int
statusCode (Response ByteString -> Status
forall body. Response body -> Status
responseStatus Response ByteString
response)
let daumResponseInfix :: String
daumResponseInfix = String
"=\"screen_out\">맞춤법 검사기 본문</h2>"
if Int
errCode Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
200
then let body :: String
body = ByteString -> String
BLU.toString (Response ByteString -> ByteString
forall body. Response body -> body
responseBody Response ByteString
response)
in if String
daumResponseInfix String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isInfixOf` String
body
then String -> MaybeT IO String
forall (m :: * -> *) a. Monad m => a -> m a
return String
body
else String -> MaybeT IO String -> MaybeT IO String
forall a. String -> a -> a
trace String
invalidResponseFromDaum (IO (Maybe String) -> MaybeT IO String
forall (m :: * -> *) a. m (Maybe a) -> MaybeT m a
MaybeT (IO (Maybe String) -> MaybeT IO String)
-> IO (Maybe String) -> MaybeT IO String
forall a b. (a -> b) -> a -> b
$ Maybe String -> IO (Maybe String)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe String
forall a. Maybe a
Nothing)
else String -> MaybeT IO String -> MaybeT IO String
forall a. String -> a -> a
trace (String
daumConnectError String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" ("String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
errCode String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
")")
(IO (Maybe String) -> MaybeT IO String
forall (m :: * -> *) a. m (Maybe a) -> MaybeT m a
MaybeT (IO (Maybe String) -> MaybeT IO String)
-> IO (Maybe String) -> MaybeT IO String
forall a b. (a -> b) -> a -> b
$ Maybe String -> IO (Maybe String)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe String
forall a. Maybe a
Nothing)
htmlToTypos :: String -> [Typo]
htmlToTypos :: String -> [Typo]
htmlToTypos String
body =
let stripped :: String
stripped = [String] -> String
forall a. [a] -> a
head ([String] -> String) -> (String -> [String]) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> [String]
forall a. Eq a => [a] -> [a] -> [[a]]
splitOn String
"<span class=\"info_byte\">" (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String
body
splitted :: [String]
splitted = [String] -> [String]
forall a. [a] -> [a]
tail ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> [String]
forall a. Eq a => [a] -> [a] -> [[a]]
splitOn String
"data-error-type" (String -> [String]) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ String
stripped
in (String -> Typo) -> [String] -> [Typo]
forall a b. (a -> b) -> [a] -> [b]
map String -> Typo
htmlToTypo [String]
splitted
htmlToTypo :: String -> Typo
htmlToTypo :: String -> Typo
htmlToTypo String
body = Typo :: String -> String -> [String] -> String -> String -> Typo
Typo { errorType :: String
errorType = String -> String
decodeEntity ([String]
splitted[String] -> Int -> String
forall a. [a] -> Int -> a
!!Int
1)
, token :: String
token = String -> String
decodeEntity ([String]
splitted[String] -> Int -> String
forall a. [a] -> Int -> a
!!Int
3)
, suggestions :: [String]
suggestions = [String -> String
decodeEntity ([String]
splitted[String] -> Int -> String
forall a. [a] -> Int -> a
!!Int
5)]
, context :: String
context = String -> String
decodeEntity ([String]
splitted[String] -> Int -> String
forall a. [a] -> Int -> a
!!Int
7)
, info :: String
info = String
info'''
} where
gsub :: String -> String -> String -> String
gsub String
from String
to String
text = Regex -> String -> String -> String
subRegex (String -> Regex
mkRegex String
from) String
text String
to
splitted :: [String]
splitted = String -> String -> [String]
forall a. Eq a => [a] -> [a] -> [[a]]
splitOn String
"\"" (String -> [String]) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ [String] -> String
forall a. [a] -> a
head (String -> [String]
lines String
body)
info' :: String
info' = String -> String -> [String]
forall a. Eq a => [a] -> [a] -> [[a]]
splitOn String
"<div>" String
body[String] -> Int -> String
forall a. [a] -> Int -> a
!!Int
1
info'' :: String
info'' = [String] -> String
forall a. [a] -> a
head (String -> String -> [String]
forall a. Eq a => [a] -> [a] -> [[a]]
splitOn String
"<span class=\"info_byte\">" String
info')
info''' :: String
info''' = String -> String -> String -> String
gsub String
"^[ \n][ \n]*" String
""
(String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> String -> String
gsub String
"<[^>]*>" String
""
(String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> String -> String
gsub String
"<br[^>]*>" String
"\n"
(String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> String -> String
gsub String
"</span><span class.*\n" String
""
(String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> String -> String
gsub String
"<a href=\"#none\".*\n" String
""
(String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> String -> String
gsub String
"^<span>.*\n" String
""
(String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> String -> String
gsub String
"<strong class.*\n" String
""
(String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> String -> String
gsub String
".*strong class=.tit_help.>예문</strong.*\n" String
"(예)"
(String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> String -> String
gsub String
"\t" String
""
(String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String
info''