{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Skylighting.Regex (
Regex
, RegexException
, RE(..)
, compileRegex
, matchRegex
, convertOctalEscapes
) where
import qualified Control.Exception as E
import Data.Aeson
import Data.Binary (Binary)
import qualified Data.ByteString.Base64 as Base64
import qualified Data.ByteString.Char8 as BS
import Data.ByteString.UTF8 (toString)
import Data.Data
import qualified Data.Text as Text
import qualified Data.Text.Encoding as TE
import GHC.Generics (Generic)
import System.IO.Unsafe (unsafePerformIO)
import Text.Printf
import Text.Regex.PCRE.ByteString
newtype RegexException = RegexException String
deriving (Show, Typeable, Generic)
instance E.Exception RegexException
data RE = RE{
reString :: BS.ByteString
, reCaseSensitive :: Bool
} deriving (Show, Read, Ord, Eq, Data, Typeable, Generic)
instance Binary RE
instance ToJSON RE where
toJSON re = object [ "reString" .= encodeToText (reString re)
, "reCaseSensitive" .= reCaseSensitive re ]
instance FromJSON RE where
parseJSON = withObject "RE" $ \v ->
RE <$> ((v .: "reString") >>= decodeFromText)
<*> v .: "reCaseSensitive"
compileRegex :: Bool -> BS.ByteString -> Regex
compileRegex caseSensitive regexpStr =
let opts = compAnchored + compUTF8 +
if caseSensitive then 0 else compCaseless
in case unsafePerformIO $ compile opts (execNotEmpty) regexpStr of
Left (off,msg) -> E.throw $ RegexException $
"Error compiling regex /" ++ toString regexpStr ++
"/ at offset " ++ show off ++ "\n" ++ msg
Right r -> r
convertOctalEscapes :: String -> String
convertOctalEscapes [] = ""
convertOctalEscapes ('\\':'0':x:y:z:rest)
| all isOctalDigit [x,y,z] = '\\':x:y:z: convertOctalEscapes rest
convertOctalEscapes ('\\':x:y:z:rest)
| all isOctalDigit [x,y,z] ='\\':x:y:z: convertOctalEscapes rest
convertOctalEscapes ('\\':'o':'{':zs) =
case break (=='}') zs of
(ds, '}':rest) | all isOctalDigit ds && not (null ds) ->
case reads ('0':'o':ds) of
((n :: Int,[]):_) ->
printf "\\x{%x}" n ++ convertOctalEscapes rest
_ -> E.throw $ RegexException $
"Unable to read octal number: " ++ ds
_ -> '\\':'o':'{': convertOctalEscapes zs
convertOctalEscapes (x:xs) = x : convertOctalEscapes xs
isOctalDigit :: Char -> Bool
isOctalDigit c = c >= '0' && c <= '7'
matchRegex :: Regex -> BS.ByteString -> Maybe [BS.ByteString]
matchRegex r s = case unsafePerformIO (regexec r s) of
Right (Just (_, mat, _ , capts)) ->
Just (mat : capts)
Right Nothing -> Nothing
Left (_rc, msg) -> E.throw $ RegexException msg
encodeToText :: BS.ByteString -> Text.Text
encodeToText = TE.decodeUtf8 . Base64.encode
decodeFromText :: (Monad m) => Text.Text -> m BS.ByteString
decodeFromText = either fail return . Base64.decode . TE.encodeUtf8