module RegExChar.ExtendedRegExChar(
ExtendedRegExChar(..),
InputData,
(+~),
(=~),
(/~)
) where
import Control.Applicative((<$>), (<*>))
import qualified Data.List
import qualified RegExChar.MetaChar as MetaChar
import qualified RegExDot.Anchor as Anchor
import qualified RegExDot.Consumer as Consumer
import RegExDot.DSL((<~>), (-:))
import qualified RegExDot.RegEx as RegEx
import qualified RegExDot.RegExOpts as RegExOpts
import qualified RegExDot.Repeatable as Repeatable
import qualified Text.ParserCombinators.Parsec as Parsec
import Text.ParserCombinators.Parsec((<?>), (<|>))
import qualified ToolShed.Data.Pair
import qualified ToolShed.SelfValidate
infix 4 +~, =~, /~
data ExtendedRegExChar = MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives :: Bool,
extendedRegEx :: RegEx.ExtendedRegEx Char
} deriving Eq
type InputData = RegEx.InputData Char
instance ToolShed.SelfValidate.SelfValidator ExtendedRegExChar where
getErrors MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives',
extendedRegEx = extendedRegEx'
}
| not $ ToolShed.SelfValidate.isValid extendedRegEx' = ToolShed.SelfValidate.getErrors extendedRegEx'
| otherwise = ToolShed.SelfValidate.extractErrors [
(hasNonCapturingTopLevelAlternatives' && any (not . RegEx.isCaptureGroup . Repeatable.base) (RegEx.concatenation extendedRegEx'), "Invalid NonCapturingTopLevelAlternatives.")
]
instance Consumer.Consumer ExtendedRegExChar where
consumptionProfile = Consumer.consumptionProfile . extendedRegEx
starHeight = Consumer.starHeight . extendedRegEx
instance RegEx.ShortcutExpander Char where
expand c = error $ "RegExDot.RegEx.ShortcutExpander.expand RegExChar.ExtendedRegExChar:\tunrecognised shortcut '" ++ show c ++ "'."
instance Read ExtendedRegExChar where
readsPrec _ s
| s == reverse Anchor.tokens = [
(
MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = False,
extendedRegEx = (Just Anchor.Bow, Just Anchor.Stern) <~> []
},
""
)
]
| otherwise = let
extendedRegExCharParser :: Parsec.Parser ExtendedRegExChar
extendedRegExCharParser = reduce <$> alternativesParser where
reduce :: RegEx.Alternatives Char -> ExtendedRegExChar
reduce alternatives
| RegEx.isSingletonAlternatives alternatives = MkExtendedRegExChar False . head $ RegEx.deconstructAlternatives alternatives
| otherwise = MkExtendedRegExChar True $ (Nothing, Nothing) <~> RegEx.CaptureGroup alternatives -: []
alternativesParser :: Parsec.Parser (RegEx.Alternatives Char)
alternativesParser = RegEx.MkAlternatives <$> extendedRegExParser `Parsec.sepBy1` (Parsec.char RegEx.alternativeExtendedRegExSeparatorToken <?> "RegEx.alternativeExtendedRegExSeparatorToken " ++ show RegEx.alternativeExtendedRegExSeparatorToken) where
extendedRegExParser :: Parsec.Parser (RegEx.ExtendedRegEx Char)
extendedRegExParser = do
maybeBowAnchor <- Parsec.option Nothing $ (Parsec.char Anchor.bowToken <?> "Anchor.bowToken " ++ show Anchor.bowToken) >> return (Just Anchor.Bow)
repeatableRequirementList <- repeatableRequirementListParser
(
do
repeatableCaptureGroup <- Repeatable.repeatableParser . RegEx.CaptureGroup =<< uncurry Parsec.between (
ToolShed.Data.Pair.mirror Parsec.char RegEx.captureGroupDelimiters
) alternativesParser <?> "RegEx.captureGroupDelimiters " ++ show RegEx.captureGroupDelimiters
extendedRegEx' <- extendedRegExParser --Recurse.
return $ RegEx.transformExtendedRegEx ((repeatableRequirementList ++) . (repeatableCaptureGroup :)) extendedRegEx' { RegEx.bowAnchor = maybeBowAnchor }
) <|> (
do
maybeSternAnchor <- Parsec.option Nothing $ (Parsec.char Anchor.sternToken <?> "Anchor.sternToken " ++ show Anchor.sternToken) >> return (Just Anchor.Stern)
return RegEx.MkExtendedRegEx {
RegEx.bowAnchor = maybeBowAnchor,
RegEx.concatenation = repeatableRequirementList,
RegEx.sternAnchor = maybeSternAnchor
}
)
where
repeatableRequirementListParser :: Parsec.Parser (RegEx.Concatenation Char)
repeatableRequirementListParser = Parsec.choice [
Parsec.try . Parsec.lookAhead $ (
Parsec.char Anchor.sternToken <?> "Anchor.sternToken " ++ show Anchor.sternToken
) >> (
(
Parsec.eof >> return []
) <|> (
Parsec.oneOf [RegEx.alternativeExtendedRegExSeparatorToken, snd RegEx.captureGroupDelimiters] >> return []
)
),
(:) <$> (
MetaChar.metaCharParser >>= Repeatable.repeatableParser . RegEx.Require . MetaChar.deconstruct
) <*> repeatableRequirementListParser,
return []
]
in (
error . ("readsPrec RegExChar.ExtendedRegExChar:\tparse-error; " ++) . show
) `either` (
\pair@(extendedRegExChar, _) -> if ToolShed.SelfValidate.isValid extendedRegExChar
then [pair]
else error $ ToolShed.SelfValidate.getFirstError extendedRegExChar
) $ Parsec.parse (
(,) <$> extendedRegExCharParser <*> Parsec.getInput
) "ExtendedRegExChar" s
instance Show ExtendedRegExChar where
showsPrec _ MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives',
extendedRegEx = RegEx.MkExtendedRegEx {
RegEx.bowAnchor = maybeBowAnchor,
RegEx.concatenation = concatenation',
RegEx.sternAnchor = maybeSternAnchor
}
} = RegEx.showsMaybeAnchor maybeBowAnchor . foldl (.) (showString "") (
(
let
showAlternatives :: RegEx.Alternatives Char -> [ShowS]
showAlternatives = Data.List.intersperse (showChar RegEx.alternativeExtendedRegExSeparatorToken) . map (shows . MkExtendedRegExChar False) . RegEx.deconstructAlternatives
in if hasNonCapturingTopLevelAlternatives'
then map (
\repeatablePattern -> case Repeatable.base repeatablePattern of
RegEx.CaptureGroup alternatives -> foldr (.) (showString "") $ showAlternatives alternatives
_ -> error $ "Show RegExChar.ExtendedRegExChar: unexpected " ++ show repeatablePattern
)
else map (
\repeatablePattern -> (
case Repeatable.base repeatablePattern of
RegEx.Require meta -> shows $ MetaChar.MkMetaChar meta
RegEx.CaptureGroup alternatives -> showChar (
fst RegEx.captureGroupDelimiters
) . foldr (.) (
showChar $ snd RegEx.captureGroupDelimiters
) (
showAlternatives alternatives
)
) . Repeatable.showSuffix repeatablePattern
)
) concatenation'
) . RegEx.showsMaybeAnchor maybeSternAnchor
(+~)
:: InputData
-> RegExOpts.RegExOpts ExtendedRegExChar
-> RegEx.Result Char
inputData +~ regExOpts = inputData RegEx.+~ fmap extendedRegEx regExOpts
(=~)
:: InputData
-> RegExOpts.RegExOpts ExtendedRegExChar
-> Bool
inputData =~ regExOpts = inputData RegEx.=~ fmap extendedRegEx regExOpts
(/~)
:: InputData
-> RegExOpts.RegExOpts ExtendedRegExChar
-> Bool
(/~) inputData = not . (inputData =~)