{-# OPTIONS_GHC -fno-warn-orphans #-}
{-
Copyright (C) 2010 Dr. Alistair Ward
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
-}
{- |
[@AUTHOR@] Dr. Alistair Ward
[@DESCRIPTION@] An 'RegEx.ExtendedRegEx', which has been specialised for 'Char', to create a tradition non-polymorphic /regex/.
-}
module RegExChar.ExtendedRegExChar(
-- * Types
-- ** Type-synonyms
ExtendedRegExChar(..),
InputData,
-- * Functions
-- ** Operators
(+~),
(=~),
(/~)
) where
import Control.Applicative((<$>), (<*>))
import qualified Data.List
import qualified RegExChar.MetaChar as MetaChar
import qualified RegExDot.Anchor as Anchor
import qualified RegExDot.Consumer as Consumer
import RegExDot.DSL((<~>), (-:))
import qualified RegExDot.RegEx as RegEx
import qualified RegExDot.RegExOpts as RegExOpts
import qualified RegExDot.Repeatable as Repeatable
import qualified Text.ParserCombinators.Parsec as Parsec
import Text.ParserCombinators.Parsec((>), (<|>))
import qualified ToolShed.Data.Pair
import qualified ToolShed.SelfValidate
infix 4 +~, =~, /~ --Same as (==) & (/=).
-- | Specialise a 'RegEx.ExtendedRegEx' for 'Char', & encapsulate it to permit tailored instance-declarations.
data ExtendedRegExChar = MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives :: Bool, -- ^ The string from which a 'RegEx.ExtendedRegEx' is read, may, if data-capture isn't required, omit explicit delimiters around top-level 'RegEx.Alternatives'.
extendedRegEx :: RegEx.ExtendedRegEx Char
} deriving Eq
-- | Abbreviation.
type InputData = RegEx.InputData Char
instance ToolShed.SelfValidate.SelfValidator ExtendedRegExChar where
getErrors MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives',
extendedRegEx = extendedRegEx'
}
| not $ ToolShed.SelfValidate.isValid extendedRegEx' = ToolShed.SelfValidate.getErrors extendedRegEx'
| otherwise = ToolShed.SelfValidate.extractErrors [
(hasNonCapturingTopLevelAlternatives' && any (not . RegEx.isCaptureGroup . Repeatable.base) (RegEx.concatenation extendedRegEx'), "Invalid NonCapturingTopLevelAlternatives.")
]
instance Consumer.Consumer ExtendedRegExChar where
consumptionProfile = Consumer.consumptionProfile . extendedRegEx
starHeight = Consumer.starHeight . extendedRegEx
instance RegEx.ShortcutExpander Char where
expand c = error $ "RegExDot.RegEx.ShortcutExpander.expand RegExChar.ExtendedRegExChar:\tunrecognised shortcut '" ++ show c ++ "'."
instance Read ExtendedRegExChar where
readsPrec _ s
| s == reverse Anchor.tokens = [
(
MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = False,
extendedRegEx = (Just Anchor.Bow, Just Anchor.Stern) <~> []
},
""
)
] --The order of adjacent zero-width assertions is irrelevant.
| otherwise = let
extendedRegExCharParser :: Parsec.Parser ExtendedRegExChar
extendedRegExCharParser = reduce {-correct prior assumption-} <$> alternativesParser {-assume non-capturing top-level Alternatives-} where
reduce :: RegEx.Alternatives Char -> ExtendedRegExChar
reduce alternatives
| RegEx.isSingletonAlternatives alternatives = MkExtendedRegExChar False . head $ RegEx.deconstructAlternatives alternatives
| otherwise = MkExtendedRegExChar True $ (Nothing, Nothing) <~> RegEx.CaptureGroup alternatives -: [] --Infer non-capturing top-level 'RegEx.Alternatives' from the presence of 'RegEx.alternativeExtendedRegExSeparatorToken's.
alternativesParser :: Parsec.Parser (RegEx.Alternatives Char)
alternativesParser = RegEx.MkAlternatives <$> extendedRegExParser `Parsec.sepBy1` (Parsec.char RegEx.alternativeExtendedRegExSeparatorToken > "RegEx.alternativeExtendedRegExSeparatorToken " ++ show RegEx.alternativeExtendedRegExSeparatorToken) where
extendedRegExParser :: Parsec.Parser (RegEx.ExtendedRegEx Char)
extendedRegExParser = do
maybeBowAnchor <- Parsec.option Nothing {-default-} $ (Parsec.char Anchor.bowToken > "Anchor.bowToken " ++ show Anchor.bowToken) >> return {-to GenParser-monad-} (Just Anchor.Bow)
repeatableRequirementList <- repeatableRequirementListParser
(
do
repeatableCaptureGroup <- Repeatable.repeatableParser . RegEx.CaptureGroup =<< uncurry Parsec.between (
ToolShed.Data.Pair.mirror Parsec.char RegEx.captureGroupDelimiters
) alternativesParser {-recurse-} > "RegEx.captureGroupDelimiters " ++ show RegEx.captureGroupDelimiters
extendedRegEx' <- extendedRegExParser --Recurse.
return {-to GenParser-monad-} $ RegEx.transformExtendedRegEx ((repeatableRequirementList ++) . (repeatableCaptureGroup :)) extendedRegEx' { RegEx.bowAnchor = maybeBowAnchor }
) <|> (
do
maybeSternAnchor <- Parsec.option Nothing {-default-} $ (Parsec.char Anchor.sternToken > "Anchor.sternToken " ++ show Anchor.sternToken) >> return {-to GenParser-monad-} (Just Anchor.Stern)
return {-to GenParser-monad-} RegEx.MkExtendedRegEx {
RegEx.bowAnchor = maybeBowAnchor,
RegEx.concatenation = repeatableRequirementList,
RegEx.sternAnchor = maybeSternAnchor
}
)
where
repeatableRequirementListParser :: Parsec.Parser (RegEx.Concatenation Char)
repeatableRequirementListParser = Parsec.choice [
Parsec.try . Parsec.lookAhead $ (
Parsec.char Anchor.sternToken > "Anchor.sternToken " ++ show Anchor.sternToken
) >> (
(
Parsec.eof >> return {-to GenParser-monad-} []
) <|> (
Parsec.oneOf [RegEx.alternativeExtendedRegExSeparatorToken, snd RegEx.captureGroupDelimiters] >> return {-to GenParser-monad-} []
)
),
(:) <$> (
MetaChar.metaCharParser >>= Repeatable.repeatableParser . RegEx.Require . MetaChar.deconstruct
) <*> repeatableRequirementListParser, {-recurse-}
return {-to GenParser-monad-} []
]
in (
error . ("readsPrec RegExChar.ExtendedRegExChar:\tparse-error; " ++) . show --Failure to parse.
) `either` (
\pair@(extendedRegExChar, _) -> if ToolShed.SelfValidate.isValid extendedRegExChar
then [pair]
else error $ ToolShed.SelfValidate.getFirstError extendedRegExChar --Parsed OK, but invalid.
) $ Parsec.parse (
(,) <$> extendedRegExCharParser <*> Parsec.getInput
) "ExtendedRegExChar" s
instance Show ExtendedRegExChar where
showsPrec _ MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives',
extendedRegEx = RegEx.MkExtendedRegEx {
RegEx.bowAnchor = maybeBowAnchor,
RegEx.concatenation = concatenation',
RegEx.sternAnchor = maybeSternAnchor
}
} = RegEx.showsMaybeAnchor maybeBowAnchor . foldl (.) (showString "") (
(
let
showAlternatives :: RegEx.Alternatives Char -> [ShowS]
showAlternatives = Data.List.intersperse (showChar RegEx.alternativeExtendedRegExSeparatorToken) . map (shows . MkExtendedRegExChar False) . RegEx.deconstructAlternatives
in if hasNonCapturingTopLevelAlternatives'
then map (
\repeatablePattern -> case Repeatable.base repeatablePattern of
RegEx.CaptureGroup alternatives -> foldr (.) (showString "") $ showAlternatives alternatives
_ -> error $ "Show RegExChar.ExtendedRegExChar: unexpected " ++ show repeatablePattern
)
else map (
\repeatablePattern -> (
case Repeatable.base repeatablePattern of
RegEx.Require meta -> shows $ MetaChar.MkMetaChar meta
RegEx.CaptureGroup alternatives -> showChar (
fst RegEx.captureGroupDelimiters
) . foldr (.) (
showChar $ snd RegEx.captureGroupDelimiters --Initial value.
) (
showAlternatives alternatives
)
) . Repeatable.showSuffix repeatablePattern
)
) concatenation'
) . RegEx.showsMaybeAnchor maybeSternAnchor
-- | A veneer over the underlying polymorphic operator, 'RegEx.+~'.
(+~)
:: InputData -- ^ The input-data string.
-> RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification.
-> RegEx.Result Char
inputData +~ regExOpts = inputData RegEx.+~ fmap extendedRegEx regExOpts --CAVEAT: .
-- | A veneer over the underlying polymorphic operator, 'RegEx.=~'.
(=~)
:: InputData -- ^ The input-data string.
-> RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification.
-> Bool
inputData =~ regExOpts = inputData RegEx.=~ fmap extendedRegEx regExOpts --CAVEAT: .
-- | Pattern-mismatch operator.
(/~)
:: InputData -- ^ The input-data string.
-> RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification.
-> Bool
(/~) inputData = not . (inputData =~)