module RegExChar.MetaChar(
MetaChar(..),
metaCharParser,
deconstruct
) where
import Control.Applicative((<$>), (<*>))
import Control.Arrow((***))
import qualified Data.Char
import qualified Data.Map
import qualified RegExDot.BracketExpression as BracketExpression
import qualified RegExDot.BracketExpressionMember as BracketExpressionMember
import qualified RegExDot.Consumer as Consumer
import qualified RegExDot.Meta as Meta
import qualified RegExDot.RegEx as RegEx
import qualified RegExDot.Repeatable as Repeatable
import qualified RegExDot.ShowablePredicate as ShowablePredicate
import qualified Text.ParserCombinators.Parsec as Parsec
import Text.ParserCombinators.Parsec((<?>))
import qualified ToolShed.Data.Pair
import qualified ToolShed.SelfValidate
type AssociationList key = [(key, ShowablePredicate.Predicate Char)]
type Dictionary key = Data.Map.Map key (ShowablePredicate.Predicate Char)
instance BracketExpressionMember.ShortcutExpander Char where
findPredicate shortcut = ShowablePredicate.MkShowablePredicate [Meta.shortcutToken, shortcut] <$> shortcut `Data.Map.lookup` dictionary where
dictionary :: Dictionary Char
dictionary = Data.Map.fromList $ perlShortcuts ++ invert perlShortcuts where
perlShortcuts :: AssociationList Char
perlShortcuts = [
('d', Data.Char.isDigit),
('s', Data.Char.isSpace),
('w', \c -> ($ c) `any` [Data.Char.isAlphaNum, (== '_')])
]
invert :: AssociationList Char -> AssociationList Char
invert = map (Data.Char.toUpper *** (not .))
instance Meta.ShortcutExpander Char where
expand c = case BracketExpressionMember.findPredicate c of
Just showablePredicate -> Meta.Predicate showablePredicate
_ -> Meta.Literal c
newtype MetaChar = MkMetaChar (Meta.Meta Char) deriving Eq
deconstruct :: MetaChar -> Meta.Meta Char
deconstruct (MkMetaChar metaChar) = metaChar
instance ToolShed.SelfValidate.SelfValidator MetaChar where
getErrors = ToolShed.SelfValidate.getErrors . deconstruct
instance Consumer.Consumer MetaChar where
consumptionProfile = Consumer.consumptionProfile . deconstruct
starHeight = Consumer.starHeight . deconstruct
metaCharParser :: Parsec.Parser MetaChar
metaCharParser = MkMetaChar <$> Parsec.choice [
(Parsec.char Meta.anyToken <?> "Meta.anyToken " ++ show Meta.anyToken) >> return Meta.Any,
(Parsec.char Meta.shortcutToken <?> "Meta.shortcutToken " ++ show Meta.shortcutToken) >> Meta.expand <$> Parsec.anyChar,
uncurry Parsec.between (ToolShed.Data.Pair.mirror Parsec.char BracketExpression.delimiterTokens) (
do
let
implementPosixCharacterClass :: String -> Maybe (ShowablePredicate.ShowablePredicate Char)
implementPosixCharacterClass identifier = ShowablePredicate.MkShowablePredicate (
fst posixCharacterClassDelimiters ++ identifier ++ snd posixCharacterClassDelimiters
) <$> identifier `Data.Map.lookup` dictionary where
dictionary :: Dictionary String
dictionary = Data.Map.fromList $ posixCharacterClasses ++ invert posixCharacterClasses where
posixCharacterClasses :: AssociationList String
posixCharacterClasses = [
("alnum", Data.Char.isAlphaNum),
("alpha", Data.Char.isAlpha),
("ascii", Data.Char.isAscii),
("blank", (`elem` " \t")),
("cntrl", Data.Char.isControl),
("digit", Data.Char.isDigit),
("graph", \c -> not $ ($ c) `any` [Data.Char.isSpace, Data.Char.isControl]),
("lower", Data.Char.isLower),
("print", Data.Char.isPrint),
("punct", Data.Char.isPunctuation),
("space", Data.Char.isSpace),
("upper", Data.Char.isUpper),
("word", \c -> ($ c) `any` [Data.Char.isAlphaNum, (== '_')]),
("xdigit", Data.Char.isHexDigit)
]
invert :: AssociationList String -> AssociationList String
invert = map $ (BracketExpression.negationToken :) *** (not .)
cTor <- Parsec.option Meta.AnyOf $ (Parsec.char BracketExpression.negationToken <?> "BracketExpression.negationToken " ++ show BracketExpression.negationToken) >> return Meta.NoneOf
literalBracketExpressionTerminator <- Parsec.option [] $ return . BracketExpressionMember.Literal <$> (
Parsec.char (snd BracketExpression.delimiterTokens) <?> "Literal Bracket-expression terminator " ++ show (snd BracketExpression.delimiterTokens)
)
cTor . (literalBracketExpressionTerminator ++) <$> Parsec.many (
Parsec.choice [
(
do
_ <- Parsec.char Meta.shortcutToken <?> "Meta.shortcutToken " ++ show Meta.shortcutToken
c <- Parsec.anyChar
return $ case BracketExpressionMember.findPredicate c of
Just showablePredicate -> BracketExpressionMember.Predicate showablePredicate
_ -> BracketExpressionMember.Literal c
) <?> "Perl-style shortcut",
Parsec.try (
uncurry Parsec.between (ToolShed.Data.Pair.mirror Parsec.string posixCharacterClassDelimiters) (
do
identifier <- Parsec.many1 $ Parsec.noneOf [head $ snd posixCharacterClassDelimiters]
case implementPosixCharacterClass identifier of
Just showablePredicate -> return $ BracketExpressionMember.Predicate showablePredicate
_ -> Parsec.unexpected $ "MetaChar.metaCharParser:\tunrecognised Posix Character-class; " ++ show identifier
) <?> "Posix Character-class " ++ show posixCharacterClassDelimiters
),
Parsec.try (
(
do
rangeStart <- Parsec.noneOf [snd BracketExpression.delimiterTokens]
_ <- Parsec.char bracketExpressionRangeToken <?> "bracketExpressionRangeToken " ++ show bracketExpressionRangeToken
rangeEnd <- Parsec.noneOf [snd BracketExpression.delimiterTokens]
return . BracketExpressionMember.Predicate . ShowablePredicate.MkShowablePredicate [
rangeStart,
bracketExpressionRangeToken,
rangeEnd
] $ \c -> rangeStart <= c && c <= rangeEnd
) <?> "Bracket-expression range"
),
BracketExpressionMember.Literal <$> Parsec.noneOf [snd BracketExpression.delimiterTokens] <?> "BracketExpressionMember.Literal"
] <?> "Bracket-expression member"
) <?> "Bracket-expression member-list"
) <?> "BracketExpression.delimiterTokens " ++ show BracketExpression.delimiterTokens,
Meta.Literal <$> Parsec.noneOf RegEx.tokens
]
instance Read MetaChar where
readsPrec _ = (
(error . ("readsPrec RegExChar.MetaChar:\tparse-error; " ++) . show) `either` return
) . Parsec.parse (
(,) <$> metaCharParser <*> Parsec.getInput
) "MetaChar"
bracketExpressionRangeToken :: Char
bracketExpressionRangeToken = '-'
posixCharacterClassDelimiters :: (String, String)
posixCharacterClassDelimiters = ("[:", ":]")
instance Show MetaChar where
showsPrec _ (MkMetaChar Meta.Any) = showChar Meta.anyToken
showsPrec _ (MkMetaChar (Meta.Literal c)) = (
if c `elem` [
fst BracketExpression.delimiterTokens,
fst Repeatable.rangeDelimiters,
Repeatable.oneOrMoreToken,
Repeatable.zeroOrMoreToken,
Repeatable.zeroOrOneToken,
Meta.anyToken,
Meta.shortcutToken
] ++ RegEx.tokens
then showChar Meta.shortcutToken
else id
) . showChar c
showsPrec _ (MkMetaChar (Meta.AnyOf bracketExpression)) = showChar (
fst BracketExpression.delimiterTokens
) . showString (
foldr (
\e -> case e of
BracketExpressionMember.Predicate showablePredicate -> shows showablePredicate
BracketExpressionMember.Literal literal -> (
if literal `elem` [
bracketExpressionRangeToken,
Meta.shortcutToken,
snd BracketExpression.delimiterTokens
]
then showChar Meta.shortcutToken
else id
) . showChar literal
) (
showChar (snd BracketExpression.delimiterTokens) ""
) bracketExpression
)
showsPrec _ (MkMetaChar (Meta.NoneOf bracketExpression)) = showChar x . showChar BracketExpression.negationToken . showString xs where (x : xs) = show . MkMetaChar $ Meta.AnyOf bracketExpression
showsPrec _ (MkMetaChar m) = shows m