{-| Module : Data.JustParse.Language Description : Regular expressions Copyright : Copyright Waived License : PublicDomain Maintainer : grantslatton@gmail.com Stability : experimental Portability : portable Allows for conversion from a regular expression and a 'Parser'. -} {-# LANGUAGE Safe #-} module Data.JustParse.Language ( Match (..), regex, regex_, regex', regex_' ) where import Data.JustParse import Data.JustParse.Internal import Data.JustParse.Combinator import Data.JustParse.Numeric import Data.JustParse.Char import Control.Monad ( liftM, mzero ) import Data.Monoid ( Monoid, mconcat, mempty, mappend ) import Data.Maybe ( isJust, fromMaybe ) import Data.List ( intercalate ) -- | @regex@ takes a regular expression in the form of a 'String' and, -- if the regex is valid, returns a 'Parser' that parses that regex. -- If the regex is invalid, it returns a Parser that will always fail. -- The returned parser is greedy. regex :: Stream s Char => String -> Parser s Match regex = greedy . fromMaybe mzero . parseOnly regular -- | Like 'regex', but returns a branching (non-greedy) parser. regex_ :: Stream s Char => String -> Parser s Match regex_ = fromMaybe mzero . parseOnly regular -- | The same as 'regex', but only returns the full matched text. regex' :: Stream s Char => String -> Parser s String regex' = liftM matched . regex -- | The same as 'regex_', but only returns the full matched text. regex_' :: Stream s Char => String -> Parser s String regex_' = liftM matched . regex_ -- | The result of a 'regex' data Match = Match { -- | The complete text matched within the regex matched :: String, -- | Any submatches created by using capture groups groups :: [Match] } instance Show Match where show = show' "" where show' i (Match m []) = i ++ m show' i (Match m gs) = i ++ m ++ "\n" ++ intercalate "\n" (map (show' ('\t':i)) gs) -- mconcat makes things very nice for concatenating the results of subregexes instance Monoid Match where mempty = Match "" [] mappend (Match m g) (Match m' g') = Match { matched = m ++ m', groups = g ++ g' } regular :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) regular = liftM (liftM mconcat . sequence) (many parser) parser :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) parser = choice [ asterisk, mn, pipe, plus, question, group, character, charClass, negCharClass, period ] parserNP :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) parserNP = choice [ asterisk, mn, plus, question, group, character, charClass, negCharClass, period ] restricted :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) restricted = choice [ character, charClass, negCharClass, group, period ] unreserved :: Stream s Char => Parser s Char unreserved = (char '\\' >> anyChar ) <|> noneOf "()[]\\*+{}^?|." character :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) character = do c <- unreserved return $ do c' <- char c return $ Match [c] [] charClass :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) charClass = do char '[' c <- many1 unreserved char ']' return $ do c' <- oneOf c return $ Match [c'] [] negCharClass :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) negCharClass = do string "[^" c <- many1 unreserved char ']' return $ do c' <- noneOf c return $ Match [c'] [] period :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) period = do char '.' return $ do c <- noneOf "\n\r" return $ Match [c] [] question :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) question = do p <- restricted char '?' return $ liftM mconcat (mN_ 0 1 p) group :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) group = do char '(' p <- regular char ')' return $ do r <- p return $ r { groups = [r] } asterisk :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) asterisk = do p <- restricted char '*' return $ liftM mconcat (many_ p) plus :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) plus = do p <- restricted char '+' return $ liftM mconcat (many1_ p) mn :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) mn = do p <- restricted char '{' l <- option 0 decInt char ',' r <- option (-1) decInt char '}' return $ liftM mconcat (mN_ l r p) pipe :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) pipe = do p <- parserNP char '|' p' <- parser return $ p <||> p'