{-# LANGUAGE BangPatterns, CPP, GADTs, Rank2Types, OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | -- Module : Data.Picoparsec.Monoid.Internal -- Copyright : Bryan O'Sullivan 2007-2011, Mario Blažević <blamario@yahoo.com> 2014 -- License : BSD3 -- -- Maintainer : Mario Blažević -- Stability : experimental -- Portability : unknown -- -- Simple, efficient combinator parsing for -- 'Data.Monoid.Cancellative.LeftGCDMonoid' and -- 'Data.Monoid.Factorial.FactorialMonoid' inputs, loosely based on -- the Parsec library. module Data.Picoparsec.Monoid.Internal ( -- * Parser types Parser , Result -- * Running parsers , parse , parseOnly -- * Combinators , module Data.Picoparsec.Combinator -- * Parsing individual tokens , anyToken , notToken , satisfy , satisfyWith , skip , peekToken -- ** Parsing individual characters , anyChar , char , notChar , satisfyChar , peekChar , peekChar' -- * Efficient string handling , scan , skipWhile , string , stringTransform , take , takeWhile , takeWhile1 , takeWith , takeTill -- ** Efficient character string handling , scanChars , skipCharsWhile , takeCharsWhile , takeCharsWhile1 , takeCharsTill , takeTillChar , takeTillChar1 -- ** Consume all remaining input , takeRest -- * Utilities , endOfLine , ensureOne ) where import Prelude hiding (getChar, null, span, take, takeWhile) import Control.Applicative ((<|>), (<$>)) import Control.Monad (when) import Data.Picoparsec.Combinator import Data.Picoparsec.Internal.Types import Data.Picoparsec.Internal (demandInput, get, prompt, put, wantInput) import Data.Monoid (Monoid(..), (<>)) import Data.Monoid.Cancellative (LeftGCDMonoid(..)) import Data.Monoid.Null (MonoidNull(null)) import qualified Data.Monoid.Factorial as Factorial import Data.Monoid.Factorial (FactorialMonoid) import Data.Monoid.Textual (TextualMonoid) import qualified Data.Monoid.Textual as Textual import Data.String (IsString(..)) import qualified Data.Picoparsec.Internal.Types as T type Result = IResult instance (IsString a, LeftGCDMonoid a, MonoidNull a, a ~ b) => IsString (Parser a b) where fromString = string . fromString -- | If at least one token of input is available, return the current -- input, otherwise fail. ensureOne :: MonoidNull t => Parser t t ensureOne = T.Parser $ \i0 a0 m0 kf ks -> if null (unI i0) then T.runParser (demandInput >> get) i0 a0 m0 kf ks else ks i0 a0 m0 (unI i0) {-# INLINE ensureOne #-} -- | This parser always succeeds. It returns 'True' if any input is -- available on demand, and 'False' if the end of all input has been reached. wantMoreInput :: MonoidNull t => Parser t Bool wantMoreInput = T.Parser $ \i0 a0 m0 _kf ks -> if m0 == Complete then ks i0 a0 m0 False else let kf' i a m = ks i a m False ks' i a m = ks i a m True in prompt i0 a0 m0 kf' ks' -- | The parser @satisfy p@ succeeds for any prime input token for -- which the predicate @p@ returns 'True'. Returns the token that is -- actually parsed. -- -- >digit = satisfy isDigit -- > where isDigit w = w >= "0" && w <= "9" satisfy :: FactorialMonoid t => (t -> Bool) -> Parser t t satisfy p = do s <- ensureOne let Just (first, rest) = Factorial.splitPrimePrefix s if p first then put rest >> return first else fail "satisfy" {-# INLINE satisfy #-} -- | The parser @satisfy p@ succeeds for any input character for -- which the predicate @p@ returns 'True'. Returns the character that -- is actually parsed. -- -- >digit = satisfy isDigit -- > where isDigit w = w >= "0" && w <= "9" satisfyChar :: TextualMonoid t => (Char -> Bool) -> Parser t Char satisfyChar p = do s <- ensureOne case Textual.splitCharacterPrefix s of Just (first, rest) | p first -> put rest >> return first _ -> fail "satisfy" {-# INLINE satisfyChar #-} -- | The parser @skip p@ succeeds for any prime input token for which -- the predicate @p@ returns 'True'. -- -- >skipDigit = skip isDigit -- > where isDigit w = w >= "0" && w <= "9" skip :: FactorialMonoid t => (t -> Bool) -> Parser t () skip p = do s <- ensureOne let Just (first, rest) = Factorial.splitPrimePrefix s if p first then put rest else fail "skip" -- | The parser @satisfyWith f p@ transforms an input token, and -- succeeds if the predicate @p@ returns 'True' on the transformed -- value. The parser returns the transformed token that was parsed. satisfyWith :: FactorialMonoid t => (t -> a) -> (a -> Bool) -> Parser t a satisfyWith f p = do s <- ensureOne let Just (first, rest) = Factorial.splitPrimePrefix s c = f $! first if p c then put rest >> return c else fail "satisfyWith" {-# INLINE satisfyWith #-} -- | Consume @n@ tokens of input, but succeed only if the predicate -- returns 'True'. takeWith :: FactorialMonoid t => Int -> (t -> Bool) -> Parser t t takeWith n0 p = get >>= \i-> let !(h, t) = Factorial.splitAt n0 i n1 = Factorial.length h in if null t && n1 < n0 then put mempty >> demandInput >> takeWith' h n1 p else if p h then put t >> return h else fail "takeWith" {-# INLINABLE takeWith #-} -- The uncommon case takeWith' :: FactorialMonoid t => t -> Int -> (t -> Bool) -> Parser t t takeWith' h0 n0 p = get >>= \i-> let !(h, t) = Factorial.splitAt n0 i n1 = Factorial.length h h1 = h0 <> h in if null t && n1 < n0 then put mempty >> demandInput >> takeWith' h1 n1 p else if p h1 then put t >> return h1 else fail "takeWith" {-# INLINABLE takeWith' #-} -- | Consume exactly @n@ prime input tokens. take :: FactorialMonoid t => Int -> Parser t t take n = takeWith n (const True) {-# INLINE take #-} -- | @string s@ parses a prefix of input that identically matches -- @s@. Returns the parsed string (i.e. @s@). This parser consumes no -- input if it fails (even if a partial match). -- -- /Note/: The behaviour of this parser is different to that of the -- similarly-named parser in Parsec, as this one is all-or-nothing. -- To illustrate the difference, the following parser will fail under -- Parsec given an input of @\"for\"@: -- -- >string "foo" <|> string "for" -- -- The reason for its failure is that the first branch is a -- partial match, and will consume the letters @\'f\'@ and @\'o\'@ -- before failing. In Attoparsec, the above parser will /succeed/ on -- that input, because the failed first branch will consume nothing. string :: (LeftGCDMonoid t, MonoidNull t) => t -> Parser t t string s = get >>= \i-> let !(p, s', i') = stripCommonPrefix s i in if null s' then put i' >> return s else if null i' then put mempty >> demandInput >> string' p s' else fail "string" {-# INLINE string #-} -- The uncommon case string' :: (LeftGCDMonoid t, MonoidNull t) => t -> t -> Parser t t string' consumed rest = get >>= \i-> let !(p, s', i') = stripCommonPrefix rest i in if null s' then put i' >> return (consumed <> rest) else if null i' then put mempty >> demandInput >> string' (consumed <> p) s' else put (consumed <> i) >> fail "string" stringTransform :: (FactorialMonoid t, Eq t) => (t -> t) -> t -> Parser t t stringTransform f s = takeWith (Factorial.length s) ((==f s) . f) {-# INLINE stringTransform #-} -- | Skip past input for as long as the predicate returns 'True'. skipWhile :: FactorialMonoid t => (t -> Bool) -> Parser t () skipWhile p = go where go = do t <- Factorial.dropWhile p <$> get put t when (null t) $ do input <- wantMoreInput when input go {-# INLINE skipWhile #-} -- | Skip past input characters for as long as the predicate returns 'True'. skipCharsWhile :: TextualMonoid t => (Char -> Bool) -> Parser t () skipCharsWhile p = go where go = do t <- Textual.dropWhile_ False p <$> get put t when (null t) $ do input <- wantMoreInput when input go {-# INLINE skipCharsWhile #-} -- | Consume input as long as the predicate returns 'False' -- (i.e. until it returns 'True'), and return the consumed input. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'True' on the first input token. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeTill :: FactorialMonoid t => (t -> Bool) -> Parser t t takeTill p = takeWhile (not . p) {-# INLINE takeTill #-} -- | Consume input characters as long as the predicate returns 'False' -- (i.e. until it returns 'True'), and return the consumed input. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'True' on the first input token. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeCharsTill :: TextualMonoid t => (Char -> Bool) -> Parser t t takeCharsTill p = takeCharsWhile (not . p) -- | Consume all input until the character for which the predicate -- returns 'True' and return the consumed input. -- -- The only difference between 'takeCharsTill' and 'takeTillChar' is -- in their handling of non-character data: The former never consumes -- it, the latter always does. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'True' on the first input token. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeTillChar :: TextualMonoid t => (Char -> Bool) -> Parser t t takeTillChar p = go id where go acc = do (h,t) <- Textual.break_ False p <$> get put t if null t then do input <- wantInput if input then go (acc . mappend h) else return (acc h) else return (acc h) {-# INLINE takeTillChar #-} -- | Consume all input until the character for which the predicate -- returns 'True' and return the consumed input. -- -- This parser always consumes at least one token: it will fail if the -- input starts with a character for which the predicate returns -- 'True' or if there is no input left. takeTillChar1 :: TextualMonoid t => (Char -> Bool) -> Parser t t takeTillChar1 p = do (`when` demandInput) =<< null <$> get (h,t) <- Textual.break_ False p <$> get when (null h && maybe True p (Textual.characterPrefix t)) $ fail "takeTillChar1" put t if null t then (h<>) <$> takeTillChar p else return h {-# INLINE takeTillChar1 #-} -- | Consume input as long as the predicate returns 'True', and return -- the consumed input. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'False' on the first input token. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeWhile :: FactorialMonoid t => (t -> Bool) -> Parser t t takeWhile p = go id where go acc = do (h,t) <- Factorial.span p <$> get put t if null t then do input <- wantMoreInput if input then go (acc . mappend h) else return (acc h) else return (acc h) {-# INLINE takeWhile #-} -- | Consume input characters as long as the predicate returns 'True', -- and return the consumed input. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'False' on the first input token. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeCharsWhile :: TextualMonoid t => (Char -> Bool) -> Parser t t takeCharsWhile p = {-# SCC takeCharsWhile #-} go id where go acc = do (h,t) <- Textual.span_ False p <$> get put t if null t then do input <- wantMoreInput if input then go (acc . mappend h) else return (acc h) else return (acc h) {-# INLINE takeCharsWhile #-} -- | Consume all remaining input and return it as a single string. takeRest :: MonoidNull t => Parser t t takeRest = go [] where go acc = do input <- wantInput if input then do s <- get put mempty go (s:acc) else return (mconcat $ reverse acc) {-# INLINABLE takeRest #-} -- | Consume input as long as the predicate returns 'True', and return -- the consumed input. -- -- This parser requires the predicate to succeed on at least one input -- token: it will fail if the predicate never returns 'True' -- or if there is no input left. takeWhile1 :: FactorialMonoid t => (t -> Bool) -> Parser t t takeWhile1 p = do (`when` demandInput) =<< null <$> get (h,t) <- Factorial.span p <$> get when (null h) $ fail "takeWhile1" put t if null t then (h<>) `fmap` takeWhile p else return h {-# INLINE takeWhile1 #-} takeCharsWhile1 :: TextualMonoid t => (Char -> Bool) -> Parser t t takeCharsWhile1 p = do (`when` demandInput) =<< null <$> get (h,t) <- Textual.span_ False p <$> get when (null h) $ fail "takeCharsWhile1" put t if null t then (h<>) `fmap` takeCharsWhile p else return h {-# INLINE takeCharsWhile1 #-} -- | A stateful scanner. The predicate consumes and transforms a -- state argument, and each transformed state is passed to successive -- invocations of the predicate on each token of the input until one -- returns 'Nothing' or the input ends. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'Nothing' on the first prime input factor. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. scan :: FactorialMonoid t => s -> (s -> t -> Maybe s) -> Parser t t scan s0 f = go s0 id where go s acc = do (h,t,s') <- Factorial.spanMaybe' s f <$> get put t if null t then do input <- wantMoreInput if input then go s' (acc . mappend h) else return (acc h) else return (acc h) {-# INLINE scan #-} -- | A stateful scanner. The predicate consumes and transforms a -- state argument, and each transformed state is passed to successive -- invocations of the predicate on each token of the input until one -- returns 'Nothing' or the input ends. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'Nothing' on the first prime input factor. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. scanChars :: TextualMonoid t => s -> (s -> Char -> Maybe s) -> Parser t t scanChars s0 fc = go s0 id where go s acc = do (h,t,s') <- Textual.spanMaybe_' s fc <$> get put t if null t then do input <- wantMoreInput if input then go s' (acc . mappend h) else return (acc h) else return (acc h) {-# INLINE scanChars #-} -- | Match any prime input token. anyToken :: FactorialMonoid t => Parser t t anyToken = satisfy $ const True {-# INLINE anyToken #-} -- | Match any prime input token except the given one. notToken :: (Eq t, FactorialMonoid t) => t -> Parser t t notToken t = satisfy (/= t) {-# INLINE notToken #-} -- | Match any prime input token. Returns 'mempty' if end of input -- has been reached. Does not consume any input. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. peekToken :: FactorialMonoid t => Parser t t peekToken = T.Parser $ \i0 a0 m0 _kf ks -> if null (unI i0) then if m0 == Complete then ks i0 a0 m0 mempty else let k' i a m = ks i a m $! Factorial.primePrefix (unI i) in prompt i0 a0 m0 k' k' else ks i0 a0 m0 $! Factorial.primePrefix (unI i0) {-# INLINE peekToken #-} -- | Match any character. anyChar :: TextualMonoid t => Parser t Char anyChar = satisfyChar $ const True {-# INLINE anyChar #-} -- | Match a specific character. char :: TextualMonoid t => Char -> Parser t Char char c = satisfyChar (== c) <?> show c {-# INLINE char #-} -- | Match any character except the given one. notChar :: TextualMonoid t => Char -> Parser t Char notChar c = satisfyChar (/= c) <?> "not" ++ show c {-# INLINE notChar #-} -- | Match any input character, if available. Does not consume any input. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. peekChar :: TextualMonoid t => Parser t (Maybe Char) peekChar = T.Parser $ \i0 a0 m0 _kf ks -> if null (unI i0) then if m0 == Complete then ks i0 a0 m0 Nothing else let k' i a m = ks i a m $! Textual.characterPrefix (unI i) in prompt i0 a0 m0 k' k' else ks i0 a0 m0 $! Textual.characterPrefix (unI i0) {-# INLINE peekChar #-} -- | Match any input character, failing if the input doesn't start -- with any. Does not consume any input. peekChar' :: TextualMonoid t => Parser t Char peekChar' = do s <- ensureOne case Textual.characterPrefix s of Just c -> return c _ -> fail "peekChar'" {-# INLINE peekChar' #-} -- | Match either a single newline character @\'\\n\'@, or a carriage -- return followed by a newline character @\"\\r\\n\"@. endOfLine :: (Eq t, TextualMonoid t) => Parser t () endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ()) -- | Terminal failure continuation. failK :: Failure t a failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg {-# INLINE failK #-} -- | Terminal success continuation. successK :: Success t a a successK i0 _a0 _m0 a = Done (unI i0) a {-# INLINE successK #-} -- | Run a parser. parse :: Monoid t => Parser t a -> t -> IResult t a parse m s = T.runParser m (I s) mempty Incomplete failK successK {-# INLINE parse #-} -- | Run a parser that cannot be resupplied via a 'Partial' result. parseOnly :: Monoid t => Parser t a -> t -> Either String a parseOnly m s = case T.runParser m (I s) mempty Complete failK successK of Fail _ _ err -> Left err Done _ a -> Right a _ -> error "parseOnly: impossible error!" {-# INLINE parseOnly #-}