{-# LANGUAGE BangPatterns, CPP, Rank2Types, OverloadedStrings #-} -- | -- Module : Data.Picoparsec.Monoid.Internal -- Copyright : Bryan O'Sullivan 2007-2011, Mario Blažević 2014 -- License : BSD3 -- -- Maintainer : Mario Blažević -- Stability : experimental -- Portability : unknown -- -- Simple, efficient combinator parsing for -- 'Data.Monoid.Cancellative.LeftGCDMonoid' and -- 'Data.Monoid.Factorial.FactorialMonoid' inputs, loosely based on -- the Parsec library. module Data.Picoparsec.Monoid.Internal ( -- * Parser types Parser , Result -- * Running parsers , parse , parseOnly -- * Combinators , module Data.Picoparsec.Combinator -- * Parsing individual tokens , satisfy , satisfyWith , anyToken , skip , peekToken -- ** Parsing individual characters , anyChar , char , satisfyChar , peekChar , peekChar' -- * Efficient string handling , scan , skipWhile , string , stringTransform , take , takeWhile , takeWhile1 , takeWith , takeTill -- ** Efficient character string handling , scanChars , skipCharsWhile , takeCharsWhile , takeCharsWhile1 , takeCharsTill , takeTillChar , takeTillChar1 -- ** Consume all remaining input , takeRest -- * Utilities , endOfLine , ensureOne ) where import Control.Applicative ((<|>), (<$>)) import Control.Monad (when) import Data.Picoparsec.Combinator import Data.Picoparsec.Internal.Types import Data.Monoid (Monoid(..), (<>)) import Data.Monoid.Cancellative (LeftGCDMonoid(..)) import Data.Monoid.Null (MonoidNull(null)) import qualified Data.Monoid.Factorial as Factorial import Data.Monoid.Factorial (FactorialMonoid) import Data.Monoid.Textual (TextualMonoid) import qualified Data.Monoid.Textual as Textual import Prelude hiding (getChar, null, span, take, takeWhile) import qualified Data.Picoparsec.Internal.Types as T type Result = IResult ensure' :: FactorialMonoid t => Int -> T.Input t -> T.Added t -> More -> T.Failure t r -> T.Success t t r -> IResult t r ensure' !n0 i0 a0 m0 kf0 ks0 = T.runParser (demandInput >> go n0) i0 a0 m0 kf0 ks0 where go !n = T.Parser $ \i a m kf ks -> if Factorial.length (unI i) >= n then ks i a m (unI i) else T.runParser (demandInput >> go n) i a m kf ks -- | If at least one token of input is available, return the current -- input, otherwise fail. ensureOne :: FactorialMonoid t => Parser t t ensureOne = T.Parser $ \i0 a0 m0 kf ks -> if null (unI i0) -- The uncommon case is kept out-of-line to reduce code size: then ensure' 1 i0 a0 m0 kf ks else ks i0 a0 m0 (unI i0) -- Non-recursive so the bounds check can be inlined: {-# INLINE ensureOne #-} -- | Ask for input. If we receive any, pass it to a success -- continuation, otherwise to a failure continuation. prompt :: MonoidNull t => Input t -> Added t -> More -> (Input t -> Added t -> More -> IResult t r) -> (Input t -> Added t -> More -> IResult t r) -> IResult t r prompt i0 a0 _m0 kf ks = Partial $ \s -> if null s then kf i0 a0 Complete else ks (i0 <> I s) (a0 <> A s) Incomplete -- | Immediately demand more input via a 'Partial' continuation -- result. demandInput :: MonoidNull t => Parser t () demandInput = T.Parser $ \i0 a0 m0 kf ks -> if m0 == Complete then kf i0 a0 m0 ["demandInput"] "not enough input" else let kf' i a m = kf i a m ["demandInput"] "not enough input" ks' i a m = ks i a m () in prompt i0 a0 m0 kf' ks' -- | This parser always succeeds. It returns 'True' if any input is -- available either immediately or on demand, and 'False' if the end -- of all input has been reached. wantInput :: MonoidNull t => Parser t Bool wantInput = T.Parser $ \i0 a0 m0 _kf ks -> case () of _ | not (null (unI i0)) -> ks i0 a0 m0 True | m0 == Complete -> ks i0 a0 m0 False | otherwise -> let kf' i a m = ks i a m False ks' i a m = ks i a m True in prompt i0 a0 m0 kf' ks' -- | This parser always succeeds. It returns 'True' if any input is -- available on demand, and 'False' if the end of all input has been reached. wantMoreInput :: MonoidNull t => Parser t Bool wantMoreInput = T.Parser $ \i0 a0 m0 _kf ks -> if m0 == Complete then ks i0 a0 m0 False else let kf' i a m = ks i a m False ks' i a m = ks i a m True in prompt i0 a0 m0 kf' ks' get :: Parser t t get = T.Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0) put :: t -> Parser t () put s = T.Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 () -- | The parser @satisfy p@ succeeds for any prime input token for -- which the predicate @p@ returns 'True'. Returns the token that is -- actually parsed. -- -- >digit = satisfy isDigit -- > where isDigit w = w >= "0" && w <= "9" satisfy :: FactorialMonoid t => (t -> Bool) -> Parser t t satisfy p = do s <- ensureOne let Just (first, rest) = Factorial.splitPrimePrefix s if p first then put rest >> return first else fail "satisfy" {-# INLINE satisfy #-} -- | The parser @satisfy p@ succeeds for any input character for -- which the predicate @p@ returns 'True'. Returns the character that -- is actually parsed. -- -- >digit = satisfy isDigit -- > where isDigit w = w >= "0" && w <= "9" satisfyChar :: TextualMonoid t => (Char -> Bool) -> Parser t Char satisfyChar p = do s <- ensureOne case Textual.splitCharacterPrefix s of Just (first, rest) | p first -> put rest >> return first _ -> fail "satisfy" {-# INLINE satisfyChar #-} -- | The parser @skip p@ succeeds for any prime input token for which -- the predicate @p@ returns 'True'. -- -- >skipDigit = skip isDigit -- > where isDigit w = w >= "0" && w <= "9" skip :: FactorialMonoid t => (t -> Bool) -> Parser t () skip p = do s <- ensureOne let Just (first, rest) = Factorial.splitPrimePrefix s if p first then put rest else fail "skip" -- | The parser @satisfyWith f p@ transforms an input token, and -- succeeds if the predicate @p@ returns 'True' on the transformed -- value. The parser returns the transformed token that was parsed. satisfyWith :: FactorialMonoid t => (t -> a) -> (a -> Bool) -> Parser t a satisfyWith f p = do s <- ensureOne let Just (first, rest) = Factorial.splitPrimePrefix s c = f $! first if p c then put rest >> return c else fail "satisfyWith" {-# INLINE satisfyWith #-} -- | Consume @n@ tokens of input, but succeed only if the predicate -- returns 'True'. takeWith :: FactorialMonoid t => Int -> (t -> Bool) -> Parser t t takeWith n0 p = get >>= \i-> let !(h, t) = Factorial.splitAt n0 i n1 = Factorial.length h in if null t && n1 < n0 then put mempty >> demandInput >> takeWith' h n1 p else if p h then put t >> return h else fail "takeWith" {-# INLINABLE takeWith #-} -- The uncommon case takeWith' :: FactorialMonoid t => t -> Int -> (t -> Bool) -> Parser t t takeWith' h0 n0 p = get >>= \i-> let !(h, t) = Factorial.splitAt n0 i n1 = Factorial.length h h1 = h0 <> h in if null t && n1 < n0 then put mempty >> demandInput >> takeWith' h1 n1 p else if p h1 then put t >> return h1 else fail "takeWith" {-# INLINABLE takeWith' #-} -- | Consume exactly @n@ prime input tokens. take :: FactorialMonoid t => Int -> Parser t t take n = takeWith n (const True) {-# INLINE take #-} -- | @string s@ parses a prefix of input that identically matches -- @s@. Returns the parsed string (i.e. @s@). This parser consumes no -- input if it fails (even if a partial match). -- -- /Note/: The behaviour of this parser is different to that of the -- similarly-named parser in Parsec, as this one is all-or-nothing. -- To illustrate the difference, the following parser will fail under -- Parsec given an input of @\"for\"@: -- -- >string "foo" <|> string "for" -- -- The reason for its failure is that the first branch is a -- partial match, and will consume the letters @\'f\'@ and @\'o\'@ -- before failing. In Attoparsec, the above parser will /succeed/ on -- that input, because the failed first branch will consume nothing. string :: (LeftGCDMonoid t, MonoidNull t) => t -> Parser t t string s = get >>= \i-> let !(p, s', i') = stripCommonPrefix s i in if null s' then put i' >> return s else if null i' then put mempty >> demandInput >> string' p s' else fail "string" {-# INLINE string #-} -- The uncommon case string' :: (LeftGCDMonoid t, MonoidNull t) => t -> t -> Parser t t string' consumed rest = get >>= \i-> let !(p, s', i') = stripCommonPrefix rest i in if null s' then put i' >> return (consumed <> rest) else if null i' then put mempty >> demandInput >> string' (consumed <> p) s' else put (consumed <> i) >> fail "string" stringTransform :: (FactorialMonoid t, Eq t) => (t -> t) -> t -> Parser t t stringTransform f s = takeWith (Factorial.length s) ((==f s) . f) {-# INLINE stringTransform #-} -- | Skip past input for as long as the predicate returns 'True'. skipWhile :: FactorialMonoid t => (t -> Bool) -> Parser t () skipWhile p = go where go = do t <- Factorial.dropWhile p <$> get put t when (null t) $ do input <- wantMoreInput when input go {-# INLINE skipWhile #-} -- | Skip past input characters for as long as the predicate returns 'True'. skipCharsWhile :: TextualMonoid t => (Char -> Bool) -> Parser t () skipCharsWhile p = go where go = do t <- Textual.dropWhile_ False p <$> get put t when (null t) $ do input <- wantMoreInput when input go {-# INLINE skipCharsWhile #-} -- | Consume input as long as the predicate returns 'False' -- (i.e. until it returns 'True'), and return the consumed input. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'True' on the first input token. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeTill :: FactorialMonoid t => (t -> Bool) -> Parser t t takeTill p = takeWhile (not . p) {-# INLINE takeTill #-} -- | Consume input characters as long as the predicate returns 'False' -- (i.e. until it returns 'True'), and return the consumed input. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'True' on the first input token. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeCharsTill :: TextualMonoid t => (Char -> Bool) -> Parser t t takeCharsTill p = takeCharsWhile (not . p) -- | Consume all input until the character for which the predicate -- returns 'True' and return the consumed input. -- -- The only difference between 'takeCharsTill' and 'takeTillChar' is -- in their handling of non-character data: The former never consumes -- it, the latter always does. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'True' on the first input token. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeTillChar :: TextualMonoid t => (Char -> Bool) -> Parser t t takeTillChar p = go id where go acc = do (h,t) <- Textual.break_ False p <$> get put t if null t then do input <- wantInput if input then go (acc . mappend h) else return (acc h) else return (acc h) {-# INLINE takeTillChar #-} -- | Consume all input until the character for which the predicate -- returns 'True' and return the consumed input. -- -- This parser always consumes at least one token: it will fail if the -- input starts with a character for which the predicate returns -- 'True' or if there is no input left. takeTillChar1 :: TextualMonoid t => (Char -> Bool) -> Parser t t takeTillChar1 p = do (`when` demandInput) =<< null <$> get (h,t) <- Textual.break_ False p <$> get when (null h && maybe True p (Textual.characterPrefix t)) $ fail "takeTillChar1" put t if null t then (h<>) <$> takeTillChar p else return h {-# INLINE takeTillChar1 #-} -- | Consume input as long as the predicate returns 'True', and return -- the consumed input. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'False' on the first input token. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeWhile :: FactorialMonoid t => (t -> Bool) -> Parser t t takeWhile p = go id where go acc = do (h,t) <- Factorial.span p <$> get put t if null t then do input <- wantMoreInput if input then go (acc . mappend h) else return (acc h) else return (acc h) {-# INLINE takeWhile #-} -- | Consume input characters as long as the predicate returns 'True', -- and return the consumed input. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'False' on the first input token. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeCharsWhile :: TextualMonoid t => (Char -> Bool) -> Parser t t takeCharsWhile p = {-# SCC takeCharsWhile #-} go id where go acc = do (h,t) <- Textual.span_ False p <$> get put t if null t then do input <- wantMoreInput if input then go (acc . mappend h) else return (acc h) else return (acc h) {-# INLINE takeCharsWhile #-} -- | Consume all remaining input and return it as a single string. takeRest :: MonoidNull t => Parser t t takeRest = go [] where go acc = do input <- wantInput if input then do s <- get put mempty go (s:acc) else return (mconcat $ reverse acc) {-# INLINABLE takeRest #-} -- | Consume input as long as the predicate returns 'True', and return -- the consumed input. -- -- This parser requires the predicate to succeed on at least one input -- token: it will fail if the predicate never returns 'True' -- or if there is no input left. takeWhile1 :: FactorialMonoid t => (t -> Bool) -> Parser t t takeWhile1 p = do (`when` demandInput) =<< null <$> get (h,t) <- Factorial.span p <$> get when (null h) $ fail "takeWhile1" put t if null t then (h<>) `fmap` takeWhile p else return h {-# INLINE takeWhile1 #-} takeCharsWhile1 :: TextualMonoid t => (Char -> Bool) -> Parser t t takeCharsWhile1 p = do (`when` demandInput) =<< null <$> get (h,t) <- Textual.span_ False p <$> get when (null h) $ fail "takeCharsWhile1" put t if null t then (h<>) `fmap` takeCharsWhile p else return h {-# INLINE takeCharsWhile1 #-} -- | A stateful scanner. The predicate consumes and transforms a -- state argument, and each transformed state is passed to successive -- invocations of the predicate on each token of the input until one -- returns 'Nothing' or the input ends. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'Nothing' on the first prime input factor. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. scan :: FactorialMonoid t => s -> (s -> t -> Maybe s) -> Parser t t scan s0 f = go s0 id where go s acc = do (h,t,s') <- Factorial.spanMaybe' s f <$> get put t if null t then do input <- wantMoreInput if input then go s' (acc . mappend h) else return (acc h) else return (acc h) {-# INLINE scan #-} -- | A stateful scanner. The predicate consumes and transforms a -- state argument, and each transformed state is passed to successive -- invocations of the predicate on each token of the input until one -- returns 'Nothing' or the input ends. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'Nothing' on the first prime input factor. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. scanChars :: TextualMonoid t => s -> (s -> Char -> Maybe s) -> Parser t t scanChars s0 fc = go s0 id where go s acc = do (h,t,s') <- Textual.spanMaybe_' s fc <$> get put t if null t then do input <- wantMoreInput if input then go s' (acc . mappend h) else return (acc h) else return (acc h) {-# INLINE scanChars #-} -- | Match any prime input token. anyToken :: FactorialMonoid t => Parser t t anyToken = satisfy $ const True {-# INLINE anyToken #-} -- | Match any prime input token. Returns 'mempty' if end of input -- has been reached. Does not consume any input. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. peekToken :: FactorialMonoid t => Parser t t peekToken = T.Parser $ \i0 a0 m0 _kf ks -> if null (unI i0) then if m0 == Complete then ks i0 a0 m0 mempty else let k' i a m = ks i a m $! Factorial.primePrefix (unI i) in prompt i0 a0 m0 k' k' else ks i0 a0 m0 $! Factorial.primePrefix (unI i0) {-# INLINE peekToken #-} -- | Match any character. anyChar :: TextualMonoid t => Parser t Char anyChar = satisfyChar $ const True {-# INLINE anyChar #-} -- | Match a specific character. char :: TextualMonoid t => Char -> Parser t Char char c = satisfyChar (== c) show c {-# INLINE char #-} -- | Match any input character, if available. Does not consume any input. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. peekChar :: TextualMonoid t => Parser t (Maybe Char) peekChar = T.Parser $ \i0 a0 m0 _kf ks -> if null (unI i0) then if m0 == Complete then ks i0 a0 m0 Nothing else let k' i a m = ks i a m $! Textual.characterPrefix (unI i) in prompt i0 a0 m0 k' k' else ks i0 a0 m0 $! Textual.characterPrefix (unI i0) {-# INLINE peekChar #-} -- | Match any input character, failing if the input doesn't start -- with any. Does not consume any input. peekChar' :: TextualMonoid t => Parser t Char peekChar' = do s <- ensureOne case Textual.characterPrefix s of Just c -> return c _ -> fail "peekChar'" {-# INLINE peekChar' #-} -- | Match either a single newline character @\'\\n\'@, or a carriage -- return followed by a newline character @\"\\r\\n\"@. endOfLine :: (Eq t, TextualMonoid t) => Parser t () endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ()) -- | Terminal failure continuation. failK :: Failure t a failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg {-# INLINE failK #-} -- | Terminal success continuation. successK :: Success t a a successK i0 _a0 _m0 a = Done (unI i0) a {-# INLINE successK #-} -- | Run a parser. parse :: Monoid t => Parser t a -> t -> IResult t a parse m s = T.runParser m (I s) mempty Incomplete failK successK {-# INLINE parse #-} -- | Run a parser that cannot be resupplied via a 'Partial' result. parseOnly :: Monoid t => Parser t a -> t -> Either String a parseOnly m s = case T.runParser m (I s) mempty Complete failK successK of Fail _ _ err -> Left err Done _ a -> Right a _ -> error "parseOnly: impossible error!" {-# INLINE parseOnly #-}