------------------------------------------------------------------------------- {- LANGUAGE CPP #-} -- Promoted to .cabal flag, as showPat (which presently lives elsewhere, -- although that is temporary!) needs to know, too. --- #define NEW_SEMICOLON_TYPE_LIST 1 #define DO_TRACE 0 #if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 && ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 #error Please set at most one of the flags ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 and ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 to True. #endif -- Changed my mind again -- we won't allow mixed nomenclature, -- but we WILL auto-detect, within a given pattern string, -- which convention is in use ... ah, but then what if you -- want to concatenate pattern strings to build patterns -- at the String DSL level, and these source strings don't -- originate in the same convention? Nah! Forget it, the -- toggle switch here is fine. [Now promoted to cabal flag -- so can use in showPat as well as tests.] ------ -- We cannot allow "{(})"!... And I'm not into playing matching -- games at this juncture. --- #define USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS 0 ----- #define ACCEPT_CURLY_BRACE_OR_PAREN_SUBPATTERNS 0 -- This /is/ important, and is /almost/ working. -- I really don't like struggling with libraries like parsec/atto. -- Or HXT and arrows. Honestly, just give me a solid combinator -- language with clean syntax, and a quality compiler, and we're -- off to the races. Grrrr....... -- Later: Oh, pshaw!!... #define ALLOW_ESCAPED_TYPE_LIST_SEPARATOR 1 #define SAVE_ME_HERE 1 #define WARN_IGNORED_SUBPATTERNS 1 #define NEVER_IGNORE_SUBPATTERNS 0 -- Formerly DEBUG_WITH_DEEPSEQ_GENERICS. -- Now also needed to force issuance of all compilePat warnings -- (so not strictly a debugging flag anymore). -- [Except it didn't work...] --- #define NFDATA_INSTANCE_PATTERN 0 -- now a .cabal flag #define DO_DERIVE_DATA_AND_TYPEABLE 0 #define DO_DERIVE_ONLY_TYPEABLE 1 #if DO_DERIVE_ONLY_TYPEABLE && DO_DERIVE_DATA_AND_TYPEABLE #undef DO_DERIVE_ONLY_TYPEABLE #warning DO_DERIVE_ONLY_TYPEABLE forced 0, due to DO_DERIVE_DATA_AND_TYPEABLE being 1. #define DO_DERIVE_ONLY_TYPEABLE 0 #endif -- Now specified via --flag=[-]USE_WWW_DEEPSEQ --- #define USE_WW_DEEPSEQ 1 ------------------------------------------------------------------------------- {- LANGUAGE PatternSignatures #-} -- debugging only {-# LANGUAGE ScopedTypeVariables #-} -- debugging only #if USE_ATTOPARSEC {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE BangPatterns #-} -- for forcing tracelines in monadic code #endif #if DO_DERIVE_DATA_AND_TYPEABLE {-# LANGUAGE DeriveDataTypeable #-} #endif -- XXX Only needed for something in Blah.hs. -- Check into it, and see if can't get rid of the need -- for Typeable instances in here! #if DO_DERIVE_ONLY_TYPEABLE {-# LANGUAGE DeriveDataTypeable #-} #endif #if NFDATA_INSTANCE_PATTERN -- For testing only (controlling trace interleaving): {-# LANGUAGE DeriveGeneric #-} #endif {- LANGUAGE DeriveFunctor #-} ------------------------------------------------------------------------------- -- | -- Module : Control.DeepSeq.Bounded.Compile_new_grammar -- Copyright : Andrew G. Seniuk 2014-2015 -- License : BSD-style (see the file LICENSE) -- -- Maintainer : Andrew Seniuk -- Stability : provisional -- Portability : portable -- ------------------------------------------------------------------------------- module Control.DeepSeq.Bounded.Compile_new_grammar -- XXX If commented out, it's for debugging only! #if 1 --- {-# DEPRECATED "Use Wobble instead" #-} ( #if USE_ATTOPARSEC #if HASKELL98_FRAGMENT #error Sorry, HASKELL98_FRAGMENT incompatible with NEW_IMPROVED_PATTERN_GRAMMAR, because only USE_ATTOPARSEC parser is working. With some artful CPP you could cut out the Pattern parser/compiler, and then resort to the bare PatNode constructors... #endif compileUsingAttoparsec , -- parsePat , #else compilePat' , #endif ) #endif where ------------------------------------------------------------------------------- import Control.DeepSeq.Bounded.Pattern import Control.DeepSeq.Bounded.Compile_shared_utils import Data.Maybe ( isNothing, fromJust ) import Data.Maybe ( isJust ) import Debug.Trace ( trace ) #if USE_ATTOPARSEC #if 0 -- This attoparsec module is intended for parsing text that is -- represented using an 8-bit character set, e.g. ASCII or ISO-8859-15. -- XXX Since this is to include type names, -- the character set should be bigger... #if 1 import Data.Attoparsec.Text import qualified Data.Text as T #else import Data.Attoparsec.Char8 import qualified Data.ByteString.Char8 as B #endif import Control.Applicative ( (<*), (<|>) ) #endif import Control.Applicative -------------- -- Data.Aeson.Parser.Internal.hs imports: #if 0 import Data.ByteString.Builder (Builder, byteString, toLazyByteString, charUtf8, word8) #endif import Control.Applicative ((*>), (<$>), (<*), liftA2, pure) import Control.Applicative ( (<|>) ) -- not in aeson --import Data.Aeson.Types (Result(..), Value(..)) #if 0 import Data.Attoparsec.ByteString.Char8 (Parser, char, endOfInput, scientific, skipSpace, string) import Data.Bits ((.|.), shiftL) import Data.ByteString (ByteString) import Data.Char (chr) import Data.Monoid (mappend, mempty) import Data.Text (Text) import Data.Text.Encoding (decodeUtf8') #endif import qualified Data.Text as T -- not in aeson --import qualified Data.Text.Lazy as T -- not in aeson #if 0 --import Data.Vector as Vector (Vector, fromList) import Data.Word (Word8) #endif #if 0 import qualified Data.Attoparsec.ByteString as A import qualified Data.Attoparsec.Lazy as AL import qualified Data.Attoparsec.Zepto as Z import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Unsafe as B --import qualified Data.HashMap.Strict as H #endif import qualified Data.Attoparsec.Text as AT -- not in aeson --import qualified Data.Attoparsec.Text.Lazy as AT -- not in aeson --import qualified Data.Attoparsec.Text as AT -- (Parser, char, endOfInput, scientific, -- skipSpace, string) import Data.Char ( isLetter ) import Data.Char ( isDigit ) import Control.Monad ( liftM ) --import Control.Monad ( foldM ) --import Data.Foldable ( fold ) --import Control.Monad ( mzero ) import Data.Monoid ( mempty ) import Data.Char ( ord ) import Control.Monad ( mzero ) #endif ------------------------------------------------------------------------------- #if DO_TRACE mytrace = trace #else mytrace _ = id #endif #if USE_ATTOPARSEC ------------------------------------------------------------------------------- -- Although this accepts zero or more Pattern, the caller -- will fail the parse unless the list contains exactly -- one Pattern. Later: That's not true. Now, the callers -- (or someone up there) uses liftPat if multiple patterns -- are parsed (adds new common root). parsePatsTop :: AT.Parser [Pattern] parsePatsTop = do !_ <- mytrace ("parsePatsTop.") $ return () let as = emptyPatNodeAttrs AT.many' (AT.skipSpace *> parsePat as <* AT.skipSpace) -- I'm not sure why this particular skipSpace is necessary, but -- the pattern " ." will fail to parse without it, even though -- "( .)" is fine. --parsePatsTop = AT.skipSpace *> AT.many' (parsePat emptyPatNodeAttrs) ------------------------------------------------------------------------------- -- This differs from parsePatsTop in that it assumes an -- opening grouping token has been consumed (so will be -- expecting a corresponding closing token). parsePats :: AT.Parser [Pattern] #if 1 parsePats = do !_ <- mytrace "parsePats." $ return () (AT.endOfInput *> pure []) <|> (do c <- AT.peekChar' #if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS if c == '}' #else if c == ')' #endif then pure [] --- then AT.char '}' >> pure [] else let as = emptyPatNodeAttrs in AT.many' (AT.skipSpace *> parsePat as <* AT.skipSpace)) #else -- XXX Why does this work so badly? parsePats = AT.manyTill' (AT.skipSpace *> parsePat emptyPatNodeAttrs) (AT.endOfInput <|> (AT.char ')' *> return ())) #endif ------------------------------------------------------------------------------- -- Deal with all /prefix/ PatNode attributes (PatNodeAttrs). -- These are all except the two /postfix/ attrs, *n and :[:]types. -- Later: Now, all except *n -- the :...: type constraints -- are treated as just another prefix modifier, but the T* -- PatNode's still exist (probably they will be absorbed in 0.7). parsePat :: PatNodeAttrs -> AT.Parser Pattern parsePat as = do -- XXX Should test if next character is non-attribute, up front, -- and skip all this attribute stuff in that case!... !_ <- mytrace "parsePat." $ return () #if 1 foldr (<|>) mempty $ -- foldM (<|>) mempty $ -- fold (<|>) mempty $ -- fold (<|>) mzero $ ( map (\ (c,s,p,b,a) -> do let q = (c,s,b,a) #if SAVE_ME_HERE if c == '\0' then fail "" else do #endif #if 0 mempty -- Not very efficient to be doing the first two every time! -- With one more bit of abstraction, we could get it all snug. <|> (parsePat1''' as) --- @:tys: : tys are typeConstraints <|> (parsePat1' as) --- @n : n is threadDelay in microsec. #if USE_PSEQ_PATNODE <|> (parsePat1'' as) --- >p : p is a permutation, eg. cdba #endif <|> ( AT.char c <* AT.skipSpace ) #endif ( p c b a s <* AT.skipSpace ) ) #if SAVE_ME_HERE [ ( '\0' , "" , dud_parser , const False , as ) , ( ':' , "types" , types_parser , doConstrainType , as_types ) , ( '@' , "delay" , delay_parser , doDelay , as_delay ) #if USE_PAR_PATNODE , ( '=' , "spark" , no_arg_parser , doSpark , as_spark ) #endif #if USE_PSEQ_PATNODE , ( '>' , "pseq" , pseq_parser , doPseq , as_pseq ) #endif #if USE_TRACE_PATNODE , ( '+' , "trace" , no_arg_parser , doTrace , as_trace ) #endif #if USE_PING_PATNODE , ( '^' , "ping" , no_arg_parser , doPing , as_ping ) #endif #if USE_DIE_PATNODE , ( '/' , "die" , no_arg_parser , doDie , as_die ) #endif #if USE_TIMING_PATNODE , ( '%' , "timing" , no_arg_parser , doTiming , as_timing ) #endif ] ) ++ [ parsePat3 as ] --- ) ++ [ parsePat2 as ] #else -- (Later: bitrotten now.) -- I could comment, but this speaks for itself. [ #if USE_PAR_PATNODE {-,-} ( '=' , "spark" , doSpark , as_spark ) #endif #if USE_PSEQ_PATNODE #if USE_PAR_PATNODE , ( '+' , "trace" , doTrace , as_trace ) #else {-,-} ( '>' , "pseq" , doPseq , as_pseq ) #endif #endif #if USE_TRACE_PATNODE , ( '+' , "trace" , doTrace , as_trace ) #endif #if USE_PING_PATNODE , ( '^' , "ping" , doPing , as_ping ) #endif #if USE_DIE_PATNODE , ( '/' , "die" , doDie , as_die ) #endif ] ) ++ [ parsePat2 as ] #endif #else -- XXX This has fallen into obsolescence. -- XXX It looks more orderly, and is easier to understand, -- but it's actually harder to maintain, so I guess that -- would argue overall for using the fold, above... ( AT.char '=' >> if doSpark as then fail "compilePat: duplicate '=' (spark) node attribute" else parsePat as_spark) <|> ( AT.char '+' >> if doTrace as then fail "compilePat: duplicate '+' (trace) node attribute" else parsePat as_trace) <|> ( AT.char '^' >> if doPing as then fail "compilePat: duplicate '^' (ping) node attribute" else parsePat as_ping) <|> (parsePat2 as) #endif where dud_parser _ _ _ _ = fail "dud_parser" -- (it is never run; should use Proxy) no_arg_parser c b a s = do ( (AT.char c) <* AT.skipSpace ) >> ( if b as then fail $ "compilePat: duplicate " ++ show c ++ " (" ++ s ++ ") " ++ "node attribute" else parsePat a) types_parser _ _ a _ = parsePat1''' a delay_parser _ _ a _ = parsePat1' a pseq_parser _ _ a _ = parsePat1'' a -- (doConstrainType, doDelay, and doPseq handled separately, -- due to their taking arguments.) as_types = as { doConstrainType = True } as_delay = as { doDelay = True } #if USE_PAR_PATNODE as_spark = as { doSpark = True } #endif #if USE_PSEQ_PATNODE as_pseq = as { doPseq = True } #endif #if USE_TRACE_PATNODE as_trace = as { doTrace = True } #endif #if USE_PING_PATNODE as_ping = as { doPing = True } #endif #if USE_DIE_PATNODE as_die = as { doDie = True } #endif #if USE_TIMING_PATNODE as_timing = as { doTiming = True } #endif -- Parse the ":Int;Maybe Float:" typeConstraints attribute, if present. --parsePat1''' :: PatNodeAttrs -> AT.Parser PatNodeAttrs parsePat1''' :: PatNodeAttrs -> AT.Parser Pattern parsePat1''' as = do !_ <- mytrace "parsePat1'''." $ return () AT.char ':' >> ( ( parse_type_constraints True <* AT.skipSpace ) >>= \ (tcs,ncol) -> let as' = as { doConstrainType = True , typeConstraints = map T.unpack tcs } #if 0 in return as' ) #else in do -- This continues to look fine. #if 0 roi <- AT.takeText error $ "DEVEXIT: " ++ show tcs ++ " " ++ show roi ++ "\n" ++ show as' #endif parsePat as' ) -- in parsePat as' ) #endif -- Parse the "@50000" delayus attribute, if present. --parsePat1' :: PatNodeAttrs -> AT.Parser PatNodeAttrs parsePat1' :: PatNodeAttrs -> AT.Parser Pattern parsePat1' as = do !_ <- mytrace "parsePat1'." $ return () AT.char '@' >> ( ( AT.decimal <* AT.skipSpace ) >>= \ dly -> let as' = as { doDelay = True , delayus = dly } #if 0 in return as' ) #else in parsePat as' ) #endif #if USE_PSEQ_PATNODE -- Parse the ">cdba" pseqPerm attribute, if present. --parsePat1'' :: PatNodeAttrs -> AT.Parser PatNodeAttrs parsePat1'' :: PatNodeAttrs -> AT.Parser Pattern parsePat1'' as = do !_ <- mytrace "parsePat1''." $ return () AT.char '>' >> ( ( AT.many1' AT.letter ) <* AT.skipSpace >>= \ perm -> let perm' = map (\c -> ord c - ord 'a') perm as' = as { doPseq = True , pseqPerm = Just perm' } #if 0 in return as' ) #else in parsePat as' ) #endif #endif ------------------------------------------------------------------------------- #if 0 -- Now we're past the prefix attributes; next, test if this -- as a TR node (signalled by ':' being the next character), -- and branch accordingly. (There is still TI, TN and TW -- cases to handle, see parsePat4*.) parsePat2 :: PatNodeAttrs -> AT.Parser Pattern parsePat2 as = do !_ <- mytrace "parsePat2." $ return () (AT.char ':' >> parsePat2_t as) <|> (parsePat3 as) -- Parse the type constraints which must follow TR's opening ':'. parsePat2_t :: PatNodeAttrs -> AT.Parser Pattern parsePat2_t as = do !_ <- mytrace "parsePat2_t." $ return () (tcs,ncol) <- parse_type_constraints True -- (tcs,ncol) <- spaceSeparated parseTypeName '{' -- error $ show tcs !_ <- mytrace ("(ncol,tcs)=("++show ncol ++ "," ++ (T.unpack $ T.intercalate " " tcs)) $ return () if ncol > 0 then fail "compilePat: unexpected \"::\"" else do let as_t = as { typeConstraints = map T.unpack tcs } parsePat_TR_tail 'x' as_t #endif ------------------------------------------------------------------------------- -- Handle "*23"-style (WN and TN) nodes. -- (The integer depth attribute always precedes any type constraint; -- in contrast to the prefix attributes, which can occur in any order.) parsePat3 :: PatNodeAttrs -> AT.Parser Pattern parsePat3 as = do !_ <- mytrace "parsePat3." $ return () b <- AT.peekChar' #if ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 ) && ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 ) AT.anyChar #endif !_ <- mytrace ("boo-0: "++show b) $ return () #if ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 ) && ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 ) !_ <- mytrace ("boo-1: "++show b) $ return () if b == '*' #else !_ <- mytrace ("boo-1-_19__9: "++show b) $ return () if isDigit b #endif then do ( (parsePat3_aux b as) <|> (do #if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 || ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 #if 1 #if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 AT.anyChar (case b of #if 1 '0' -> parsePat4 '0' Nothing as '1' -> parsePat4 '1' Nothing as #else #if NEW_CONCRETE_WI_AND_WS '0' -> parsePat4 '.' Nothing as '1' -> parsePat4 '!' Nothing as #else '0' -> parsePat4 '#' Nothing as '1' -> parsePat4 '.' Nothing as #endif #endif _ -> parsePat4 '*' Nothing as ) ) ) #else (parsePat4 '*' Nothing as) ) ) #endif #else !_ <- mytrace ("parsePat3: unexpected digit " ++ [b]) $ return () fail $ "parsePat3: unexpected digit " ++ [b] ) ) #endif #else (parsePat4 '*' Nothing as) ) ) #endif else do #if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 || ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 -- fail $ "parsePat3: #2 unexpected digit " ++ [b] AT.anyChar #endif parsePat4 b Nothing as -- else fail "compilePat: unexpected char not in \"#.*\"" -- Actual handler, in case it /was/ WN or TN node. parsePat3_aux :: Char -> PatNodeAttrs -> AT.Parser Pattern parsePat3_aux b as = do !_ <- mytrace "parsePat3_aux." $ return () !_ <- mytrace ("boo-2: "++show b) $ return () -- These should be safe cutoffs without having to worry about exact figures. --- DEPTH_USES_INT64 isn't implemented yet, this is just a note --- for future consideration. (Should be in NFDataN if anywhere...). --- I'm not ready to make this sweeping change yet. --- #if DEPTH_USES_INT64 --- if length n_integer_str > 19 = fail $ "compilePat: *" ++ n_integer_str ++ " is too large" !_ <- mytrace ("boo-3: "++show b) $ return () #if ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 ) && ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 ) AT.skipSpace #endif #if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 n_integer_cs <- if b == '1' then AT.anyChar *> AT.option "1" (AT.digit >>= \ c -> return ('1':[c])) else AT.anyChar *> return [b] !_ <- mytrace ("boo-3.2: n_integer_ns="++n_integer_cs) $ return () let n_integer = read n_integer_cs :: Integer #elif ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 n_integer_c <- AT.anyChar :: AT.Parser Char !_ <- mytrace ("n_integer_c="++show n_integer_c) $ return () n_integer <- case n_integer_c of '0' -> mytrace ("ANDDD...(0):") $ fail "" '1' -> mytrace ("ANDDD...(1):") $ fail "" _ -> parsePat4 '*' Nothing as !_ <- mytrace "oops!!!!" $ return () let n_integer = read [n_integer_c] :: Integer #else n_integer <- AT.decimal :: AT.Parser Integer #endif !_ <- mytrace ("boo-3.5: "++show b) $ return () let n_integer_str = show n_integer !_ <- mytrace ("boo-4: "++show b++" "++n_integer_str) $ return () if length n_integer_str > 9 then fail $ "compilePat: *" ++ n_integer_str ++ " is too large" else parsePat4 '*' (Just (read n_integer_str :: Int)) as ------------------------------------------------------------------------------- #if 1 -- This handles whether or not it's a type-constrainted node. -- (The constraints themselves will have already been parsed.) parsePat4 :: Char -> Maybe Int -> PatNodeAttrs -> AT.Parser Pattern parsePat4 b mdepth as = do !_ <- mytrace "parsePat4." $ return () !_ <- mytrace ("GOO-1: "++show b++" "++show (doConstrainType as)) $ return () if doConstrainType as then parsePat4_t b mdepth 0 as else parsePat4_w b mdepth as #else -- This handles whether or not it's a type-constrainted node, -- not including TR nodes which were handled earlier. parsePat4 :: Char -> Maybe Int -> PatNodeAttrs -> AT.Parser Pattern parsePat4 b mdepth as = do !_ <- mytrace "parsePat4." $ return () !_ <- mytrace ("GOO-1: "++show b) $ return () parsePat4_aux b mdepth as <|> parsePat4_w b mdepth as -- Actual handler, in case it /was/ a type-constrained node (TI, TN or TW). parsePat4_aux :: Char -> Maybe Int -> PatNodeAttrs -> AT.Parser Pattern parsePat4_aux b mdepth as = do #if 0 roi <- AT.takeText error $ "DEVEXIT -- " ++ T.unpack roi #endif !_ <- mytrace "parsePat4_aux." $ return () !_ <- mytrace ("GOO-2.1: b="++show b++" mdepth="++show mdepth) $ return () c <- AT.peekChar' !_ <- mytrace ("GOO-2.2: c="++show c) $ return () if c == ':' then do !_ <- mytrace ("GOO-2.3.1: "++show b) $ return () (tcs,ncol) <- parse_type_constraints False -- (tcs,ncol) <- spaceSeparated parseTypeName '{' !_ <- mytrace ("GOO-2.3.2: "++show (tcs,ncol)) $ return () let as_t = as { typeConstraints = map T.unpack tcs } !_ <- mytrace ("GOO-2.3.3: "++show (typeConstraints as_t)) $ return () parsePat4_t b mdepth ncol as_t else do !_ <- mytrace ("GOO-2.4: "++show b) $ return () parsePat4_w b mdepth as -- fail "compilePat: expected ':'" #endif -- Actual handler, in case it was /NOT/ a type-constrained node; -- i.e. a WI, WR, WS (if still exists), WN or WW node. parsePat4_w :: Char -> Maybe Int -> PatNodeAttrs -> AT.Parser Pattern parsePat4_w b mdepth as = do !_ <- mytrace "parsePat4_w." $ return () !_ <- mytrace ("GOO-3: "++show b) $ return () case b of #if VACANT_HASH ' ' -> return (Node (WI as) []) '#' -> return (Node (WI as) []) -- still accept actual #, too #else #if NEW_CONCRETE_WI_AND_WS #if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 '0' -> return (Node (WI as) []) #else '.' -> return (Node (WI as) []) #endif #else '#' -> return (Node (WI as) []) #endif #endif #if NEW_CONCRETE_WI_AND_WS #if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 '1' -> return (Node (WS as) []) #else '!' -> return (Node (WS as) []) #endif #else '.' -> return (Node (WS as) []) #endif '*' -> if isNothing mdepth then return (Node (WW as) []) else return (Node (WN as_n) []) #if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS '{' -> parsePat_WR_tail b as #else '(' -> parsePat_WR_tail b as #endif #if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS _ -> fail $ "compilePat: expected one of \"#.*{\" (got " ++ show b ++ ")" #else _ -> fail $ "compilePat: expected one of \"#.*(\" (got " ++ show b ++ ")" #endif where as_n = as { depth = fromJust mdepth } -- This is a helper of patsePat4_aux. parsePat4_t :: Char -> Maybe Int -> Int -> PatNodeAttrs -> AT.Parser Pattern parsePat4_t b mdepth ncol as_t = do !_ <- mytrace "parsePat4_t." $ return () #if 1 #if 0 !_ <- mytrace "parsePat4_t: trying to eat ':'..." $ return () AT.char ':' !_ <- mytrace "parsePat4_t: ate ':'!" $ return () #endif if False then fail "dummy" -- will never run #else if ncol /= 2 then do c <- AT.peekChar' fail $ "compilePat: after \"" ++ [c] ++ "\", expect \"::\" not \":\"" #endif else do #if NEW_SEMICOLON_TYPE_LIST !_ <- mytrace ("parsePat4_t: b="++show b) $ return () case b of #if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS '{' -> do #else '(' -> do #endif !_ <- mytrace "parsePat4_t: entering TR_tail..." $ return () parsePat_TR_tail 'x' as_t --- !_ <- mytrace "parsePat4_t: exited TR_tail!" $ return () #if VACANT_HASH ' ' -> return (Node (TI as_t) []) '#' -> return (Node (TI as_t) []) -- still accept actual #, too #else #if NEW_CONCRETE_WI_AND_WS #if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 '0' -> return (Node (TI as_t) []) #else '.' -> return (Node (TI as_t) []) #endif #else '#' -> return (Node (TI as_t) []) #endif #endif '*' -> if isNothing mdepth then return (Node (TW as_t) []) else return (Node (TN as_t_n) []) #else #if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS let endch = '}' #else let endch = ')' #endif case b of #if VACANT_HASH ' ' -> AT.char endch >> return (Node (TI as_t) []) '#' -> AT.char endch >> return (Node (TI as_t) []) -- still accept actual #, too #else #if NEW_CONCRETE_WI_AND_WS #if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 '0' -> AT.char endch >> return (Node (TI as_t) []) #else '.' -> AT.char endch >> return (Node (TI as_t) []) #endif #else '#' -> AT.char endch >> return (Node (TI as_t) []) #endif #endif '*' -> AT.char endch >> if isNothing mdepth then return (Node (TW as_t) []) else return (Node (TN as_t_n) []) #endif #if NEW_CONCRETE_WI_AND_WS #if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 _ -> fail $ "compilePat: expected '*' or digit (got " ++ show b ++ ")" #elif ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__SAFE_DOWN_TO_DEPTH_19 _ -> fail $ "compilePat: expected '.' or '*' or digit (got " ++ show b ++ ")" #else _ -> fail $ "compilePat: expected '.' or '*' (got " ++ show b ++ ")" #endif #else _ -> fail $ "compilePat: expected '#' or '*' (got " ++ show b ++ ")" #endif where as_t_n = as_t { depth = fromJust mdepth } ------------------------------------------------------------------------------- -- XXX I hesitate to document these ... they're both concerned -- with parsing grouped subpatterns, but it's still not clear -- whether the opening '(' (or '{') is expected to have been -- previously consumed or not, and I think the convention -- is different in each of these -- if it were the same, there -- would be no need for two functions! #if 0 parsePat_WR :: PatNodeAttrs -> AT.Parser Pattern parsePat_WR as = AT.char '{' *> parsePat_WR_tail as #endif parsePat_WR_tail :: Char -> PatNodeAttrs -> AT.Parser Pattern parsePat_WR_tail x as = do !_ <- mytrace "parsePat_WR_tail." $ return () !_ <- mytrace ("**HWR1**: "++show x) $ return () #if 0 #if NEW_SEMICOLON_TYPE_LIST #if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS AT.skipSpace *> AT.char '{' #else AT.skipSpace *> AT.char '(' #endif #endif #endif !_ <- mytrace ("**HWR1.5**: "++show x) $ return () pats <- parsePats <|> pure [] !_ <- mytrace ("**HWR2**: "++show x) $ return () #if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS AT.char '}' #else AT.char ')' #endif !_ <- mytrace ("**HWR3**: "++show x) $ return () return (Node (WR as) pats) #if 0 parsePat_TR :: PatNodeAttrs -> AT.Parser Pattern parsePat_TR as = AT.char '{' *> parsePat_TR_tail as #endif parsePat_TR_tail :: Char -> PatNodeAttrs -> AT.Parser Pattern parsePat_TR_tail x as_t = do !_ <- mytrace "parsePat_TR_tail." $ return () !_ <- mytrace ("**HTR1**: "++show x) $ return () #if 0 roi <- AT.takeText error $ "DEVEXIT: " ++ show roi #endif #if 0 #if NEW_SEMICOLON_TYPE_LIST #if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS AT.skipSpace *> AT.char '{' #else AT.skipSpace *> AT.char '(' #endif #endif #endif !_ <- mytrace ("**HTR1.5**: "++show x) $ return () pats <- parsePats <|> pure [] !_ <- mytrace ("**HTR2**: "++show x) $ return () #if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS AT.char '}' #else AT.char ')' #endif !_ <- mytrace ("**HTR3**: "++show x) $ return () return (Node (TR as_t) pats) ------------------------------------------------------------------------------- -- XXX In isTR case, it seems the (single) colon has already been consumed; -- whereas in non-isTR case, neither of the (double) colons have been. -- It's important to note that this parser begins -- by consuming initial [whitespace, and] colons. -- It also counts them, and returns the count. parse_type_constraints :: Bool -> AT.Parser ( [T.Text], Int ) parse_type_constraints isTR = do -- AT.take 3 >>= \ test -> error $ "test="++show test !_ <- mytrace "parse_type_constraints." $ return () #if NEW_SEMICOLON_TYPE_LIST let endchar = ':' #else #if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS let endchar = '{' #else let endchar = '(' #endif #endif #if TYPE_CONSTRAINTED_NODES_USE_UNESCAPED_SPACE_AS_TYPE_LIST_SEPARATOR let sepchar = ' ' #else #if NEW_SEMICOLON_TYPE_LIST let sepchar = ';' #else let sepchar = ':' #endif #endif #if NEW_SEMICOLON_TYPE_LIST ncs <- if isTR then pure 0 else AT.string "::" *> pure 2 #endif #if NEW_SEMICOLON_TYPE_LIST -- (1) Grab (or be ready to grab) input up to the next unescaped ':' -- character, which must exist. We might as well do this up front, -- since we /will/ actually consume all of it. -- XXX I'll finish this using peekChar, but I think in atto you are -- supposed to just use <|>, it is backtracking by default, so try such -- a variant and see if it works (after), it would be way more compact! let loop = do seg <- AT.takeWhile (\c->c/=endchar&&c/='\\') :: AT.Parser T.Text !_ <- mytrace ("loop: seg="++T.unpack seg) $ return () if T.null seg then do !_ <- mytrace "loop: T.null seg" $ return () return [] else do mnc <- AT.peekChar !_ <- mytrace ("loop: mnc="++show mnc) $ return () let nc = fromJust mnc if isNothing mnc then do !_ <- mytrace "trace: \"parse_type_constraints: unexpected end of input #1\"" $ return () fail "parse_type_constraints: unexpected end of input" else do AT.take 1 if nc == '\\' then do mnc' <- AT.peekChar !_ <- mytrace ("loop: mnc'="++show mnc') $ return () let nc' = fromJust mnc' if isNothing mnc' then do !_ <- mytrace "trace: \"parse_type_constraints: unexpected end of input #2\"" $ return () fail "parse_type_constraints: unexpected end of input" else do -- We don't care if it was : or not. If the character -- after '\\' (i.e. nc') was not ':', the result is -- no different ("\\c" in all cases); however, we -- distinguish ':' conceptually because, by passing it -- through, we affect the termination properties of loop. !_ <- mytrace "loop: " $ return () AT.take 1 *> ( ( ( T.snoc seg '\\' `T.snoc` nc' ) : ) <$> loop ) --- AT.anyChar >>= \ c -> ( T.snoc seg c : ) <$> loop else do !_ <- mytrace ("loop: [seg]="++show [seg]) $ return () return [seg] -- we know it was endchar segs <- loop !_ <- mytrace ("segs="++show segs) $ return () let seg = T.concat segs !_ <- mytrace ("seg="++T.unpack seg) $ return () #else -- (1) Grab (or be ready to grab) input up to the first '(' (or '{') -- character, which must exist. We might as well do this up front, -- since we /will/ actually consume all of it. seg' <- AT.takeWhile (/=endchar) :: AT.Parser T.Text !_ <- mytrace ("seg'="++T.unpack seg') $ return () #endif #if NEW_SEMICOLON_TYPE_LIST -- (1.5) I guess we're supposed to consume the closing ':' as well: -- Later: And it looks like we did already, although I don't see why... -- do { x <- AT.take 2 ; !_ <- mytrace ("x="++show x) $ return () ; fail "" } if isTR -- For TR case, we need a parse error if see a second closing colon. -- This should happen in the normal course of parsing; we don't -- need to do anything here (and it would be difficult to do so, -- but according to my analysis the parse should eventually fail). -- (But a later note says, "no!", we should/must do it here?...) then do !_ <- mytrace "HERE isTR" $ return () mnc <- AT.peekChar !_ <- mytrace ("isTR: mnc="++show mnc) $ return () let nc = fromJust mnc if isNothing mnc then do !_ <- mytrace "isTR: \"parse_type_constraints: unexpected end of input\"" $ return () fail "parse_type_constraints: unexpected end of input" else do !_ <- mytrace ("isTR: nc="++show nc) $ return () if nc == ':' then do !_ <- mytrace "isTR: \"parse_type_constraints: unexpected end of input\"" $ return () fail "parse_type_constraints: unexpected end of input" else do AT.take 0 --- else AT.anyChar *> AT.anyChar >>= \ c -> ( ( seg `T.snoc` nc `T.snoc` c ) : ) <$> loop -- If there are two (or more) contiguous colons closing, then -- see if can get an accept by taking the (leading) pair as -- a single close token; otherwise, the second (and subsequent) -- colons must be part of the next pattern. -- XXX Later: Hopefully AT.option will give me what I think it will... -- (Still debugging numerous sites since added this code, so untested.) else do AT.take 0 -- ( AT.option T.empty (pure (T.singleton endchar)) ) *> AT.take 0 --- ( AT.option T.empty (AT.char endchar *> pure (T.singleton endchar)) ) *> ( ( ( T.singleton endchar ) : ) <$> loop ) ---- ( ( AT.option T.empty (AT.char endchar) ) *> ( ( T.singleton endchar ) : ) ) <$> loop ---- ( AT.option T.empty (AT.char endchar) ) >>= \ c-> ( ( T.singleton c ) : )<$> loop ---- ( AT.option T.empty (AT.char endchar) ) <$> loop ----- AT.option T.empty (pure $ T.singleton endchar) ----- AT.option T.empty (AT.takeWhile (==endchar)) #else -- (1.5) I guess we're supposed to consume the '(' as well: AT.take 1 #endif !_ <- mytrace ("HERE!") $ return () #if ! NEW_SEMICOLON_TYPE_LIST -- (2) Prefix colons: Have different syntax and semantics than -- the separator colons. We must eat them now, and keep -- count since we return that figure as part of the result. let (ecs :: Either String T.Text) = AT.parseOnly ((AT.takeWhile (==':')) :: AT.Parser T.Text) seg !_ <- mytrace ("ecs="++show ecs) $ return () #endif #if NEW_SEMICOLON_TYPE_LIST let (eblocksncs :: Either String ([T.Text],Int)) = AT.parseOnly ( ( AT.sepBy1' (AT.takeWhile (/=';')) (AT.char ';') ) >>= \ y -> return (y,ncs) ) seg !_ <- mytrace ("eblocksncs="++show eblocksncs) $ return () let (blocks,ncs) = case eblocksncs of Left msg -> error $ "parse_type_constraints: eblocks parse failure: " ++ msg Right (blocks,ncs) -> (blocks,ncs) :: ([T.Text],Int) !_ <- mytrace ("(blocks,ncs)="++show (blocks,ncs)) $ return () let blocks' = map (helper False) blocks -- (so get "\\c" not "\c" in names) return (blocks',ncs) #else let (eblocksncs :: Either String ([T.Text],Int)) = case ecs of Left msg -> fail $ "parse_type_constraints: expected colon: " ++ msg -- (3) Split the remaining pre-( fragment at all colon characters. -- (Never mind escapes whatsoever; leave the '\\' chars alone, -- but split on every ':' regardless if it was preceded by '\\'.) Right cs -> let ncs = T.length cs seg' = T.drop ncs seg in #if 0 #if 0 #elif 1 AT.sepBy1' (AT.many1' AT.anyChar) (AT.string $ T.singleton sepchar) >>= (\x -> return (x,ncs)) #elif 0 AT.sepBy1' (liftM T.concat $ AT.many1' AT.anyChar) (AT.string $ T.singleton sepchar) >>= (\x -> return (x,ncs)) #elif 0 AT.sepBy1' (liftM T.concat $ AT.many1' AT.anyChar) (AT.char sepchar) >>= (\x -> return (x,ncs)) #endif #else AT.parseOnly ( ( AT.sepBy1' #if 1 (AT.takeWhile (/=':')) (AT.char ':') #else (AT.many1' AT.anyChar) (AT.symbol ":") -- (AT.string $ T.singleton sepchar) #endif ) >>= \ x -> return x -- >>= \ x -> return $ liftM T.pack x >>= \ y -> return (y,ncs) ) seg' #endif !_ <- mytrace ("eblocksncs="++show eblocksncs) $ return () #if 0 let blocks = map T.pack eblocks :: [T.Text] -- debugging sig. -- let blocks = map T.concat eblocks :: [T.Text] -- debugging sig. -- let blocks = eblocks :: [T.Text] -- debugging sig. -- let blocks = eblocks #else let (blocks,ncs) = case eblocksncs of Left msg -> error $ "parse_type_constraints: eblocks parse failure: " ++ msg -- Left msg -> fail $ "parse_type_constraints: eblocks parse failure: " ++ msg -- XXX No instance for (Monad ((,) [T.Text])) arising from a use of fail!! Why just this fail call, and not the similar others above??... Right (blocks,ncs) -> (blocks,ncs) :: ([T.Text],Int) -- Right blocks -> map T.pack blocks :: [T.Text] -- Right blocks -> AT.parseOnly ( do ... #endif !_ <- mytrace ("(blocks,ncs)="++show (blocks,ncs)) $ return () #if ! ALLOW_ESCAPED_TYPE_LIST_SEPARATOR let blocks' = map (helper False) blocks -- (so get "\\c" not "\c" in names) return (blocks',ncs) #else -- XXX I do believe this is still broken in the case of "\\\\:". -- No. String Pattern Blocks -- 1 :A() Node TR (A) [] ["A"] -- correct -- 2 :A:B() Node TR (A:B) [] ["A", "B"] -- correct -- 3 :A\:B() "lex error at ':'" -- correct -- 4 :A\\:B() Node TR (A\:B) [] ["A\\:B"] -- correct -- 5 :A\\\:B() "lex error at ':'" -- correct -- 6 :A\\\\:B() Node TR (A\\:B) [] ["A\\\\:B"] -- WRONG! ---------- -- Should be: -- 6 :A\\\\:B() Node TR (A\\:B) [] ["A\\\\","B"] -- (As it happens, the show output looks the same either way, since I'm -- using : as separator there; if you change separator in the show, it's -- easier to debug such stuff as this.) ------ -- So, which semantics do we want? Should "A\\\\:B" become ["A\\","B"]? -- And the "A\\\\\\:B" would become ["A\\\:B"] (sic!). -- (We allow "[^\]\:" as a pattern, not for typing in patterns, but -- in the showing of them -- this is wrong actually, I think it should -- be a double-backslash...). -- (4) Now post-process this [Text]: For every block (except the first), -- if it begins ':' -- they all do, oops. -- So, rather, for every block (except the last), if the final -- character is '\\', then fuse this block to its successor. -- As to whether or not to elide the (rightmost) '\\' preceding -- the colon in the fused pair, is a matter of policy and easily -- settled later. let blocks' = (map (helper True) . dealWithEscapedSeparators) blocks -- let blocks' = dealWithEscapedSeparators blocks !_ <- mytrace ("blocks'="++show blocks') $ return () return (blocks',ncs) #endif #endif where helper :: Bool -> T.Text -> T.Text helper b t | T.null t = t --- | T.head t == '\\' = T.concat ["\\\\", helper b $ T.tail t] | otherwise = T.cons (T.head t) $ helper b $ T.tail t #if ALLOW_ESCAPED_TYPE_LIST_SEPARATOR dealWithEscapedSeparators :: [T.Text] -> [T.Text] dealWithEscapedSeparators (t1:t2:ts) | dofuse = t' : dealWithEscapedSeparators ts | otherwise = t' : dealWithEscapedSeparators (t2:ts) where dofuse | T.null t1 || T.null t2 = True | T.last (T.init t1) == '\\' = False -- sic | otherwise = T.last t1 == '\\' --- | otherwise = T.last t1 == '\\' && T.head t2 == ':' t' | dofuse = T.concat [t1,':' `T.cons` t2] | otherwise = t1 dealWithEscapedSeparators x = x #endif {-# INLINE parse_type_constraints #-} ------------------------------------------------------------------------------- -- Try attoparsec. -- XXX If you toggle this, don't forget to also toggle -- the one in Compile_shared_utils2.hs (compilePat_). #if 1 --compileUsingAttoparsec :: String -> AT.Result [Pattern] --compileUsingAttoparsec :: T.Text -> AT.Result [Pattern] --compileUsingAttoparsec :: BL.ByteString -> AL.Result [Pattern] compileUsingAttoparsec input -- = let rslt = AT.parse (parsePatsTop input) input) T.empty --let A.Partial f = A.parse (someWithSep A.skipSpace A.decimal) $ B.pack "123 45 67 89" in f B.empty --Done "" [123,45,67,89] = AT.feed (AT.parse parsePatsTop input) T.empty -- = AT.parse parsePatsTop input -- = AT.parse (AT.many' $ parsePat emptyPatNodeAttrs) input -- = AL.parse (AL.many' $ parsePat emptyPatNodeAttrs) input -- = AL.parse (AL.many' $ parsePat emptyPatNodeAttrs) $ BL.pack input #else --compileUsingAttoparsec :: T.Text -> Either String [Pattern] --compileUsingAttoparsec :: String -> Either String Pattern compileUsingAttoparsec :: T.Text -> Either String [Pattern] compileUsingAttoparsec input = AT.parseOnly parsePatsTop input -- = AT.parseOnly (AT.many' $ parsePat emptyPatNodeAttrs) $ T.pack input -- = AT.parseOnly (parsePat emptyPatNodeAttrs <* endOfInput) $ T.pack input -- = AT.parseOnly (AT.many' $ parsePat emptyPatNodeAttrs) $ B.pack input -- = AT.parseOnly (AT.many' $ parsePat emptyPatNodeAttrs <* endOfInput) $ B.pack input -- no! #endif ------------------------------------------------------------------------------- #else --- #error NOTE TO SELF: Change CPP so if new grammar and h98_frag, allow it but omit compilePat from the API (with a warning, and with suitable alternate Haddock comments). compilePat' :: String -> Pattern compilePat' _ = error "\nSorry, at this time (version 0.6.0.*) there is no non-attoparsec parser\nfor the new pattern grammar. This also implies that HASKELL98_FRAGMENT\nhas no pattern DSL facilities (except for showPat), and it is necessary\nto work with the PatNode constructors directly. The situation should\nbe remedied by version 0.6.1." #endif -------------------------------------------------------------------------------