{- The following provides a string -> string preprocessor for Fortran programs that deals with label-delimited 'do'-'continue' blocks of FORTRAN 77 era. With a traditional LR(1) parser, these are not easily (or not at all) parsable. Consider the valid FORTRAN 77 code: do j = 1,5 do 17 i=1,10 print *,i 17 continue end do Here the 'continue' acts as an 'end do' (not as a usual 'continue' statement) because it is labelled with the same label '17' as the 'do' statement which starts the block. Parsing this requires arbitrary look-ahead (e.g., LR(infinity)) which is provided by the following parsec parser, but not by the 'happy' parser generator. This pre processor is currently quite heavy handed. It replaces 'continue' in the above program with 'end do'. E.g., the above program is transformed to: do j = 1,5 do 17 i=1,10 print *,i 17 end do end do -} module Language.Fortran.PreProcess where import Text.ParserCombinators.Parsec hiding (spaces) import Text.ParserCombinators.Parsec.Expr import qualified Text.ParserCombinators.Parsec.Token as Token import Text.ParserCombinators.Parsec.Language import System.Environment import Debug.Trace num = many1 digit small = lower <|> char '_' idchar = small <|> upper <|> digit ident = do{ c <- small <|> upper ; cs <- many idchar; return (c:cs) } spaces = many space manyTillEnd p end = scan where scan = (try end) <|> do { x <- p; xs <- scan; return (x:xs) } pre_parser labels = manyTillEnd anyChar (try $ if (labels == []) then (try $ end_or_start_do labels) <|> (eof >> return "") else end_or_start_do labels) end_or_start_do labels = (try $ doBlock labels) <|> (end_do labels) doBlock labels = do doStr <- string "do" <|> string "DO" updateState (+1) sp <- spaces label <- (try numberedBlock) <|> (do { loop <- loop_control; return (Nothing, loop) }) p <- pre_parser $ (fst label) : labels return $ doStr ++ sp ++ snd label ++ p end_do labels = do label' <- optionMaybe (do {space; n <- num; space; return n}) sp <- spaces lookAhead (end_do_marker <|> continue) ender <- case (labels, label') of ([], _) -> do { ender <- end_do_marker; return $ sp ++ ender } (Nothing:_, _) -> do { ender <- end_do_marker; return $ sp ++ ender } ((Just n):_, Nothing) -> do { ender <- end_do_marker; return $ sp ++ ender } ((Just n):_, Just m) -> if (n==m) then do ender <- end_do_marker <|> continue return $ " " ++ m ++ " " ++ sp ++ ender else -- Labels don't match! -- If the label doesn't appear anywhere in the label stack, -- then this is allowed (e.g. extra 'continue' points) if (not ((Just m) `elem` labels)) then do ender <- end_do_marker <|> continue_non_replace return $ " " ++ m ++ " " ++ sp ++ ender else -- otherwise, we consider the do loops to be not properly bracketted error $ "Ill formed do blocks, labels do not match: " ++ n ++ " and " ++ m ++ " - with label stack " ++ (show labels) level <- getState updateState (\x -> x-1) -- "Level " ++ show level) `trace` ( p <- pre_parser (if labels == [] then [] else tail labels) return $ ender ++ p continue_non_replace = string "continue" <|> string "CONTINUE" continue = do string "continue" <|> string "CONTINUE" return "end do " -- replaces continue with 'end do', this is the goal! end_do_marker = do endStr <- string "end" <|> string "END" sp <- spaces doStr <- string "do" <|> string "DO" return $ endStr ++ sp ++ doStr numberedBlock = do label <- num space sp1 <- spaces comma <- optionMaybe (string ",") sp2 <- spaces loop <- loop_control return $ (Just label, label ++ " " ++ sp1 ++ (maybe "" id comma) ++ sp2 ++ loop) newline' = (try $ do { c <- char '\r'; n <- newline; return [c,n] }) <|> do { n <- newline; return [n] } loop_control = do var <- ident sp1 <- spaces char '=' sp2 <- spaces lower <- num <|> ident sp3 <- spaces char ',' sp4 <- spaces upper <- num <|> ident rest <- manyTillEnd anyChar (try newline') return $ var ++ sp1 ++ "=" ++ sp2 ++ lower ++ sp3 ++ "," ++ sp4 ++ upper ++ rest parseExpr :: String -> String -> String parseExpr file input = case (runParser p (0::Int) "" input) of Left err -> fail $ show err Right x -> x where p = do pos <- getPosition setPosition $ (flip setSourceName) file $ (flip setSourceLine) 1 $ (flip setSourceColumn) 1 $ pos x <- pre_parser [] return x pre_process input = parseExpr "" input go filename = do args <- getArgs srcfile <- readFile filename return $ parseExpr filename srcfile