{-| The following provides a string → string preprocessor for Fortran programs that deals with label-delimited @do@-@continue@ blocks of FORTRAN 77 era. With a traditional LR(1) parser, these are not easily (or not at all) parsable. Consider the valid FORTRAN 77 code: > do j = 1,5 > do 17 i=1,10 > print *,i > 17 continue > end do Here the \'continue\' acts as an \'end do\' (not as a usual \'continue\' statement) because it is labelled with the same label \'17\' as the \'do\' statement which starts the block. Parsing this requires arbitrary look-ahead (e.g., LR(infinity)) which is provided by the following parsec parser, but not by the \'happy\' parser generator. This pre processor is currently quite heavy handed. It replaces \'continue\' in the above program with \'end do\'. E.g., the above program is transformed to: > do j = 1,5 > do 17 i=1,10 > print *,i > 17 end do > end do -} module Language.Fortran.PreProcess ( pre_process , pre_process_fixed_form , parseExpr ) where import Text.ParserCombinators.Parsec hiding (spaces) import System.Environment import Debug.Trace num = many1 digit small = lower <|> char '_' idchar = small <|> upper <|> digit ident = do{ c <- small <|> upper ; cs <- many idchar; return (c:cs) } spaces = many space manyTillEnd p end = scan where scan = (try end) <|> do { x <- p; xs <- scan; return (x:xs) } pre_parser labels = manyTillEnd anyChar (try $ if null labels then try (end_or_start_do labels) <|> (eof >> return "") else end_or_start_do labels) end_or_start_do labels = (try $ doBlock labels) <|> (end_do labels) doBlock labels = do doStr <- string "do" <|> string "DO" updateState (+1) sp <- spaces label <- (try numberedBlock) <|> (do { loop <- loop_control; return (Nothing, loop) }) p <- pre_parser $ (fst label) : labels return $ doStr ++ sp ++ snd label ++ p end_do labels = do label' <- optionMaybe (do {space; n <- num; space; return n}) sp <- spaces lookAhead (end_do_marker <|> continue) ender <- case (labels, label') of ([], _) -> do { ender <- end_do_marker; return $ sp ++ ender } (Nothing:_, _) -> do { ender <- end_do_marker; return $ sp ++ ender } ((Just n):_, Nothing) -> do { ender <- end_do_marker; return $ sp ++ ender } ((Just n):_, Just m) -> if (n==m) then do ender <- end_do_marker <|> continue return $ " " ++ m ++ " " ++ sp ++ ender else -- Labels don't match! -- If the label doesn't appear anywhere in the label stack, -- then this is allowed (e.g. extra 'continue' points) if Just m `notElem` labels then do ender <- end_do_marker <|> continue_non_replace return $ " " ++ m ++ " " ++ sp ++ ender else -- otherwise, we consider the do loops to be not properly bracketted error $ "Ill formed do blocks, labels do not match: " ++ n ++ " and " ++ m ++ " - with label stack " ++ (show labels) level <- getState updateState (\x -> x-1) -- "Level " ++ show level) `trace` ( p <- pre_parser (if labels == [] then [] else tail labels) return $ ender ++ p continue_non_replace = string "continue" <|> string "CONTINUE" continue = do string "continue" <|> string "CONTINUE" return "end do " -- replaces continue with 'end do', this is the goal! end_do_marker = do endStr <- string "end" <|> string "END" sp <- spaces doStr <- string "do" <|> string "DO" return $ endStr ++ sp ++ doStr numberedBlock = do label <- num space sp1 <- spaces comma <- optionMaybe (string ",") sp2 <- spaces loop <- loop_control return $ (Just label, label ++ " " ++ sp1 ++ (maybe "" id comma) ++ sp2 ++ loop) newline' = (try $ do { c <- char '\r'; n <- newline; return [c,n] }) <|> do { n <- newline; return [n] } loop_control = do var <- ident sp1 <- spaces char '=' sp2 <- spaces lower <- num <|> ident sp3 <- spaces char ',' sp4 <- spaces upper <- num <|> ident rest <- manyTillEnd anyChar (try newline') return $ var ++ sp1 ++ "=" ++ sp2 ++ lower ++ sp3 ++ "," ++ sp4 ++ upper ++ rest parseExpr :: String -> String -> String parseExpr file input = case (runParser p (0::Int) "" input) of Left err -> fail $ show err Right x -> x where p = do pos <- getPosition setPosition $ (flip setSourceName) file $ (flip setSourceLine) 1 $ (flip setSourceColumn) 1 $ pos pre_parser [] {- - Change Fortran77 style C, c, and * comments to ! comments. -} processComments :: String -> String processComments source = unlines $ map changeComment $ lines source changeComment :: String -> String changeComment "" = "" changeComment original@(x:xs) | isComment original = '!':xs | otherwise = original isComment :: String -> Bool isComment "" = False isComment (f:_) | f == 'c' = True | f == 'C' = True | f == '*' = True | otherwise = False {- - Old continuation used in fixed form Fortran are specified in column 6 - and are in effect whenever the characeter is not ' ' or '0'. This processing - stage connects those lines to the line before. - - If the continuation line has something else such as a label in its first - 6 columns then an error is thrown. -} processOldContLines :: String -> String processOldContLines source = unlines (eliminateContLines (lines source) 2) eliminateContLines :: [String] -> Integer -> [String] eliminateContLines [] _ = [] eliminateContLines [x] _ = [x] eliminateContLines (l1:l2:rest) lineNumb | length l2 <= 6 = l1:eliminateContLines (l2:rest) (lineNumb + 1) | isContLine == False = l1:eliminateContLines (l2:rest) (lineNumb + 1) | isComment l2 = l1:eliminateContLines (l2:rest) (lineNumb + 1) | isContLine && isFirst5ColsEmpty = eliminateContLines ((removeTrailingWhitespace l1 ++ (statement l2)):rest) (lineNumb + 1) | otherwise = error $ "Cannot preprocess continuation at line " ++ show lineNumb where statement = (\s -> drop 6 s) newLineNumb = lineNumb + 1 first5Cols = take 5 l2 col6 = l2 !! 5 isContLine = col6 /= ' ' && col6 /= '0' isFirst5ColsEmpty = first5Cols == " " removeTrailingWhitespace :: String -> String removeTrailingWhitespace line = reverse $ dropWhile (==' ') $ reverse line pre_process :: String -> String pre_process input = parseExpr "" input pre_process_fixed_form input = parseExpr "" $ processComments $ processOldContLines input go filename = do args <- getArgs srcfile <- readFile filename return $ parseExpr filename srcfile