-- | Parse RNAz output
--   For more information on RNAz consult the RNAz documentation.

-- NOTE(review): the module declaration and the import list are not visible in
-- this copy of the file and belong here. The definitions below need the
-- following names in scope: the Parsec combinators ('GenParser',
-- 'parseFromFile', 'parse', ...), 'system', 'ExitCode', a qualified @CE@
-- providing @CE.evaluate@, the constructors 'RNAz', 'RNAzResult' and
-- 'RNAzConsensus', and the helpers 'readInt', 'readDouble' and
-- 'parseNucleotideAlignmentEntry'.

-- | Run the external @RNAz@ program through the shell.
--
-- The command line is @RNAz \<options\> \<inputFilePath\> >\<outputFilePath\>@,
-- i.e. standard output is redirected into @outputFilePath@. The exit code of
-- the shell command is returned.
--
-- NOTE(review): all three arguments are spliced into the shell command
-- unquoted, so paths containing blanks or shell metacharacters will be
-- misinterpreted. Only pass trusted values.
systemRNAz :: String -> String -> String -> IO ExitCode
systemRNAz options inputFilePath outputFilePath =
  system ("RNAz " ++ options ++ " " ++ inputFilePath ++ " >" ++ outputFilePath)

-- | Parse the input as RNAz datatype
--
-- Expected layout: a leading newline, a banner line of @#@ and blanks around
-- @RNAz \<version\>@, the labelled summary fields in fixed order, any number
-- of @WARNING:@ lines, a separator line of @#@, one or more per-sequence
-- results and finally the consensus entry.
genParseRNAz :: GenParser Char st RNAz
genParseRNAz = do
  -- Banner: "\n#####  RNAz <version>  #####\n" followed by one more
  -- whitespace character.
  char '\n'
  many1 (oneOf "# ")
  string "RNAz"
  space
  _version <- many1 (noneOf " ")
  space
  space
  many1 (char '#')
  newline
  space
  -- Summary fields, in the order they are passed to the 'RNAz' constructor.
  _sequences <- parseRNAzIntField "Sequences:"
  _columns <- parseRNAzIntField "Columns:"
  _readingDirection <- parseRNAzStringField "Reading direction:"
  _meanPairwiseIdentity <- parseRNAzDoubleField "Mean pairwise identity:"
  _shannonEntropy <- parseRNAzDoubleField "Shannon entropy:"
  _gcContent <- parseRNAzDoubleField "G+C content:"
  _meanSingleSequenceMFE <- parseRNAzDoubleField "Mean single sequence MFE:"
  _consensusMFE <- parseRNAzDoubleField "Consensus MFE:"
  _energyContribution <- parseRNAzDoubleField "Energy contribution:"
  _covarianceContribution <- parseRNAzDoubleField "Covariance contribution:"
  _combinationsPair <- parseRNAzDoubleField "Combinations/Pair:"
  _meanZScore <- parseRNAzDoubleField "Mean z-score:"
  _structureConservationIndex <- parseRNAzDoubleField "Structure conservation index:"
  _backgroundModel <- parseRNAzStringField "Background model:"
  _decisionModel <- parseRNAzStringField "Decision model:"
  _svmDecisionValue <- parseRNAzDoubleField "SVM decision value:"
  _svmRNAClassProbability <- parseRNAzDoubleField "SVM RNA-class probability:"
  _prediction <- parseRNAzStringField "Prediction:"
  -- Warning lines are accepted but their text is discarded.
  _ <- many (try (parseRNAzStringField "WARNING:"))
  -- Blank line, separator line of '#', blank line.
  newline
  many1 (char '#')
  newline
  newline
  -- 'try' lets the last, failing attempt (the consensus entry) backtrack so
  -- that 'parseRNAzConsensus' sees it from its first character.
  _rnaZResults <- many1 (try parseRNAzResult)
  _rnaZConsensus <- parseRNAzConsensus
  return $
    RNAz
      _version
      _sequences
      _columns
      _readingDirection
      _meanPairwiseIdentity
      _shannonEntropy
      _gcContent
      _meanSingleSequenceMFE
      _consensusMFE
      _energyContribution
      _covarianceContribution
      _combinationsPair
      _meanZScore
      _structureConservationIndex
      _backgroundModel
      _decisionModel
      _svmDecisionValue
      _svmRNAClassProbability
      _prediction
      _rnaZResults
      _rnaZConsensus

-- | Parse a RNAz field containing a Double
--
-- Accepts an optional leading whitespace character, the literal @fieldname@,
-- at least one whitespace character, the value and one trailing whitespace
-- character. The value token ends at the first blank or newline, so the
-- string handed to 'readDouble' never contains the line break.
parseRNAzDoubleField :: String -> GenParser Char st Double
parseRNAzDoubleField fieldname = do
  optional space
  string fieldname
  many1 space
  double <- many1 (noneOf " \n")
  space
  return $ readDouble double

-- | Parse a RNAz field containing a String
--
-- The value is everything after @fieldname@ and one whitespace character, up
-- to (not including) the end of the line; the following whitespace character
-- is consumed.
parseRNAzStringField :: String -> GenParser Char st String
parseRNAzStringField fieldname = do
  optional space
  string fieldname
  space
  stringField <- many1 (noneOf "\n")
  space
  return stringField

-- | Parse a RNAz field containing a Int
--
-- Accepts an optional leading whitespace character, the literal @fieldname@,
-- one whitespace character, the value and one trailing whitespace character.
-- The value token ends at the first blank or newline, so the string handed to
-- 'readInt' never contains the line break.
parseRNAzIntField :: String -> GenParser Char st Int
parseRNAzIntField fieldname = do
  optional space
  string fieldname
  space
  int <- many1 (noneOf " \n")
  space
  return $ readInt int

-- | Parse a RNAz result
--
-- One result consists of a header line, an alignment entry, and a structure
-- line of the form @\<dot-bracket\> ( \<mfe\>, z-score = \<z\>, \<S|R\>)@.
parseRNAzResult :: GenParser Char st RNAzResult
parseRNAzResult = do
  -- Refuse the consensus entry before consuming anything, so it is never
  -- taken for the header of a regular result.
  notFollowedBy (try (string ">consensus"))
  _header <- many1 (noneOf "\n")
  newline
  _resultSequence <- parseNucleotideAlignmentEntry
  newline
  _dotBracket <- many1 (oneOf "-().,")
  space
  char '('
  space
  _mfe <- many1 (noneOf ",")
  char ','
  space
  string "z-score"
  space
  char '='
  space
  _zscore <- many1 (noneOf ",")
  char ','
  space
  -- Single-letter tag closing the annotation; equivalent to oneOf "RS".
  _zScoreCalculationApproach <- choice [char 'S', char 'R']
  char ')'
  newline
  return $
    RNAzResult
      _header
      _resultSequence
      _dotBracket
      (readDouble _mfe)
      (readDouble _zscore)
      _zScoreCalculationApproach

-- | Parse the consensus of RNAz results
--
-- Expects the @>consensus@ header, an alignment entry, and a structure line
-- of the form @\<dot-bracket\> (\<number\> = \<number\> + \<text\>)@ followed
-- by a newline and the end of input. The values inside the parentheses are
-- checked for shape but not returned.
parseRNAzConsensus :: GenParser Char st RNAzConsensus
parseRNAzConsensus = do
  string ">consensus"
  newline
  _consensusSequence <- parseNucleotideAlignmentEntry
  newline
  _dotBracket <- many1 (oneOf "().,")
  blanks
  char '('
  blanks
  number
  blanks
  char '='
  blanks
  number
  blanks
  char '+'
  blanks
  many1 (noneOf ")")
  char ')'
  blanks
  newline
  eof
  return $ RNAzConsensus _consensusSequence _dotBracket
  where
    -- Zero or more blank characters.
    blanks = many (try (char ' '))
    -- A run of digits, '.' and '-'.
    number = many1 (oneOf "-1234567890.")

-- | parse RNAz from input string
parseRNAz :: [Char] -> Either ParseError RNAz
parseRNAz input = parse genParseRNAz "parseRNAz" input

-- | parse from input filePath
--
-- The parse result is passed through @CE.evaluate@ before it is returned.
readRNAz :: String -> IO (Either ParseError RNAz)
readRNAz filePath = do
  parsedFile <- parseFromFile genParseRNAz filePath
  CE.evaluate parsedFile