{-# LANGUAGE TupleSections #-} module Bio.MAE.Parser ( maeP , versionP , blockP , tableP ) where import Bio.MAE.Type (Block (..), Mae (..), MaeValue (..), Table (..)) import Control.Applicative ((<|>)) import Control.Monad (replicateM, when, zipWithM) import Data.Attoparsec.Text (Parser, char, decimal, endOfInput, endOfLine, many', many1', string, takeWhile, takeWhile1) import Data.Char (isSpace) import Data.List (transpose) import Data.Map.Strict (Map) import qualified Data.Map.Strict as M (fromList) import Data.Text (Text) import qualified Data.Text as T (pack, uncons) import qualified Data.Text.Read as TR (decimal, rational, signed) import Prelude hiding (takeWhile) maeP :: Parser Mae maeP = Mae <$> versionP <*> many' blockP <* endOfInput versionP :: Parser Text versionP = inBrackets $ lineP *> delimiterP *> lineP blockP :: Parser Block blockP = uncurry <$> (Block <$> anyStringP <* many' oneSpaceP) <*> inBrackets ((,) <$> fieldsP <*> many' tableP) where fieldsP :: Parser (Map Text MaeValue) fieldsP = do fieldNames <- upToDelimiterP lineP fieldMaeValues <- replicateM (length fieldNames) lineP M.fromList <$> zipWithM (\k v -> (k,) <$> textToMaeValue k v) fieldNames fieldMaeValues textToMaeValue :: Text -> Text -> Parser MaeValue textToMaeValue k v = if v == absentMaeValue then pure Absent else case T.uncons k of Just (c, _) -> getMaeValueReader c v _ -> fail "Absent field name." where absentMaeValue :: Text absentMaeValue = "<>" getMaeValueReader :: Char -> Text -> Parser MaeValue getMaeValueReader 'i' = textToIntMaeValueReader getMaeValueReader 'r' = textToRealMaeValueReader getMaeValueReader 'b' = textToBoolMaeValueReader getMaeValueReader 's' = textToStringMaeValueReader getMaeValueReader _ = const $ fail "Unknown value type." textToIntMaeValueReader :: Text -> Parser MaeValue textToIntMaeValueReader = either fail (pure . IntMaeValue . fst) . TR.signed TR.decimal textToRealMaeValueReader :: Text -> Parser MaeValue textToRealMaeValueReader = either fail (pure . RealMaeValue . fst) . TR.signed TR.rational textToBoolMaeValueReader :: Text -> Parser MaeValue textToBoolMaeValueReader t = case t of "0" -> pure $ BoolMaeValue False "1" -> pure $ BoolMaeValue True _ -> fail "Can't parse bool value." textToStringMaeValueReader :: Text -> Parser MaeValue textToStringMaeValueReader = pure . StringMaeValue tableP :: Parser Table tableP = do name <- many' oneSpaceP *> takeWhile1 (/= leftSquareBracket) numberOfEntries <- char leftSquareBracket *> decimal <* char rightSquareBracket _ <- many' oneSpaceP contents <- inBrackets $ do fieldNames <- upToDelimiterP lineP let readers = fmap textToMaeValue fieldNames entries <- replicateM numberOfEntries $ entryP readers delimiterP pure $ M.fromList $ zip fieldNames $ transpose entries pure $ Table name contents where leftSquareBracket :: Char leftSquareBracket = '[' rightSquareBracket :: Char rightSquareBracket = ']' entryP :: [Text -> Parser MaeValue] -> Parser [MaeValue] entryP readers = do valuesT <- many1' (many' oneSpaceP *> valueTP <* many' oneSpaceP) <* tillEndOfLine when (length readers /= length valuesT - 1) $ fail "Wrong number of values in an entry." zipWithM ($) readers $ drop 1 valuesT -------------------------------------------------------------------------------- -- Utility functions. -------------------------------------------------------------------------------- inBrackets :: Parser a -> Parser a inBrackets p = char leftBracket *> many1' tillEndOfLine *> p <* many' oneSpaceP <* char rightBracket <* many1' tillEndOfLine where leftBracket :: Char leftBracket = '{' rightBracket :: Char rightBracket = '}' delimiterP :: Parser () delimiterP = many' oneSpaceP *> string delimiter *> tillEndOfLine where delimiter :: Text delimiter = ":::" upToDelimiterP :: Parser a -> Parser [a] upToDelimiterP p = ([] <$ delimiterP) <|> ((:) <$> p <*> upToDelimiterP p) oneSpaceP :: Parser Char oneSpaceP = char ' ' anyStringP :: Parser Text anyStringP = takeWhile1 (not . isSpace) valueTP :: Parser Text valueTP = ((<>) <$> string quoteT <*> ((<>) <$> takeWhile (/= quote) <*> string quoteT)) <|> anyStringP where quote :: Char quote = '\"' quoteT :: Text quoteT = T.pack $ pure quote commentaryP :: Parser () commentaryP = () <$ many' (many' oneSpaceP *> char '#' *> takeWhile (`notElem` ['\n', '\r']) *> endOfLine) lineP :: Parser Text lineP = commentaryP *> many' oneSpaceP *> valueTP <* tillEndOfLine <* commentaryP tillEndOfLine :: Parser () tillEndOfLine = () <$ many' oneSpaceP <* endOfLine