-- | Import PAM/BLOSUM substitution matrices.

module Biobase.SubstMatrix.Import where

import           Control.Applicative
import           Control.Monad.Except
import           Control.Monad.IO.Class
import           Data.ByteString.Char8 (ByteString,unpack)
import           Data.Char (toLower)
import qualified Data.ByteString.Char8 as BS
import qualified Data.Map as M
import           System.Directory (doesFileExist)
import           Text.Read (readMaybe)

import           Biobase.Primary.AA (charAA)
import           Biobase.Primary.Letter (getLetter,LimitType(..))
import           Data.PrimitiveArray hiding (map)
import           Numeric.Discretized
import qualified Biobase.Primary.AA as AA
import           Statistics.Odds

import           Biobase.SubstMatrix.Types



-- | Import substitution matrix from a bytestring.
--
-- Input layout, as consumed by this parser:
--
-- * leading lines that start with @#@ are skipped;
-- * the first remaining line is the header: the first character of every
--   whitespace-separated word names one amino-acid column;
-- * every following line is one matrix row: its first word (the row label) is
--   dropped and the remaining words are parsed as integer scores.
--
-- Rows are labelled /positionally/ with the header characters, not with the
-- row's own label, so rows are assumed to appear in header order. Rows or
-- cells beyond the number of header characters are ignored.
--
-- The score @-999@ is handed to 'fromAssocs' as the default value
-- (presumably used for cells not present in the input -- confirm against
-- 'fromAssocs').
--
-- Errors are reported via 'throwError':
--
-- * the input contains no header line (empty or comment-only input);
-- * a score cell can not be parsed as a number.

fromByteString :: (MonadError String m) => ByteString -> m (AASubstMat t (DiscLogOdds k) a)
fromByteString bs =
  case dropWhile (("#"==) . take 1) . lines $ unpack bs of
    [] -> throwError "fromByteString: input contains no header line"
    (header:rows) -> do
      -- 'words' never yields empty tokens, so 'head' is total here.
      let cs = map head $ words header
      -- Only parse the cells that actually end up in the matrix: 'zip'
      -- truncates surplus rows and surplus columns.
      cells <- mapM parseCell
        [ ((Z:.charAA k1:.charAA k2),w)
        | (k1,row) <- zip cs rows
        , (k2,w)   <- zip cs (drop 1 $ words row)
        ]
      return . AASubstMat $ fromAssocs (ZZ:..LtLetter AA.Z:..LtLetter AA.Z) (DiscLogOdds . Discretized $ -999) cells
  where
    -- Attach the parsed score to its matrix index.
    parseCell (ix,w) = (,) ix <$> parseScore w
    -- Total replacement for @read@: malformed tokens become a 'MonadError'
    -- failure instead of a pure exception.
    parseScore w = maybe (throwError $ "fromByteString: can not parse score " ++ show w)
                         (return . DiscLogOdds . Discretized)
                         (readMaybe w)

-- | Import substitution matrix from file.
--
-- The file is read via 'BS.readFile' and its contents are handed to
-- 'fromByteString'. No existence check is performed here and failures of
-- 'BS.readFile' are not translated into the 'MonadError' channel; only
-- errors raised by 'fromByteString' are reported through it.

fromFile :: (MonadIO m, MonadError String m) => FilePath -> m (AASubstMat t (DiscLogOdds k) a)
fromFile fname = fromByteString =<< liftIO (BS.readFile fname)