module Biobase.FR3D.Import where
import Control.Arrow
import Data.ByteString.Char8 as BS
import Data.Char
import Data.Iteratee as I
import Data.Iteratee.Char as I
import Data.Iteratee.IO as I
import Data.Iteratee.ListLike as I
import Data.List as L
import Data.Maybe
import System.FilePath.Find as F
import Biobase.FR3D
-- | Iteratee that turns the raw 'ByteString' contents of one FR3D text file
-- into an 'FR3D' record.
--
-- The input is split into lines, which are then consumed in this order:
--
-- 1. the first two lines are read and discarded;
-- 2. the following run of lines that start with @#@ is collected; each of
--    them contributes one entry to 'chains';
-- 3. one further line is read and discarded;
-- 4. all remaining lines are collected and each is converted with
--    'bs2basepair'.
--
-- 'pdbid' is the first four characters of the first remaining line, or the
-- empty string when there is no such line.
--
-- NOTE(review): the discarded lines are read with 'I.head', which presumably
-- signals an error when the input ends early -- confirm against the iteratee
-- version in use.
iFR3D :: (Monad m) => Iteratee ByteString m FR3D
iFR3D = joinI (enumLinesBS parseLines)
  where
    parseLines = do
      _ <- I.head
      _ <- I.head
      -- 'I.break' stops at the first line that does NOT start with '#'.
      chainLines <- I.break (not . BS.isPrefixOf "#")
      _ <- I.head
      pairLines <- stream2list
      return FR3D
        { pdbid     = maybe "" (BS.take 4) (listToMaybe pairLines)
        , chains    = [ chainEntry line | line <- chainLines ]
        , basepairs = [ bs2basepair line | line <- pairLines ]
        }

    -- Skip the first two characters, split off the leading alphabetic run,
    -- and drop the single character that separates it from the remainder.
    chainEntry line = (name, BS.drop 1 rest)
      where
        (name, rest) = BS.span isAlpha (BS.drop 2 line)
-- | Convert one whitespace-separated line into a 'Basepair'.
--
-- The line must consist of exactly ten words; any other word count aborts
-- with 'error'. The first word is ignored. The remaining nine words fill, in
-- order: 'interaction', then nucleotide, pdb number, chain and sequence
-- position for the first partner, then the same four fields for the second
-- partner. Only the first character of each nucleotide word is kept.
--
-- Numeric words that 'readInt' cannot parse fall back to @1@.
--
-- NOTE(review): a fallback of @1@ is indistinguishable from a genuine
-- value of 1 -- confirm that this default (rather than e.g. a negative
-- sentinel) is intended.
bs2basepair :: ByteString -> Basepair
bs2basepair s = case BS.words s of
  [_, kind, nuc1, pdb1, chn1, pos1, nuc2, pdb2, chn2, pos2] -> Basepair
    { interaction = kind
    , nucleotide1 = BS.head nuc1
    , pdbnumber1  = intOrOne pdb1
    , chain1      = chn1
    , seqpos1     = intOrOne pos1
    , nucleotide2 = BS.head nuc2
    , pdbnumber2  = intOrOne pdb2
    , chain2      = chn2
    , seqpos2     = intOrOne pos2
    }
  _ -> error $ "can't parse line: " ++ unpack s
  where
    -- Leading integer of the word, or 1 if the word has none.
    intOrOne w = maybe 1 fst (readInt w)
-- | Find files starting from @fp@ whose file name matches the glob pattern
-- @select@, and parse each of them with 'iFR3D'.
--
-- 'always' is passed to 'F.find' as the recursion predicate, so no
-- directory is excluded from the search. The results are returned in the
-- order in which 'F.find' reports the files.
fromDirSelect :: String -> FilePath -> IO [FR3D]
fromDirSelect select fp =
  F.find always (fileName ~~? select) fp >>= mapM parseFile
  where
    -- Enumerate one file (buffer size 8192) into the parser and extract
    -- the finished 'FR3D' value.
    parseFile path = enumFile 8192 path iFR3D >>= run
-- | Parse every file found via 'fromDirSelect' whose file name matches the
-- glob pattern @*near_interactions_FR3D.txt@.
--
-- The argument is the directory at which the search starts.
fromDirNear :: FilePath -> IO [FR3D]
fromDirNear = fromDirSelect "*near_interactions_FR3D.txt"
-- | Parse every file found via 'fromDirSelect' whose file name matches the
-- glob pattern @*basepairs_FR3D.txt@.
--
-- The argument is the directory at which the search starts.
fromDir :: FilePath -> IO [FR3D]
fromDir = fromDirSelect "*basepairs_FR3D.txt"