module Bio.PDB.StructureBuilder.Internals
--(parse)
where
import Prelude hiding (String)
import qualified Data.ByteString.Char8 as BS hiding (reverse)
import qualified Control.Monad.ST as ST
import Control.Monad.State.Strict as State
import Control.Monad(when)
import Data.STRef as STRef
import Data.Maybe(isNothing, isJust)
import Bio.PDB.EventParser.PDBEvents(PDBEvent(..), RESID(..))
import qualified Bio.PDB.EventParser.PDBEventParser(parsePDBRecords)
import Bio.PDB.Structure
import Bio.PDB.Structure.List as L
type ParsingMonad t a = State.StateT (BState t) (ST.ST t) a
parsePDBRec :: String -> String -> (() -> PDBEvent -> ParsingMonad t ()) -> () -> ParsingMonad t ()
parsePDBRec = Bio.PDB.EventParser.PDBEventParser.parsePDBRecords
parseSerial :: FilePath -> String -> (Structure, List PDBEvent)
parseSerial fname contents = ST.runST $ do initial <- initializeState
(s, e) <- State.evalStateT parsing initial
return (s :: Structure, e :: L.List PDBEvent)
where parsing = do parsePDBRec (BS.pack fname) contents (\() !ev -> parseStep ev) ()
closeStructure
s <- State.gets currentStructure
e <- State.gets errors
e' <- L.finalize e
return (s, e')
data BState s = BState { currentResidue :: Maybe Residue,
currentModel :: Maybe Model,
currentChain :: Maybe Chain,
currentStructure :: Structure,
residueContents :: L.TempList s Atom,
chainContents :: L.TempList s Residue,
modelContents :: L.TempList s Chain,
structureContents :: L.TempList s Model,
errors :: L.TempList s PDBEvent,
lineNo :: STRef.STRef s Int
}
initializeState :: ST.ST t (BState t)
initializeState = do r <- L.initialNew L.residueVectorSize
c <- L.initialNew L.chainVectorSize
m <- L.initialNew 1
s <- L.initialNew 1
e <- L.initialNew 100
l <- STRef.newSTRef 1
return BState { currentResidue = Nothing,
currentModel = Nothing,
currentChain = Nothing,
currentStructure = Structure { models = L.empty },
residueContents = r,
chainContents = c,
modelContents = m,
structureContents = s,
errors = e,
lineNo = l }
checkResidue :: Bio.PDB.EventParser.PDBEvents.RESID -> ParsingMonad t ()
checkResidue (RESID (newName, newChain, newResseq, newInsCode)) =
do checkChain newChain
res <- State.gets currentResidue
when (residueChanged res) $ do closeResidue
l <- L.new L.residueVectorSize
State.modify $! createResidue l
where
residueChanged Nothing = True
residueChanged (Just (Residue { resName = oldResName,
resSeq = oldResSeq ,
insCode = oldInsCode,
atoms = _atoms })) =
(oldResName, oldResSeq, oldInsCode) /= (newName, newResseq, newInsCode)
createResidue l st = st { currentResidue = Just newResidue,
residueContents = l }
newResidue = Bio.PDB.Structure.Residue { resName = newName,
resSeq = newResseq,
insCode = newInsCode,
atoms = L.empty }
checkChain :: Char -> ParsingMonad t ()
checkChain name = do checkModel
curChain <- State.gets currentChain
when (chainChanged curChain) $ do closeChain
l <- L.new L.chainVectorSize
State.modify $ createChain l
where
chainChanged Nothing = True
chainChanged (Just (Chain { chainId = oldChain })) = oldChain /= name
createChain l state = state { currentChain = Just Chain { chainId = name,
residues = L.empty },
chainContents = l }
checkModel :: ParsingMonad t ()
checkModel = do curModel <- State.gets currentModel
when (isNothing curModel) $ openModel defaultModelId
defaultModelId = 1
closeResidue :: ParsingMonad t ()
closeResidue = do r <- State.gets currentResidue
when (isJust r) $ do let Just res = r
rc <- State.gets residueContents
rf <- L.finalize rc
cc <- State.gets chainContents
cc' <- L.add cc $ res { Bio.PDB.Structure.atoms = rf }
State.modify clearResidue
where
clearResidue st = st { currentResidue = Nothing }
closeChain :: ParsingMonad t ()
closeChain = do closeResidue
c <- State.gets currentChain
ac <- State.gets chainContents
when (isJust c) $ do l <- State.gets chainContents
l' <- L.finalize l
let Just ch = c
ch' = ch { Bio.PDB.Structure.residues = l' }
m <- State.gets currentModel
when (isNothing m) $ do mli <- State.gets structureContents
i <- L.tempLength mli
openModel i
addError ["Trying to close chain when currentChain is ",
BS.pack . show $ ch,
" and currentModel is ",
BS.pack . show $ m]
ml <- State.gets modelContents
ml' <- L.add ml ch'
State.modify clearChain
where
clearChain st = st { currentChain = Nothing }
addError :: [String] -> ParsingMonad t ()
addError msg = do e <- State.gets errors
lnref <- State.gets lineNo
ln <- lift $ STRef.readSTRef lnref
lift $ STRef.modifySTRef lnref (+1)
L.add e $ anError ln
where
anError ln = PDBParseError ln 0 $ BS.concat msg
closeModel :: ParsingMonad t ()
closeModel = do closeChain
cm <- State.gets currentModel
case cm of
Nothing -> return ()
Just m -> do mc <- State.gets modelContents
chs <- L.finalize mc
let m' = m { chains = chs }
sc <- State.gets structureContents
State.modify clearModel
L.add sc m'
where clearModel st = st { currentModel = Nothing }
closeStructure :: ParsingMonad t ()
closeStructure = do closeModel
sc <- State.gets structureContents
sc' <- L.finalize sc
State.modify (closeStructure' sc')
where
closeStructure' sc bstate@(BState { currentStructure = aStructure}) =
bstate { currentStructure = aStructure { models = sc },
structureContents = undefined }
nextLine :: ParsingMonad t ()
nextLine = do lnref <- State.gets lineNo
lift $ STRef.modifySTRef lnref (+1)
parseStep pe@(PDBParseError l _ _) = do e <- State.gets errors
L.add e pe
lnref <- State.gets lineNo
lift $ STRef.writeSTRef lnref l
parseStep (ATOM { no = atSer,
atomtype = atType,
restype = resName,
chain = chainName,
resid = resSeq,
resins = resInsCode,
altloc = altloc,
coords = atCoord,
occupancy = atOccupancy,
bfactor = atBFactor,
segid = atSegId,
elt = atElement,
charge = atCharge,
hetatm = isHet
}) =
do checkResidue $ RESID (resName, chainName, resSeq, resInsCode)
reslist <- State.gets residueContents
newAtom `seq` L.add reslist newAtom
nextLine
where newAtom = Atom { atName = atType,
atSerial = atSer,
coord = atCoord,
bFactor = atBFactor,
occupancy = atOccupancy,
element = atElement,
segid = atSegId,
charge = atCharge,
hetatm = isHet
}
parseStep (MODEL { num = n }) = do closeModel
openModel n
nextLine
parseStep ENDMDL = do closeModel
nextLine
parseStep END = do closeModel
nextLine
parseStep (TER {..}) = do closeChain
nextLine
parseStep (MASTER {..}) = do closeModel
nextLine
parseStep _ = nextLine
openModel :: Int -> ParsingMonad t ()
openModel n = do l <- L.new L.defaultSize
State.modify $ changeModel l
where changeModel l st = st { currentModel = Just newModel,
modelContents = l }
newModel = Bio.PDB.Structure.Model { modelId = n,
chains = empty }
parseFinish :: ParsingMonad t (Structure, L.List PDBEvent)
parseFinish = do closeStructure
st <- State.gets currentStructure
er <- State.gets errors
er' <- finalize er
st `seq` return (st, er')