{-# OPTIONS_GHC -fno-warn-orphans #-}
module Bio.PDB
  ( modelsFromPDBText, modelsToPDBText
  , modelsFromPDBFile, modelsToPDBFile
  ) where

import           Bio.PDB.BondRestoring  (residueID,
                                         restoreChainLocalBonds,
                                         restoreModelGlobalBonds)
import           Bio.PDB.Functions      (groupChainByResidue)
import           Bio.PDB.Reader         (PDBWarnings, fromTextPDB)
import qualified Bio.PDB.Type           as PDB
import           Bio.PDB.Writer         (pdbToFile, pdbToText)
import           Bio.Structure         (Residue(..),
                                        Bond,
                                        Atom(..),
                                        LocalID,
                                        GlobalID(GlobalID, getGlobalID),
                                        SecondaryStructure(Undefined),
                                        Chain(..),
                                        Model(Model, modelChains),
                                        StructureSerializable(..),
                                        StructureModels(..))
import qualified Bio.Utils.Map          as M ((!?!))
import qualified Bio.Utils.Vector       as V ((!?!))
import           Control.Arrow          ((&&&))
import           Control.Lens           ((^.))
import           Control.Monad          (join)
import           Control.Monad.IO.Class (MonadIO, liftIO)
import           Data.List              (sort)
import           Data.Map.Strict        (Map)
import qualified Data.Map.Strict        as M (fromList)
import           Data.Maybe             (fromMaybe)
import           Data.Text              (Text)
import qualified Data.Text              as T (head, pack, singleton, strip,
                                              unpack)
import           Data.Text.IO           as TIO (readFile)
import           Data.Vector            (Vector)
import qualified Data.Vector            as V (toList,
                                              concat,
                                              fromList,
                                              length)
import           Linear.V3              (V3 (..), _x, _y, _z)
import           Text.Read              (readMaybe)

instance StructureModels PDB.PDB where
    modelsOf :: PDB -> Vector Model
modelsOf PDB.PDB {Text
Map RemarkCode RemarkData
Map FieldType RemarkData
Vector Model
otherFields :: PDB -> Map FieldType RemarkData
remarks :: PDB -> Map RemarkCode RemarkData
models :: PDB -> Vector Model
title :: PDB -> Text
otherFields :: Map FieldType RemarkData
remarks :: Map RemarkCode RemarkData
models :: Vector Model
title :: Text
..} = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
mkModel Vector Model
models
      where
        mkModel :: PDB.Model -> Model
        mkModel :: Model -> Model
mkModel Model
model = if forall (t :: * -> *) a. Foldable t => t a -> Int
length Map Atom Int
atomToNilBasedIndex forall a. Eq a => a -> a -> Bool
== forall (t :: * -> *) a. Foldable t => t a -> Int
length [Atom]
allModelAtoms
                          then Vector Chain -> Vector (Bond GlobalID) -> Model
Model (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
mkChain Model
model) (Map Atom Int -> Model -> Vector (Bond GlobalID)
restoreModelGlobalBonds Map Atom Int
atomToNilBasedIndex Model
model)
                          else forall a. HasCallStack => [Char] -> a
error [Char]
"Mapping from PDB id to nil based index must be a bijection."
          where
            atomToNilBasedIndex :: Map PDB.Atom Int
            atomToNilBasedIndex :: Map Atom Int
atomToNilBasedIndex = forall k a. Ord k => [(k, a)] -> Map k a
M.fromList forall a b. (a -> b) -> a -> b
$ [Atom]
allModelAtoms forall a b. [a] -> [b] -> [(a, b)]
`zip` [Int
0..]

            allModelAtoms :: [PDB.Atom]
            allModelAtoms :: [Atom]
allModelAtoms = forall a. Ord a => [a] -> [a]
sort forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Vector a -> [a]
V.toList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [Vector a] -> Vector a
V.concat forall a b. (a -> b) -> a -> b
$ forall a. Vector a -> [a]
V.toList Model
model

            mkChain :: PDB.Chain -> Chain
            mkChain :: Chain -> Chain
mkChain = forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Text -> Vector Residue -> Chain
Chain forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Chain -> Text
mkChainName forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& Chain -> Vector Residue
mkChainResidues)

            mkChainName :: PDB.Chain -> Text
            mkChainName :: Chain -> Text
mkChainName = Char -> Text
T.singleton forall b c a. (b -> c) -> (a -> b) -> a -> c
. Atom -> Char
PDB.atomChainID forall b c a. (b -> c) -> (a -> b) -> a -> c
. Chain -> Atom
safeFirstAtom

            mkChainResidues :: PDB.Chain -> Vector Residue
            mkChainResidues :: Chain -> Vector Residue
mkChainResidues Chain
chain = forall a. [a] -> Vector a
V.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue (Chain -> Map Text (Vector (Bond LocalID))
restoreChainLocalBonds Chain
chain)) forall a b. (a -> b) -> a -> b
$ Chain -> [[Atom]]
groupChainByResidue Chain
chain

            safeFirstAtom :: Vector PDB.Atom -> PDB.Atom
            safeFirstAtom :: Chain -> Atom
safeFirstAtom Chain
arr | forall a. Vector a -> Int
V.length Chain
arr forall a. Ord a => a -> a -> Bool
> Int
0 = Chain
arr forall a. (HasCallStack, Show a) => Vector a -> Int -> a
V.!?! Int
0
                              | Bool
otherwise        = forall a. HasCallStack => [Char] -> a
error [Char]
"Could not pick first atom"

            mkResidue :: Map Text (Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
            mkResidue :: Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue Map Text (Vector (Bond LocalID))
_ []    = forall a. HasCallStack => [Char] -> a
error [Char]
"Cound not make residue from empty list"
            mkResidue Map Text (Vector (Bond LocalID))
localBondsMap [Atom]
atoms' = Text
-> Int
-> Char
-> Vector Atom
-> Vector (Bond LocalID)
-> SecondaryStructure
-> Text
-> Residue
Residue (Text -> Text
T.strip forall a b. (a -> b) -> a -> b
$ Atom -> Text
PDB.atomResName Atom
firstResidueAtom)
                                                     (Atom -> Int
PDB.atomResSeq Atom
firstResidueAtom)
                                                     (Atom -> Char
PDB.atomICode Atom
firstResidueAtom)
                                                     (forall a. [a] -> Vector a
V.fromList forall a b. (a -> b) -> a -> b
$ Atom -> Atom
mkAtom forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Atom]
atoms')
                                                     (Map Text (Vector (Bond LocalID))
localBondsMap forall k a.
(HasCallStack, Ord k, Show k, Show a) =>
Map k a -> k -> a
M.!?! Atom -> Text
residueID Atom
firstResidueAtom)
                                                     SecondaryStructure
Undefined -- now we do not read secondary structure
                                                     Text
""        -- chemical component type?!
              where
                firstResidueAtom :: Atom
firstResidueAtom = forall a. [a] -> a
head [Atom]
atoms'

            mkAtom :: PDB.Atom -> Atom
            mkAtom :: Atom -> Atom
mkAtom atom :: Atom
atom@PDB.Atom{Char
Float
Int
Text
atomCharge :: Atom -> Text
atomElement :: Atom -> Text
atomTempFactor :: Atom -> Float
atomOccupancy :: Atom -> Float
atomZ :: Atom -> Float
atomY :: Atom -> Float
atomX :: Atom -> Float
atomAltLoc :: Atom -> Char
atomName :: Atom -> Text
atomSerial :: Atom -> Int
atomCharge :: Text
atomElement :: Text
atomTempFactor :: Float
atomOccupancy :: Float
atomZ :: Float
atomY :: Float
atomX :: Float
atomICode :: Char
atomResSeq :: Int
atomChainID :: Char
atomResName :: Text
atomAltLoc :: Char
atomName :: Text
atomSerial :: Int
atomICode :: Atom -> Char
atomResSeq :: Atom -> Int
atomResName :: Atom -> Text
atomChainID :: Atom -> Char
..} = GlobalID
-> Int -> Text -> Text -> V3 Float -> Int -> Float -> Float -> Atom
Atom (Int -> GlobalID
GlobalID forall a b. (a -> b) -> a -> b
$ Map Atom Int
atomToNilBasedIndex forall k a.
(HasCallStack, Ord k, Show k, Show a) =>
Map k a -> k -> a
M.!?! Atom
atom)
                                       Int
atomSerial
                                       (Text -> Text
T.strip Text
atomName)
                                       Text
atomElement
                                       (forall a. a -> a -> a -> V3 a
V3 Float
atomX Float
atomY Float
atomZ)
                                       (forall a. a -> Maybe a -> a
fromMaybe Int
0 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Read a => [Char] -> Maybe a
readMaybe forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack Text
atomCharge)
                                       Float
atomTempFactor
                                       Float
atomOccupancy

modelsFromPDBFile :: (MonadIO m) => FilePath -> m (Either Text ([PDBWarnings], Vector Model))
modelsFromPDBFile :: forall (m :: * -> *).
MonadIO m =>
[Char] -> m (Either Text ([PDBWarnings], Vector Model))
modelsFromPDBFile = forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> IO Text
TIO.readFile

modelsFromPDBText :: Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText :: Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText Text
pdbText = do
  ([PDBWarnings]
warnings, PDB
parsedPDB) <- Text -> Either Text ([PDBWarnings], PDB)
fromTextPDB Text
pdbText
  let models :: Vector Model
models = forall a. StructureModels a => a -> Vector Model
modelsOf PDB
parsedPDB
  forall (f :: * -> *) a. Applicative f => a -> f a
pure ([PDBWarnings]
warnings, Vector Model
models)

instance StructureSerializable PDB.PDB where
  serializeModels :: Vector Model -> PDB
serializeModels Vector Model
models = Text
-> Vector Model
-> Map RemarkCode RemarkData
-> Map FieldType RemarkData
-> PDB
PDB.PDB Text
"Serialized model" Vector Model
pdbModels forall a. Monoid a => a
mempty forall a. Monoid a => a
mempty
    where
      pdbModels :: Vector Model
pdbModels = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
toPDBModel Vector Model
models

      toPDBModel :: Model -> PDB.Model
      toPDBModel :: Model -> Model
toPDBModel = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
toPDBChain forall b c a. (b -> c) -> (a -> b) -> a -> c
. Model -> Vector Chain
modelChains

      toPDBChain :: Chain -> PDB.Chain
      toPDBChain :: Chain -> Chain
toPDBChain Chain
ch = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Chain, Residue, Atom) -> Atom
toPDBAtom forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *) a. Monad m => m (m a) -> m a
join forall a b. (a -> b) -> a -> b
$ (\Residue
r -> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((,,) Chain
ch Residue
r) forall a b. (a -> b) -> a -> b
$ Residue -> Vector Atom
resAtoms Residue
r) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Chain -> Vector Residue
chainResidues Chain
ch

      toPDBAtom :: (Chain, Residue, Atom) -> PDB.Atom
      toPDBAtom :: (Chain, Residue, Atom) -> Atom
toPDBAtom (Chain{Text
Vector Residue
chainName :: Chain -> Text
chainResidues :: Vector Residue
chainName :: Text
chainResidues :: Chain -> Vector Residue
..}, Residue{Char
Int
Text
Vector (Bond LocalID)
Vector Atom
SecondaryStructure
resChemCompType :: Residue -> Text
resSecondary :: Residue -> SecondaryStructure
resBonds :: Residue -> Vector (Bond LocalID)
resInsertionCode :: Residue -> Char
resNumber :: Residue -> Int
resName :: Residue -> Text
resChemCompType :: Text
resSecondary :: SecondaryStructure
resBonds :: Vector (Bond LocalID)
resAtoms :: Vector Atom
resInsertionCode :: Char
resNumber :: Int
resName :: Text
resAtoms :: Residue -> Vector Atom
..}, Atom{Float
Int
Text
V3 Float
GlobalID
occupancy :: Atom -> Float
bFactor :: Atom -> Float
formalCharge :: Atom -> Int
atomCoords :: Atom -> V3 Float
atomElement :: Atom -> Text
atomName :: Atom -> Text
atomInputIndex :: Atom -> Int
atomId :: Atom -> GlobalID
occupancy :: Float
bFactor :: Float
formalCharge :: Int
atomCoords :: V3 Float
atomElement :: Text
atomName :: Text
atomInputIndex :: Int
atomId :: GlobalID
..}) = Atom
res
        where
          res :: Atom
res =
            Int
-> Text
-> Char
-> Text
-> Char
-> Int
-> Char
-> Float
-> Float
-> Float
-> Float
-> Float
-> Text
-> Text
-> Atom
PDB.Atom
              (GlobalID -> Int
getGlobalID GlobalID
atomId forall a. Num a => a -> a -> a
+ Int
1)
              Text
atomName
              Char
nullAltLoc
              Text
resName
              (Text -> Char
T.head Text
chainName)
              Int
resNumber
              Char
resInsertionCode
              (V3 Float
atomCoords forall s a. s -> Getting a s a -> a
^. forall (t :: * -> *) a. R1 t => Lens' (t a) a
_x)
              (V3 Float
atomCoords forall s a. s -> Getting a s a -> a
^. forall (t :: * -> *) a. R2 t => Lens' (t a) a
_y)
              (V3 Float
atomCoords forall s a. s -> Getting a s a -> a
^. forall (t :: * -> *) a. R3 t => Lens' (t a) a
_z)
              Float
occupancy
              Float
bFactor
              Text
atomElement
              ([Char] -> Text
T.pack forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> [Char]
show Int
formalCharge)

          nullAltLoc :: Char
          nullAltLoc :: Char
nullAltLoc = Char
' '

-- | Writes models to the given path as PDB.
--
modelsToPDBFile :: MonadIO m => Vector Model -> FilePath -> m ()
modelsToPDBFile :: forall (m :: * -> *). MonadIO m => Vector Model -> [Char] -> m ()
modelsToPDBFile Vector Model
models = forall (m :: * -> *). MonadIO m => PDB -> [Char] -> m ()
pdbToFile forall a b. (a -> b) -> a -> b
$ forall a. StructureSerializable a => Vector Model -> a
serializeModels Vector Model
models

-- | Converts models to their 'Text' representation as PDB.
--
modelsToPDBText :: Vector Model -> Text
modelsToPDBText :: Vector Model -> Text
modelsToPDBText = PDB -> Text
pdbToText forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. StructureSerializable a => Vector Model -> a
serializeModels