Source code for meeko.receptor_pdbqt

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Meeko
#

from collections import defaultdict
import json
eol="\n"

import numpy as np
from scipy import spatial

from .utils.covalent_radius_table import covalent_radius
from .utils.autodock4_atom_types_elements import autodock4_atom_types_elements
from .reactive import get_reactive_atype


atom_property_definitions = {'H': 'vdw', 'C': 'vdw', 'A': 'vdw', 'N': 'vdw', 'P': 'vdw', 'S': 'vdw',
                             'Br': 'vdw', 'I': 'vdw', 'F': 'vdw', 'Cl': 'vdw',
                             'NA': 'hb_acc', 'OA': 'hb_acc', 'SA': 'hb_acc', 'OS': 'hb_acc', 'NS': 'hb_acc',
                             'HD': 'hb_don', 'HS': 'hb_don',
                             'Mg': 'metal', 'Ca': 'metal', 'Fe': 'metal', 'Zn': 'metal', 'Mn': 'metal',
                             'MG': 'metal', 'CA': 'metal', 'FE': 'metal', 'ZN': 'metal', 'MN': 'metal',
                             'W': 'water',
                             'G0': 'glue', 'G1': 'glue', 'G2': 'glue', 'G3': 'glue',
                             'CG0': 'glue', 'CG1': 'glue', 'CG2': 'glue', 'CG3': 'glue'}


def _read_receptor_pdbqt_string(pdbqt_string, skip_typing=False):
    atoms = []
    atoms_dtype = [('idx', 'i4'), ('serial', 'i4'), ('name', 'U4'), ('resid', 'i4'),
                   ('resname', 'U3'), ('chain', 'U1'), ("xyz", "f4", (3)),
                   ('partial_charges', 'f4'), ('atom_type', 'U2'),
                   ('alt_id', 'U1'), ('in_code', 'U1'),
                   ('occupancy', 'f4'), ('temp_factor', 'f4'), ('record_type', 'U6')
                  ]
    atom_annotations = {'hb_acc': [], 'hb_don': [],
                        'all': [], 'vdw': [],
                        'metal': []}
    # TZ is a pseudo atom for AutoDock4Zn FF
    pseudo_atom_types = ['TZ']

    idx = 0
    for line in pdbqt_string.split(eol):
        if line.startswith('ATOM') or line.startswith("HETATM"):
            serial = int(line[6:11].strip())
            name = line[12:16].strip()
            resname = line[17:20].strip()
            chainid = line[21].strip()
            resid = int(line[22:26].strip())
            xyz = np.array([line[30:38].strip(), line[38:46].strip(), line[46:54].strip()], dtype=np.float32)
            try:
                partial_charges = float(line[70:76].strip())
            except:
                partial_charges = None # probably reading a PDB, not PDBQT
            atom_type = line[77:79].strip()
            alt_id = line[16:17].strip()
            in_code = line[26:27].strip()
            try:
                occupancy = float(line[54:60])
            except:
                occupancy = None
            try:
                temp_factor = float(line[60:68])
            except:
                temp_factor = None
            record_type = line[0:6].strip()

            if skip_typing:
                atoms.append((idx, serial, name, resid, resname, chainid, xyz, partial_charges, atom_type,
                              alt_id, in_code, occupancy, temp_factor, record_type))
                continue
            if not atom_type in pseudo_atom_types:
                atom_annotations['all'].append(idx)
                atom_annotations[atom_property_definitions[atom_type]].append(idx)
                atoms.append((idx, serial, name, resid, resname, chainid, xyz, partial_charges, atom_type,
                              alt_id, in_code, occupancy, temp_factor, record_type))

            idx += 1
    if idx == 0:
        raise ValueError(f"no atoms found in {pdbqt_string=}") 
    atoms = np.array(atoms, dtype=atoms_dtype)

    return atoms, atom_annotations


def _identify_bonds(atom_idx, positions, atom_types):
    bonds = defaultdict(list)
    KDTree = spatial.cKDTree(positions)
    bond_allowance_factor = 1.1
    # If we ask more than the number of coordinates/element
    # in the BHTree, we will end up with some inf values
    k = 5 if len(atom_idx) > 5 else len(atom_idx)
    atom_idx = np.array(atom_idx)

    for atom_i, position, atom_type in zip(atom_idx, positions, atom_types):
        distances, indices = KDTree.query(position, k=k)
        r_cov = covalent_radius[autodock4_atom_types_elements[atom_type]]

        optimal_distances = [bond_allowance_factor * (r_cov + covalent_radius[autodock4_atom_types_elements[atom_types[i]]]) for i in indices[1:]]
        bonds[atom_i] = atom_idx[indices[1:][np.where(distances[1:] < optimal_distances)]].tolist()

    return bonds


[docs] class PDBQTReceptor: skip_types=("H",) def __init__(self, pdbqt_string, skip_typing=False): self._pdbqt_filename = None self._atoms = None self._atom_annotations = None self._KDTree = None self._atoms, self._atom_annotations = _read_receptor_pdbqt_string(pdbqt_string, skip_typing) # We add to the KDTree only the rigid part of the receptor self._KDTree = spatial.cKDTree(self._atoms['xyz']) self._bonds = _identify_bonds(self._atom_annotations['all'], self._atoms['xyz'], self._atoms['atom_type']) self.atom_idxs_by_res = self.get_atom_indices_by_residue(self._atoms)
[docs] @classmethod def from_pdbqt_filename(cls, pdbqt_filename, skip_typing=False): with open(pdbqt_filename) as f: pdbqt_string = f.read() receptor = cls(pdbqt_string, skip_typing) receptor._pdbqt_filename = pdbqt_filename return receptor
def __repr__(self): if self._pdbqt_filename is None: msg = '<Receptor containing %d atoms>' % self._atoms.shape[0] else: msg ='<Receptor from PDBQT file %s containing %d atoms>' % (self._pdbqt_filename, self._atoms.shape[0]) return msg
[docs] @staticmethod def get_atom_indices_by_residue(atoms): """ return a dictionary where residues are keys and values are lists of atom indices >>> atom_idx_by_res = {("A", "LYS", 417): [0, 1, 2, 3, ..., 8]} """ atom_idx_by_res = {} for atom_index, atom in enumerate(atoms): res_id = (atom["chain"], atom["resname"], atom["resid"]) atom_idx_by_res.setdefault(res_id, []) atom_idx_by_res[res_id].append(atom_index) return atom_idx_by_res
[docs] def atoms(self, atom_idx=None): """Return the atom i Args: atom_idx (int, list): index of one or multiple atoms Returns: ndarray: 2d ndarray (atom_id, atom_name, resname, resid, chainid, xyz, q, t) """ if atom_idx is not None and self._atoms.size > 1: if not isinstance(atom_idx, (list, tuple, np.ndarray)): atom_idx = np.array(atom_idx, dtype=int) atoms = self._atoms[atom_idx] else: atoms = self._atoms return atoms.copy()
[docs] def positions(self, atom_idx=None): """Return coordinates (xyz) of all atoms or a certain atom Args: atom_idx (int, list): index of one or multiple atoms (0-based) Returns: ndarray: 2d ndarray of coordinates (xyz) """ return np.atleast_2d(self.atoms(atom_idx)['xyz'])
[docs] def closest_atoms_from_positions(self, xyz, radius, atom_properties=None, ignore=None): """Retrieve indices of the closest atoms around a positions/coordinates at a certain radius. Args: xyz (np.ndarray): array of 3D coordinates raidus (float): radius atom_properties (str): property of the atoms to retrieve (properties: ligand, flexible_residue, vdw, hb_don, hb_acc, metal, water, reactive, glue) ignore (int or list): ignore atom for the search using atom id (0-based) Returns: ndarray: 2d ndarray (atom_id, atom_name, resname, resid, chainid, xyz, q, t) """ index = self._KDTree.query_ball_point(xyz, radius, p=2, return_sorted=True) # When nothing was found around... if not index: return np.array([]) # Handle the case when positions for of only one atom was passed in the input try: index = {i for j in index for i in j} except: index = set(index) if atom_properties is not None: if not isinstance(atom_properties, (list, tuple)): atom_properties = [atom_properties] try: for atom_property in atom_properties: index.intersection_update(self._atom_annotations[atom_property]) except: error_msg = 'Atom property %s is not valid. Valid atom properties are: %s' raise KeyError(error_msg % (atom_property, self._atom_annotations.keys())) if ignore is not None: if not isinstance(ignore, (list, tuple, np.ndarray)): ignore = [ignore] index = index.difference([i for i in ignore]) index = list(index) atoms = self._atoms[index].copy() return atoms
[docs] def closest_atoms(self, atom_idx, radius, atom_properties=None): """Retrieve indices of the closest atoms around a positions/coordinates at a certain radius. Args: atom_idx (int, list): index of one or multiple atoms (0-based) raidus (float): radius atom_properties (str or list): property of the atoms to retrieve (properties: ligand, flexible_residue, vdw, hb_don, hb_acc, metal, water, reactive, glue) Returns: ndarray: 2d ndarray (atom_id, atom_name, resname, resid, chainid, xyz, q, t) """ return self.closest_atoms_from_positions(self._atoms[atom_idx]['xyz'], radius, atom_properties, atom_idx)
[docs] def neighbor_atoms(self, atom_idx): """Return neighbor (bonded) atoms Args: atom_idx (int, list): index of one or multiple atoms (0-based) Returns: list_of_list: list of lists containing the neighbor (bonded) atoms (0-based) """ if not isinstance(atom_idx, (list, tuple, np.ndarray)): atom_idx = [atom_idx] return [self._bonds[i] for i in atom_idx]