Source code for meeko.molecule_pdbqt

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Meeko
#

import logging
import os
from collections import defaultdict

import numpy as np
from scipy import spatial

from .utils.covalent_radius_table import covalent_radius
from .utils.autodock4_atom_types_elements import autodock4_atom_types_elements

logger = logging.getLogger(__name__)

atom_property_definitions = {'H': 'vdw', 'C': 'vdw', 'A': 'vdw', 'N': 'vdw', 'P': 'vdw', 'S': 'vdw',
                             'Br': 'vdw', 'I': 'vdw', 'F': 'vdw', 'Cl': 'vdw',
                             'NA': 'hb_acc', 'OA': 'hb_acc', 'SA': 'hb_acc', 'OS': 'hb_acc', 'NS': 'hb_acc',
                             'HD': 'hb_don', 'HS': 'hb_don',
                             'Mg': 'metal', 'Ca': 'metal', 'Fe': 'metal', 'Zn': 'metal', 'Mn': 'metal',
                             'MG': 'metal', 'CA': 'metal', 'FE': 'metal', 'ZN': 'metal', 'MN': 'metal',
                             'W': 'water',
                             'G0': 'glue', 'G1': 'glue', 'G2': 'glue', 'G3': 'glue',
                             'CG0': 'glue', 'CG1': 'glue', 'CG2': 'glue', 'CG3': 'glue'}


def _read_ligand_pdbqt_file(pdbqt_string, poses_to_read=-1, energy_range=-1, is_dlg=False, skip_typing=False):
    """
    Read PDBQT file and return the atoms, positions and annotations. 

    Parameters
    ----------
    pdbqt_string : str
        PDBQT string. 
    poses_to_read : int
        Number of poses to read. 
        Default is -1 (read all poses). 
    energy_range : float
        Read docked poses until the maximum energy difference from best pose is reach, for example 2.5 kcal/mol. 
        Default is -1 (read all poses). 
    is_dlg : bool
        Input file is in dlg (AutoDock docking log) format. 
    skip_typing : bool
        Flag indicating that atomtyping should be skipped. 
    
    Returns
    -------
    atoms : ndarray
        Array of atoms. 
    positions : ndarray
        Array of positions. 
    atom_annotations : dict
        Dictionary of atom annotations. 
    pose_data : dict
        Dictionary of pose data. 
    """
    i = 0
    n_poses = 0
    previous_serial = 0
    tmp_positions = []
    tmp_atoms = []
    tmp_actives = []
    tmp_pdbqt_string = ''
    water_indices = {*()}  
    location = 'ligand'
    energy_best_pose = None
    is_first_pose = True
    is_model = False
    mol_index = -1 # incremented for each ROOT keyword
    atoms_dtype = [('idx', 'i4'), ('serial', 'i4'), ('name', 'U4'), ('resid', 'i4'),
                   ('resname', 'U3'), ('chain', 'U1'), ('xyz', 'f4', (3)),
                   ('partial_charges', 'f4'), ('atom_type', 'U3')]

    atoms = None
    positions = []

    # flexible_residue is for atoms between BEGIN_RES and END_RES keywords, ligand otherwise.
    # flexres assigned "ligand" if BEGIN/END RES keywords are missing
    # mol_index distinguishes different ligands and flexres because ROOT keyword increments mol_index  
    atom_annotations = {'ligand': [], 'flexible_residue': [], 'water': [],
                        'hb_acc': [], 'hb_don': [],
                        'all': [], 'vdw': [],
                        'glue': [], 'reactive': [], 'metal': [],
                        'mol_index': {},
                        }
    pose_data = {
        'n_poses': None,
        'active_atoms': [],
        'free_energies': [], 
        'intermolecular_energies': [],
        'internal_energies': [],
        'index_map': {},
        'pdbqt_string': [],
        'smiles': {},
        'smiles_index_map': {},
        'smiles_h_parent': {},
        'cluster_id': [],
        'rank_in_cluster': [],
        'cluster_leads_sorted': [],
        'cluster_size': [],
        "mol_index_to_flexible_residue": {},
    }

    tmp_cluster_data = {}

    buffer_index_map = {}
    buffer_smiles = None
    buffer_smiles_index_map = []
    buffer_smiles_h_parent = []
    buffer_flexres_id = None

    lines = pdbqt_string.split('\n')
    if len(lines[-1]) == 0: lines = lines[:-1]
    lines = [line + '\n' for line in lines]
    for line in lines:
        if is_dlg:
            if line.startswith('DOCKED'):
                line = line[8:]
            # parse clustering
            elif line.endswith('RANKING\n'):
                fields = line.split()
                cluster_id = int(fields[0])
                subrank = int(fields[1])
                run_id = int(fields[2])
                tmp_cluster_data[run_id] = (cluster_id, subrank)
            else:
                continue

        if not line.startswith(('MODEL', 'ENDMDL')):
            """This is very lazy I know...
            But would you rather spend time on rebuilding the whole torsion tree and stuff
            for writing PDBQT files or drinking margarita? Energy was already spend to build
            that, so let's re-use it!"""
            tmp_pdbqt_string += line

        if line.startswith('MODEL'):
            # Reinitialize variables
            i = 0
            previous_serial = 0
            tmp_positions = []
            tmp_atoms = []
            tmp_actives = []
            tmp_pdbqt_string = ''
            is_model = True
            mol_index = -1 # incremented for each ROOT keyword
        elif line.startswith('ATOM') or line.startswith("HETATM"):
            serial = int(line[6:11].strip())
            name = line[12:16].strip()
            resname = line[17:20].strip()
            chainid = line[21].strip()
            resid = int(line[22:26].strip())
            xyz = np.array([line[30:38].strip(), line[38:46].strip(), line[46:54].strip()], dtype=float)
            try:
                # PDBQT files from dry.py script are stripped from their partial charges. sigh...
                partial_charges = float(line[70:76].strip())
            except:
                partial_charges = 0.0
            atom_type = line[77:-1].strip()

            """ We are looking for gap in the serial atom numbers. Usually if they
            are not following it means that atoms are missing. This will happen with
            water molecules after using dry.py, only non-overlapping water molecules
            are kept. Also if the current serial becomes suddenly inferior than the
            previous and equal to 1, it means that we are now in another molecule/flexible 
            residue. So here we are adding dummy atoms
            """
            if (previous_serial + 1 != serial) and not (serial < previous_serial and serial == 1):
                diff = serial - previous_serial - 1
                for _ in range(diff):
                    xyz_nan = [999.999, 999.999, 999.999]
                    tmp_atoms.append((i, 9999, 'XXXX', 9999, 'XXX', 'X', xyz_nan, 999.999, 'XX'))
                    tmp_positions.append(xyz_nan)
                    i += 1

            # Once it is done, we can return to a normal life... and add existing atoms
            tmp_atoms.append((i, serial, name, resid, resname, chainid, xyz, partial_charges, atom_type))
            tmp_positions.append(xyz)
            tmp_actives.append(i)

            if is_first_pose:
                atom_annotations["mol_index"].setdefault(mol_index, [])
                atom_annotations["mol_index"][mol_index].append(i)
                # We store water idx separately from the rest since their number can be variable
                if atom_type != 'W':
                    atom_annotations[location].append(i)
                    atom_annotations['all'].append(i)
                    if not skip_typing:
                        atom_annotations[atom_property_definitions[atom_type]].append(i)

            if atom_type == 'W':
                water_indices.update([i])

            previous_serial = serial
            i += 1
        elif line.startswith("ROOT") and is_first_pose:
            mol_index += 1
            # buffers needed because REMARKS preceeds ROOT
            pose_data["index_map"][mol_index] = buffer_index_map
            pose_data["smiles"][mol_index] = buffer_smiles
            pose_data["smiles_index_map"][mol_index] = buffer_smiles_index_map
            pose_data["smiles_h_parent"][mol_index] = buffer_smiles_h_parent
            pose_data["mol_index_to_flexible_residue"][mol_index] = buffer_flexres_id
            buffer_index_map = {}
            buffer_smiles = None
            buffer_smiles_index_map = []
            buffer_smiles_h_parent = []
            buffer_flexres_id = None
        elif line.startswith('REMARK INDEX MAP') and is_first_pose:
            integers = [int(integer) for integer in line.split()[3:]]
            if len(integers) % 2 == 1:
                raise RuntimeError("Number of indices in INDEX MAP is odd")
            for j in range(int(len(integers) / 2)): 
                buffer_index_map[integers[j*2]] = integers[j*2 + 1]
        elif line.startswith('REMARK SMILES IDX') and is_first_pose:
            integers = [int(integer) for integer in line.split()[3:]]
            if len(integers) % 2 == 1:
                raise RuntimeError("Number of indices in SMILES IDX is odd")
            buffer_smiles_index_map.extend(integers)
        elif line.startswith('REMARK H PARENT') and is_first_pose:
            integers = [int(integer) for integer in line.split()[3:]]
            if len(integers) % 2 == 1:
                raise RuntimeError("Number of indices in H PARENT is odd")
            buffer_smiles_h_parent.extend(integers)
        elif line.startswith('REMARK SMILES') and is_first_pose: # must check after SMILES IDX
            buffer_smiles = line.split()[2]
        elif line.startswith('REMARK VINA RESULT') or line.startswith('USER    Estimated Free Energy of Binding    ='):
            # Read free energy from output PDBQT files
            try:
                # Vina
                energy = float(line.split()[3])
            except:
                # AD4
                energy = float(line[45:].split()[0]) # no guarantee of space between = and number

            if energy_best_pose is None:
                energy_best_pose = energy
            energy_current_pose = energy

            # NOTE this assumes poses are sorted by increasing energy
            diff_energy = energy_current_pose - energy_best_pose
            if (energy_range <= diff_energy and energy_range != -1):
                break

            pose_data['free_energies'].append(energy)
        elif not is_dlg and line.startswith('REMARK INTER:'):
            pose_data['intermolecular_energies'].append(float(line.split()[2]))
        elif not is_dlg and line.startswith('REMARK INTRA:'):
            pose_data['internal_energies'].append(float(line.split()[2]))
        elif is_dlg and line.startswith('USER    (1) Final Intermolecular Energy     ='):
            pose_data['intermolecular_energies'].append(float(line[45:].split()[0]))
        elif is_dlg and line.startswith('USER    (2) Final Total Internal Energy     ='):
            pose_data['internal_energies'].append(float(line[45:].split()[0]))
        elif line.startswith('BEGIN_RES'):
            location = 'flexible_residue'
            buffer_flexres_id = " ".join(line.strip().split()[1:])
        elif line.startswith('END_RES'):
            # We never know if there is a molecule just after the flexible residue...
            location = 'ligand'
        elif line.startswith('ENDMDL'):
            n_poses += 1
            # After reading the first pose no need to store atom properties
            # anymore, it is the same for every pose
            is_first_pose = False

            tmp_atoms = np.array(tmp_atoms, dtype=atoms_dtype)

            if atoms is None:
                """We store the atoms (topology) only once, since it is supposed to be
                the same for all the molecules in the PDBQT file (except when water molecules
                are involved... classic). But we will continue to compare the topology of
                the current pose with the first one seen in the PDBQT file, to be sure only
                the atom positions are changing."""
                atoms = tmp_atoms.copy()
            else:
                # Check if the molecule topology is the same for each pose
                # We ignore water molecules (W) and atom type XX
                columns = ['idx', 'serial', 'name', 'resid', 'resname', 'chain', 'partial_charges', 'atom_type']
                topology1 = atoms[np.isin(atoms['atom_type'], ['W', 'XX'], invert=True)][columns]
                topology2 = tmp_atoms[np.isin(atoms['atom_type'], ['W', 'XX'], invert=True)][columns]

                if not np.array_equal(topology1, topology2):
                    error_msg = 'molecules have different topologies'
                    raise RuntimeError(error_msg)

                # Update information about water molecules (W) as soon as we find new ones
                tmp_water_molecules_idx = tmp_atoms[tmp_atoms['atom_type'] == 'W']['idx']
                water_molecules_idx = atoms[atoms['atom_type'] == 'XX']['idx']
                new_water_molecules_idx = list(set(tmp_water_molecules_idx).intersection(water_molecules_idx))
                atoms[new_water_molecules_idx] = tmp_atoms[new_water_molecules_idx]

            positions.append(tmp_positions)
            pose_data['active_atoms'].append(tmp_actives)
            pose_data['pdbqt_string'].append(tmp_pdbqt_string)

            if (n_poses >= poses_to_read and poses_to_read != -1):
                break

    """ if there is no model, it means that there is only one molecule
    so when we reach the end of the file, we store the atoms, 
    positions and actives stuff. """
    if not is_model:
        n_poses += 1
        atoms = np.array(tmp_atoms, dtype=atoms_dtype)
        positions.append(tmp_positions)
        pose_data['active_atoms'].append(tmp_actives)
        pose_data['pdbqt_string'].append(tmp_pdbqt_string)

    positions = np.array(positions).reshape((n_poses, atoms.shape[0], 3))

    pose_data['n_poses'] = n_poses

    # We add indices of all the water molecules we saw
    if water_indices:
        atom_annotations['water'] = list(water_indices)

    # clustering        
    if len(tmp_cluster_data) > 0:
        if len(tmp_cluster_data) != n_poses:
            raise RuntimeError("Nr of poses in cluster data (%d) differs from nr of poses (%d)" % (len(tmp_cluster_data, n_poses)))
        pose_data["cluster_id"] = [None] * n_poses
        pose_data["rank_in_cluster"] = [None] * n_poses
        pose_data["cluster_size"] = [None] * n_poses
        cluster_ids = [cluster_id for _, (cluster_id, _) in tmp_cluster_data.items()]
        n_clusters = max(cluster_ids)
        pose_data["cluster_leads_sorted"] = [None] * n_clusters
        for pose_index, (cluster_id, rank_in_cluster) in tmp_cluster_data.items():
            pose_data["cluster_id"][pose_index - 1] = cluster_id
            pose_data["rank_in_cluster"][pose_index - 1] = rank_in_cluster
            pose_data["cluster_size"][pose_index - 1] = cluster_ids.count(cluster_id)
            if rank_in_cluster == 1: # is cluster lead
                pose_data["cluster_leads_sorted"][cluster_id - 1] = pose_index - 1
    return atoms, positions, atom_annotations, pose_data


def _identify_bonds(atom_idx, positions, atom_types):
    """
    Identify bonds between atoms based on their positions and types. 
    
    Parameters
    ----------
    atom_idx : list
        List of atom indices. 
    positions : ndarray
        Array of atom positions. 
    atom_types : list
        List of atom types. 
    
    Returns
    -------
    bonds : dict
        Dictionary of bonds. 
    """
    bonds = defaultdict(list)
    KDTree = spatial.cKDTree(positions)
    bond_allowance_factor = 1.1
    # If we ask more than the number of coordinates/element
    # in the BHTree, we will end up with some inf values
    k = 5 if len(atom_idx) > 5 else len(atom_idx)
    atom_idx = np.array(atom_idx)

    # If there is only one atom, we know there won't be a single bond..
    if len(atom_idx) == 1:
        return bonds

    for atom_i, position, atom_type in zip(atom_idx, positions, atom_types):
        distances, indices = KDTree.query(position, k=k)
        r_cov = covalent_radius[autodock4_atom_types_elements[atom_type]]

        optimal_distances = [bond_allowance_factor * (r_cov + covalent_radius[autodock4_atom_types_elements[atom_types[i]]]) for i in indices[1:]]
        bonds[atom_i] = atom_idx[indices[1:][np.where(distances[1:] < optimal_distances)]].tolist()

    return bonds


[docs] class PDBQTMolecule: """ PDBQTMolecule class for reading PDBQT (or dlg) files from AutoDock4, AutoDock-GPU or AutoDock-Vina. Attributes ---------- _current_pose : int Index of the current pose. _pdbqt_filename : str Filename of the PDBQT file. _atoms : ndarray. Array of atoms. _positions : ndarray Array of positions. _bonds : dict Dictionary of bonds. _atom_annotations : dict Dictionary of atom annotations. _pose_data : dict Dictionary of pose data. _name : str Name of the molecule. _KDTrees : list List of KDTree objects for each pose. """ def __init__(self, pdbqt_string, name=None, poses_to_read=None, energy_range=None, is_dlg=False, skip_typing=False): """ Initialize PDBQTMolecule object. Parameters ---------- pdbqt_string : str PDBQT string. name : str, optional Name of the molecule. Default is None (use filename without pdbqt suffix). poses_to_read : int, optional Total number of poses to read. Default is None (read all). energy_range : float, optional Read docked poses until the maximum energy difference from best pose is reach, for example 2.5 kcal/mol. Default is None (read all). is_dlg : bool, optional Input file is in dlg (AutoDock docking log) format. Default is False (input file is not in dlg format). skip_typing : bool, optional Flag indicating that atomtyping should be skipped. Default is False (do not skip typing). """ self._current_pose = 0 self._pdbqt_filename = None self._atoms = None self._positions = None self._bonds = None self._atom_annotations = None self._pose_data = None self._name = name # Juice all the information from that PDBQT file poses_to_read = poses_to_read if poses_to_read is not None else -1 energy_range = energy_range if energy_range is not None else -1 results = _read_ligand_pdbqt_file(pdbqt_string, poses_to_read, energy_range, is_dlg, skip_typing) self._atoms, self._positions, self._atom_annotations, self._pose_data = results if self._atoms.shape[0] == 0: raise RuntimeError('read 0 atoms. Consider PDBQTMolecule.from_file(fname)') # Build KDTrees for each pose (search closest atoms by distance) self._KDTrees = [spatial.cKDTree(positions) for positions in self._positions] # Identify bonds in the ligands if not skip_typing: mol_atoms = self._atoms[self._atom_annotations['ligand']] self._bonds = _identify_bonds(self._atom_annotations['ligand'], mol_atoms['xyz'], mol_atoms['atom_type']) """... then in the flexible residues Since we are extracting bonds from docked poses, we might be in the situation where the ligand reacted with one the flexible residues and we don't want to consider them as normally bonded...""" if self.has_flexible_residues(): flex_atoms = self._atoms[self._atom_annotations['flexible_residue']] self._bonds.update(_identify_bonds(self._atom_annotations['flexible_residue'], flex_atoms['xyz'], flex_atoms['atom_type']))
[docs] @classmethod def from_file(cls, pdbqt_filename, name=None, poses_to_read=None, energy_range=None, is_dlg=False, skip_typing=False): """ Read PDBQT file and return PDBQTMolecule object. Parameters ---------- pdbqt_filename : str Filename of the PDBQT file. name : str, optional Name of the molecule. De is None (use filename without pdbqt suffix). poses_to_read : int, optional Total number of poses to read. Default is None (read all). energy_range : float, optional Read docked poses until the maximum energy difference from best pose is reach, for example 2.5 kcal/mol. Default is None (read all). is_dlg : bool, optional Input file is in dlg (AutoDock docking log) format. Default is False (input file is not in dlg format). skip_typing : bool, optional Flag indicating that atomtyping should be skipped. Default is False (do not skip typing). """ if name is None: name = os.path.splitext(os.path.basename(pdbqt_filename))[0] with open(pdbqt_filename) as f: pdbqt_string = f.read() instance = cls(pdbqt_string, name, poses_to_read, energy_range, is_dlg, skip_typing) instance._pdbqt_filename = pdbqt_filename return instance
def __getitem__(self, value): """ Get a pose by index. Parameters ---------- value : int Index of the pose to get. Returns ------- self : PDBQTMolecule PDBQTMolecule object with the specified pose. Raises ------- IndexError If the index is out of range. TypeError If the argument type is invalid. """ if isinstance(value, int): if value < 0 or value >= self._positions.shape[0]: raise IndexError('The index (%d) is out of range.' % value) elif isinstance(value, slice): raise TypeError('Slicing is not implemented for PDBQTMolecule object.') else: raise TypeError('Invalid argument type.') self._current_pose = value return self def __iter__(self): """Return the iterator object.""" self._current_pose = -1 return self def __next__(self): """Return the next pose in the iteration.""" if self._current_pose + 1 >= self._positions.shape[0]: raise StopIteration self._current_pose += 1 return self def __repr__(self): """Return a string representation of the PDBQTMolecule object.""" repr_str = '<Molecule named %s containing %d poses of %d atoms>' return (repr_str % (self._name, self._pose_data['n_poses'], self._atoms.shape[0])) @property def name(self): """Return the name of the molecule.""" return self._name @property def pose_id(self): """Return the index of the current pose.""" return self._current_pose @property def score(self): """Return the score (kcal/mol) of the current pose.""" return self._pose_data['free_energies'][self._current_pose]
[docs] def available_atom_properties(self, ignore_properties=None): """ Return all the available atom properties for that molecule. The following properties are awlays ignored: - ligand - flexible_residue - water Parameters ---------- ignore_properties : list, optional List of properties to ignore. Default is None (no properties are ignored). Returns ------- list : list List of available atom properties. """ if ignore_properties is None: ignore_properties = [] if not isinstance(ignore_properties, (list, tuple)): ignore_properties = [ignore_properties] ignore_properties += ['ligand', 'flexible_residue', 'water'] return [k for k, v in self._atom_annotations.items() if k not in ignore_properties and len(v) > 0]
[docs] def has_flexible_residues(self): """ Tell if the molecule contains a flexible residue or not. Returns ------- bool True if contains flexible residues, otherwise False. """ if self._atom_annotations['flexible_residue']: return True else: return False
[docs] def has_water_molecules(self): """ Tell if the molecules contains water molecules or not in the current pose. Returns ------- bool True if contains water molecules in the current pose, otherwise False. """ active_atoms_idx = self._pose_data['active_atoms'][self._current_pose] if set(self._atom_annotations['water']).intersection(active_atoms_idx): return True else: return False
[docs] def atoms(self, atom_idx=None, only_active=True): """ Return the ith atom. Parameters ---------- atom_idx : int, list, optional Index of one or multiple atoms (0-based). Default is None (return all atoms). only_active : bool, optional Return only active atoms. Default is True (return only active atoms). Returns ------- atoms : ndarray ndarray (atom_id, atom_name, resname, resid, chainid, xyz, q, t). """ if atom_idx is not None: if not isinstance(atom_idx, (list, tuple, np.ndarray)): atom_idx = np.array(atom_idx, dtype=np.int) else: atom_idx = np.arange(0, self._atoms.shape[0]) # Get index of only the active atoms if only_active: active_atoms_idx = self._pose_data['active_atoms'][self._current_pose] atom_idx = sorted(list(set(atom_idx).intersection(active_atoms_idx))) atoms = self._atoms[atom_idx].copy() atoms['xyz'] = self._positions[self._current_pose, atom_idx,:] return atoms
[docs] def positions(self, atom_idx=None, only_active=True): """ Return coordinates (xyz) of all atoms or a certain atom. Parameters ---------- atom_idx : int, list, optional Index of one or multiple atoms (0-based). Default is None (return all atoms). only_active : bool, optional Return only active atoms. Default is True (return only active atoms). Returns ------- ndarray ndarray of coordinates (xyz). """ return np.atleast_2d(self.atoms(atom_idx, only_active)['xyz'])
[docs] def atoms_by_properties(self, atom_properties, only_active=True): """ Return atom based on their properties. Parameters ---------- atom_properties : str or list Property of the atoms to retrieve (properties: ligand, flexible_residue, vdw, hb_don, hb_acc, metal, water, reactive, glue). only_active : bool, optional Return only active atoms. Default is True (return only active atoms). Returns ------- ndarray ndarray of atoms (atom_id, atom_name, resname, resid, chainid, xyz, q, t). Raises ------- KeyError If the atom property is not valid. """ if not isinstance(atom_properties, (list, tuple)): atom_properties = [atom_properties] if len(atom_properties) > 1: try: atom_idx = set(self._atom_annotations[atom_properties[0]]) for atom_property in atom_properties[1:]: atom_idx.intersection_update(self._atom_annotations[atom_property]) except: error_msg = 'Atom property %s is not valid. Valid atom properties are: %s' raise KeyError(error_msg % (atom_property, self._atom_annotations.keys())) atom_idx = list(atom_idx) else: try: atom_idx = self._atom_annotations[atom_properties[0]] except: error_msg = 'Atom property %s is not valid. Valid atom properties are: %s' raise KeyError(error_msg % (atom_properties[0], self._atom_annotations.keys())) if atom_idx: return self.atoms(atom_idx, only_active) else: return np.array([])
[docs] def closest_atoms_from_positions(self, xyz, radius, atom_properties=None, ignore=None): """ Retrieve indices of the closest atoms around a positions/coordinates at a certain radius. Parameters ---------- xyz : np.ndarray array of 3D coordinates. radius : float radius. atom_properties : str or list, optional Property of the atoms to retrieve (properties: ligand, flexible_residue, vdw, hb_don, hb_acc, metal, water, reactive, glue). ignore : int or list, optional Ignore atom for the search using atom id (0-based). Default is None (no atoms are ignored). Returns ------- ndarray ndarray (atom_id, atom_name, resname, resid, chainid, xyz, q, t). Raises ------- KeyError If the atom property is not valid. """ atom_idx = self._KDTrees[self._current_pose].query_ball_point(xyz, radius, p=2, return_sorted=True) # When nothing was found around... if not atom_idx: return np.array([]) # Handle the case when positions for of only one atom was passed in the input try: atom_idx = {i for j in atom_idx for i in j} except: atom_idx = set(atom_idx) if atom_properties is not None: if not isinstance(atom_properties, (list, tuple)): atom_properties = [atom_properties] try: for atom_property in atom_properties: atom_idx.intersection_update(self._atom_annotations[atom_property]) except: error_msg = 'Atom property %s is not valid. Valid atom properties are: %s' raise KeyError(error_msg % (atom_property, self._atom_annotations.keys())) if ignore is not None: if not isinstance(ignore, (list, tuple, np.ndarray)): ignore = [ignore] atom_idx = atom_idx.difference([i for i in ignore]) # Get index of only the active atoms active_atoms_idx = self._pose_data['active_atoms'][self._current_pose] atom_idx = list(set(atom_idx).intersection(active_atoms_idx)) if atom_idx: atoms = self._atoms[atom_idx].copy() atoms['xyz'] = self._positions[self._current_pose, atom_idx,:] return atoms else: return np.array([])
[docs] def closest_atoms(self, atom_idx, radius, atom_properties=None): """ Retrieve indices of the closest atoms around a positions/coordinates at a certain radius. Parameters ---------- atom_idx : int, list index of one or multiple atoms (0-based). radius : float radius. atom_properties : str or list, optional Property of the atoms to retrieve (properties: ligand, flexible_residue, vdw, hb_don, hb_acc, metal, water, reactive, glue). Returns ------- ndarray ndarray (atom_id, atom_name, resname, resid, chainid, xyz, q, t). """ if not isinstance(atom_idx, (list, tuple)): atom_idx = [atom_idx] # Get index of only the active atoms active_atoms_idx = self._pose_data['active_atoms'][self._current_pose] atom_idx = list(set(atom_idx).intersection(active_atoms_idx)) if atom_idx: positions = self._positions[self._current_pose, atom_idx,:] return self.closest_atoms_from_positions(positions, radius, atom_properties, atom_idx) else: return np.array([])
[docs] def neighbor_atoms(self, atom_idx): """ Return neighbor (bonded) atoms of certain atom(s) by their index. Parameters ---------- atom_idx : int, list index of one or multiple atoms (0-based). Returns ------- list list of lists containing the neighbor (bonded) atoms (0-based). """ if not isinstance(atom_idx, (list, tuple, np.ndarray)): atom_idx = [atom_idx] # Get index of only the active atoms active_atoms_idx = self._pose_data['active_atoms'][self._current_pose] atom_idx = list(set(atom_idx).intersection(active_atoms_idx)) return [self._bonds[i] for i in atom_idx]
[docs] def write_pdbqt_string(self, as_model=True): """ Write PDBQT output string of the current pose. Parameters ---------- as_model : bool, optional Add MODEL/ENDMDL keywords to the output PDBQT string Default is True (add MODEL/ENDMDL keywords). Returns ------- str PDBQT string of the current pose. """ if as_model: pdbqt_string = 'MODEL %5d\n' % (self._current_pose + 1) pdbqt_string += self._pose_data['pdbqt_string'][self._current_pose] pdbqt_string += 'ENDMDL\n' return pdbqt_string else: return self._pose_data['pdbqt_string'][self._current_pose]
[docs] def write_pdbqt_file(self, output_pdbqtfilename, overwrite=False, as_model=False): """ Write PDBQT file of the current pose Parameters ---------- output_pdbqtfilename : str Filename of the output PDBQT file. overwrite : bool, optional Overwrite on existing PDBQT file. Default is False (do not overwrite). as_model : bool, optional Add MODEL/ENDMDL keywords to the output PDBQT string. Default is False (do not add MODEL/ENDMDL keywords). Raises ------ RuntimeError If the output PDBQT file already exists and overwrite is False. Returns ------- None """ logger.debug(overwrite and os.path.isfile(output_pdbqtfilename)) if not overwrite and os.path.isfile(output_pdbqtfilename): raise RuntimeError('Output PDBQT file %s already exists' % output_pdbqtfilename) if as_model: pdbqt_string = 'MODEL %5d\n' % (self._current_pose + 1) pdbqt_string += self._pose_data['pdbqt_string'][self._current_pose] pdbqt_string += 'ENDMDL\n' else: pdbqt_string = self._pose_data['pdbqt_string'][self._current_pose] with open(output_pdbqtfilename, 'w') as w: w.write(pdbqt_string)