Source code for prepmd.get_residues
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Read residue information from structure files
"""
NO_MODELLER = False
try:
from modeller import *
except:
NO_MODELLER = True
from prepmd import util
from prepmd import ligand
[docs]
def get_residues_pdb(pdb, code, get_hetatms=False):
"""
Get the fasta sequence of residues in the ATOM entries of a PDB or mmCif
file.
Args:
pdb: path to pdb file, a string
code: PDB code
Returns:
the fasta sequence as a string
"""
if NO_MODELLER:
raise ImportError("Can't run without MODELLER and a valid license key")
log.none()
e = Environ()
if get_hetatms:
e.io.hetatm = True
m = Model(e, file=pdb)
aln = Alignment(e)
aln.append_model(m, align_codes=code)
aln.write(file=code+'.seq')
with open(code+".seq") as file:
original_fasta = file.readlines()
return original_fasta
[docs]
def get_fullseq_pdb(pdb, code, get_hetatms=False):
"""
Get the fasta sequence of residues in the SEQRES records of a PDB/mmCif
file.
Args:
pdb: path to pdb/mmcif file, a string
code: PDB code
Returns:
the fasta sequence as a string
"""
seqres = {}
hetatms_found = False
# pdb
with open(pdb) as file:
for line in file:
if "SEQRES" in line:
split = line.split()
chain = split[2]
if chain not in seqres.keys():
seqres[chain] = []
sequence = split[4:]
seqres[chain] += (sequence)
if line.startswith("HET") and not hetatms_found and get_hetatms:
hetatms_found = True
# mmcif
if seqres == {}:
reading_seq = False
with open(pdb) as file:
for line in file:
if "_entity_poly_seq" in line:
reading_seq = True
if reading_seq:
if len(line.split()) == 4:
chain = line.split()[0]
if chain not in seqres.keys():
seqres[chain] = []
sequence = line.split()[2]
seqres[chain] .append(sequence)
if line.startswith("#"):
reading_seq = False
if line.startswith("HET") and not hetatms_found and get_hetatms:
hetatms_found = True
if hetatms_found:
# count hetatm residues and add an equivalent number of "." entries in
# the fasta sequence
universe = ligand.load_universe(pdb)
ligands = universe.select_atoms('not protein and not water')
num_hetatm_residues = len(ligands.split("residue"))
last_key = sorted(seqres.keys())[-1]
seqres[last_key] += [num_hetatm_residues*"."]
if get_hetatms and not hetatms_found:
raise ValueError("Was told to retrieve hetatms from "+pdb+" but none "
"were found.")
# convert to fasta
fastas = []
for chain, reses in seqres.items():
fasta = ""
for res in reses:
if util.is_residue(res):
fasta += util.three_to_one(res)
fastas.append(fasta)
fasta_joined = "/".join(fastas)
chains = ":::::::::"
if fastas == []:
raise IOError("Couldn't get full sequence from contents of "+pdb+". "
"Does it contain a sequence?")
return ">P1;"+code+"_fill\n"+chains+"\n"+fasta_joined+"*"