import sys, os, string import settings ## assign_helices.py - have the job to finding all alpha helices in the protein. ## It also calculates the hydrophobicity of each helix. ## This is were it all starts... def AssignHelices(pdbfilename): ## If everything is OK, let us start DSSP to find the helices. try: filehandle = open(pdbfilename,'r') structure = filehandle.read() except: print 'Can not open PDB file' return if string.find(structure,"ATOM") == -1: print 'This is not a PDB file' return else: helices = runDSSP( pdbfilename ) return helices ## Here we start DSSP and calculate the hydrophobicity def runDSSP( pdbfilename ): print 'Running DSSP...' ## First there is an array of all amino acids and its hydropathy index. hydropathy = dict() hydropathy['G'] = -0.4 hydropathy['A'] = 1.8 hydropathy['a'] = 1.8 hydropathy['P'] = 1.6 hydropathy['V'] = 4.2 hydropathy['L'] =3.8 hydropathy['I'] =4.5 hydropathy['M'] =1.9 hydropathy['F'] =2.8 hydropathy['Y'] =-1.3 hydropathy['W'] = -0.9 hydropathy['S'] =-0.8 hydropathy['T'] =-0.7 hydropathy['C'] =2.5 hydropathy['N'] =-3.5 hydropathy['Q'] =-3.5 hydropathy['K'] =-3.9 hydropathy['H'] =-3.2 hydropathy['R'] =-4.5 hydropathy['D'] =-3.5 hydropathy['E'] = -3.5 # an array containing the hydrophobicity of the helices hydrophobicity = [] # Let us run DSSP from the command line, with our pdb file as input file = os.popen( settings.DSSP_PATH + 'dsspcmbi ' + pdbfilename ) # This stores the output of DSSP lines = file.readlines() # An array to store all helices in helices = [] helices2 = [] missing = [] residues = [] chains = [] prev_sse = None started = 0 chain_nr = 0 prev_chain = None my_res_id = 0 temp_res = [] res_dict = dict() ## Then we read each line of the DSSP output. Maybe DSSP found some alpha helices to us for i in range(0, len(lines)): if lines[i][2:3] == "#": started = 1 elif started != 0: chain = lines[i][11:12] #print chain if chain == ' ': chain = None r = lines[i][6:10] if r == " ": print "no residue to read at line " + str(i) else: # the residue id from PDB... could not be used here since MMTK starts from 0 res_id = int(lines[i][6:10]) aa = str(lines[i][13:14]) sse = lines[i][16:17] if prev_chain != chain: chains.append(chain) chain_nr =chain_nr+1 my_res_id = 0 if len(temp_res) != 0: residues.append(temp_res) temp_res = [] prev_chain = chain temp_res.append(res_id) ## If we find some helices... Lets store them. if sse == "H" or sse == "G" or sse == "I": if prev_sse == None: helix = [] helix2 = [] hydro = float(0.00) #helix.append(chain_nr-1) #helix2.append(chain_nr-1) helix2.append(chain) prev_sse = sse helix.append(my_res_id) helix2.append(res_id) try: hydro = hydro + hydropathy[aa] except KeyError: hydro = hydro print aa + ' not found in hydropathy index.' else: if prev_sse == "H" or prev_sse == "G" or prev_sse == "I": h_len = float(len(helix)) helix.append(my_res_id) helix2.append(res_id) helices.append(helix) helices2.append(helix2) hydrophobicity.append(hydro/h_len) prev_sse = None helix = None helix2 = None hydro = 0 my_res_id = my_res_id + 1 residues.append(temp_res) ## helices3 = [] ## prev_start = helices2[0][1] ## prev_end = helices2[0][len(helices2[0])-1] ## prev_chain = helices2[0][0] ## helices3.append(helices2[0]) ## i=1 ## while i < len(helices2): ## this_start = helices2[i][1] ## this_end = helices2[i][len(helices2[i])-1] ## this_chain = helices2[i][0] ## helices3.append(helices2[i]) ## if int(prev_end+1) == int(this_start) and prev_chain == this_chain: ## helices3.remove(helices2[i]) ## helices2[i].remove(this_chain) ## helices2[i-1].extend(helices2[i]) ## #helices2[i] = this_chain + helices2[i] ## prev_start = this_start ## prev_end = this_end ## prev_chain = this_chain ## i+=1 print chains #print helices #print helices2 #print residues ## Finally we return a list of all the helices, and the hydrophobicity of each helix. return helices2, residues, hydrophobicity, chains if __name__ == '__main__': AssignHelices(sys.argv[1])