import sys, os, string
import settings


## assign_helices.py - have the job to finding all alpha helices in the protein.
## It also calculates the hydrophobicity of each helix.

## This is were it all starts...
def AssignHelices(pdbfilename):

    ## If everything is OK, let us start DSSP to find the helices.
    try:
        filehandle = open(pdbfilename,'r')
        structure = filehandle.read()
    except:
        print 'Can not open PDB file'
        return

    if string.find(structure,"ATOM") == -1:
        print 'This is not a PDB file'
        return
    else:
        helices = runDSSP( pdbfilename )

    return helices
        

## Here we start DSSP and calculate the hydrophobicity
def runDSSP( pdbfilename ):
        
    print 'Running DSSP...'

    ## First there is an array of all amino acids and its hydropathy index.
    hydropathy = dict()
    hydropathy['G'] = -0.4
    hydropathy['A'] = 1.8
    hydropathy['a'] = 1.8
    hydropathy['P'] = 1.6
    hydropathy['V'] = 4.2
    hydropathy['L'] =3.8
    hydropathy['I'] =4.5
    hydropathy['M'] =1.9

    hydropathy['F'] =2.8
    hydropathy['Y'] =-1.3
    hydropathy['W'] = -0.9

    hydropathy['S'] =-0.8
    hydropathy['T'] =-0.7
    hydropathy['C'] =2.5
    hydropathy['N'] =-3.5
    hydropathy['Q'] =-3.5

    hydropathy['K'] =-3.9
    hydropathy['H'] =-3.2
    hydropathy['R'] =-4.5

    hydropathy['D'] =-3.5
    hydropathy['E'] = -3.5

    
    # an array containing the hydrophobicity of the helices
    hydrophobicity = []
    
    # Let us run DSSP from the command line, with our pdb file as input
    file = os.popen( settings.DSSP_PATH + 'dsspcmbi ' +   pdbfilename )

    # This stores the output of DSSP
    lines = file.readlines()

    # An array to store all helices in
    helices = []
    helices2 = []
    missing = []
    residues = []
    chains = []
    
    prev_sse = None
    started = 0
    chain_nr = 0
    prev_chain = None
    my_res_id = 0
    temp_res = []

    res_dict = dict()

    
    ## Then we read each line of the DSSP output. Maybe DSSP found some alpha helices to us
    for i in range(0, len(lines)):
        
        if lines[i][2:3] == "#":
            started = 1
        elif started != 0:
            chain = lines[i][11:12]
            #print chain
            if chain == ' ':
                chain = None
            
            r = lines[i][6:10]
            
            if r == "    ":
                print "no residue to read at line " + str(i)
            else:
                # the residue id from PDB... could not be used here since MMTK starts from 0
                res_id = int(lines[i][6:10])
                aa = str(lines[i][13:14])
                sse = lines[i][16:17]
                    
                if prev_chain != chain:
                    chains.append(chain)
                    chain_nr =chain_nr+1
                    my_res_id = 0
                    if len(temp_res) != 0:
                        residues.append(temp_res)
                        temp_res = []

                prev_chain = chain
                temp_res.append(res_id)

                ## If we find some helices... Lets store them.
                if sse == "H" or sse == "G" or sse == "I":
                    if prev_sse == None:
                        helix = []
                        helix2 = []
                        hydro = float(0.00)
                        #helix.append(chain_nr-1)
                        #helix2.append(chain_nr-1)
                        helix2.append(chain)
                    prev_sse = sse
                    helix.append(my_res_id)
                    helix2.append(res_id)
                    try:
                        hydro = hydro + hydropathy[aa]
                    except KeyError:
                        hydro = hydro
                        print aa + ' not found in hydropathy index.'
                    
                else:
                    if prev_sse == "H" or prev_sse == "G" or prev_sse == "I":
                        h_len = float(len(helix))
                        helix.append(my_res_id)
                        helix2.append(res_id)
                        helices.append(helix)
                        helices2.append(helix2)
                        hydrophobicity.append(hydro/h_len)
                        prev_sse = None
                        helix = None
                        helix2 = None
                        hydro = 0
            my_res_id = my_res_id + 1
    residues.append(temp_res)

    ## helices3 = []
##     prev_start = helices2[0][1]
##     prev_end = helices2[0][len(helices2[0])-1]
##     prev_chain = helices2[0][0]
##     helices3.append(helices2[0])
    
##     i=1
##     while i < len(helices2):
##         this_start = helices2[i][1]
##         this_end = helices2[i][len(helices2[i])-1]
##         this_chain = helices2[i][0]
##         helices3.append(helices2[i])

##         if int(prev_end+1) == int(this_start) and prev_chain == this_chain:
##             helices3.remove(helices2[i])
##             helices2[i].remove(this_chain)
##             helices2[i-1].extend(helices2[i])
##             #helices2[i] = this_chain + helices2[i]
##         prev_start = this_start
##         prev_end = this_end
##         prev_chain = this_chain
##         i+=1
    

    print chains
    #print helices
    #print helices2
    #print residues
            
    ## Finally we return a list of all the helices, and the hydrophobicity of each helix. 
    return helices2, residues, hydrophobicity, chains
    

if __name__ == '__main__':
    AssignHelices(sys.argv[1])