import glob,os,re,math,sys,random

# Should version this... 130502-11 TAW
# More extensive support for geometric operations


# In the context of this module, a residue is a list of atoms, where each atom/item
# is a list or tuple with at least 7 values:
#
# (atom name (str), residue (str), residue id (int), chain (char), x (float), y (float), z (float))


# Crude mass for weighted averages. No consideration of united atoms.
# This will probably give only minor deviations, while also giving less headache
# We add B with a mass of 32, so BB and SC* will have equal weights
_mass = {'H': 1,'C': 12,'N': 14,'O': 16,'S': 32,'P': 31,'M': 0, 'B': 32}


# Normalization factor for geometric modifiers (nanometer)
_normfac = 0.125 


# Listing of aminoacids
_aminoacids = [
    "ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS", "LEU",
    "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", "TRP", "TYR",
    "ACE", "NH2",
    ]


# Determine average position for a set of atoms
def _average(a):
    if a:
        # Get some massy number for each atom
        # The masses are tailored for atomistic models.
        # For coarse-grained force fields it is usually 
        # sufficient to have equal masses
        mxyz = [(_mass.get(i[0][0],1),i[4],i[5],i[6]) for i in a if i]         # Masses and coordinates
        mw   = [sum(i) for i in zip(*[(m*x,m*y,m*z,m) for m,x,y,z in mxyz]) ]  # Sums if weighted coordinates     
        return [i/mw[3] for i in mw]                                           # Centre of mass           
    return None


def _vsub(a,b):
    return [i-j for i,j in zip(a,b)]


def _vadd(a,b):
    return [i+j for i,j in zip(a,b)]


def _normalize(a):
    l = math.sqrt(sum([i*i for i in a]))
    return [i/l for i in a]


def _crossprod(a,b):
    return [a[1]*b[2]-a[2]*b[1],a[2]*b[0]-a[0]*b[2],a[0]*b[1]-a[1]*b[0]]


def _r(a,kick):
    return a+random.random()*kick-kick/2


class ResidueMap:
    def __init__(self,target=None,source=None,atoms=None,mod=[],name=""):

        if atoms:
            # Setting mapping from an atom list
            # Format is:
            # number, aa, cg beads
            x = [i[1]  for i in atoms]
            y = [i[2:] for i in atoms]
            # For those atoms for which no source list is given
            # set the source equal to that of the previous one
            for i in range(len(y)):
                if not y[i]:
                    y[i] = y[i-1]


        if source:
            y = source

        if target:

            if not atoms:
                x = target

            assert len(x) == len(y)

            # Case of forward mapping: atomistic to martini
            # Initialize dictionary 
            d = dict(zip(target,[[] for i in target]))

            # Fill entries
            # The mapping is specified in full both ways, which
            # means that, e.g. for Martini, the result may differ
            # from the original mapping definition. This should
            # add stability, and is required to allow the double 
            # mappings used in Martini for some residues.
            for u,v in zip(y,x):
                for j in u:
                    d[j].append(v)

            self.atoms = target
            self.map   = d
        else:
            self.atoms = x
            self.map   = dict(zip(x,y))

        # Special cases
        self.mod    = mod


    def do(self, residue, target=None, coords=False, nterm=False, cterm=False, nt=False, kick=0.05):
        # Given a set of source atoms with coordinates
        # return the corresponding list of mapped atoms
        # with suitable starting coordinates.

        # If a target list is given, match every atom against
        # the atoms in the ResidueMap. If an atom is not in
        # the definition, then it is returned with the 
        # coordinates of the last atom that was in the list.

        # For amino acids, nterm/cterm will cause extra hydrogens/
        # oxygen to be added at the start/end of the residue.
        # If nt (neutral termini) is true, two hydrogens will be
        # added in stead of three if nterm is true and an additional
        # hydrogen will be added at the end if cterm is true.

        # Unpack first atom
        first, resn, resi, chain, x, y, z = residue[0]
        resn = resn.strip()

        # Check whether a target was supplied
        set_termini = not target

        # Target atoms list
        if target:
            # A target atom list can be given as a list of names
            # or as a list of atoms. In the latter case, each element
            # will be a list or tuple and we extract the names.
            if type(target[0]) in (list,tuple):
                target = [i[0].strip() for i in target]
            elif type(target) == str:
                target = target.split()
            else:
                # Make a copy to leave the original list untouched
                target = list(target)
        else:
            target = list(self.atoms)


        # Atoms we have; the source dictionary
        atoms = [ i[0].strip() for i in residue ]
        have  = dict(zip(atoms, residue))


        # Set array for output; residue to return
        out = []


        # The target list is leading. Make sure that the atom list matches.
        # So, the actual atom list is built from the target list:
        atomlist = [i for i in target if i in self.atoms]                 


        # Go over the target particles; the particles we want
        # If we have a target topology, then there may be 
        # additional particles to want, especially hydrogens.
        # These will be assigned to the previous heavy atom 
        # from the want list.
        for want in atomlist:


            # If we have a target list, the atom will be in
            # (it would have been skipped otherwise), and we
            # can read up to the next we want. 
            if coords:
                got = coords.get(want, _average([ have.get(i) for i in self.map[want] ]))                
            else:
                got = _average([ have.get(i) for i in self.map[want] ])
            
            if not got:
                print "Problem determining mapping coordinates for atom %s of residue %s."%(target[0],resn)
                print "atomlist:", atomlist
                print "want:", want, self.map[want]
                print "have:", have.keys()            
                print "Bailing out..."
                print target
                sys.exit(1)


            # This logic reads the atom we want together with all atoms 
            # that are in the target list, but not in the residue 
            # definition in the mapping dictionary, up to the next atom
            # that is in that definition.
            while target and (target[0] == want or target[0] not in self.atoms):
                name = target.pop(0)

                out.append((name,resn,resi,chain,got[0],got[1],got[2]))                
   
                # If we have a target list given as argument to the function
                # then we ignore whatever is given for nterm/cterm.
                # Otherwise, the N-terminal/C-terminal additions are made 
                # right after the (N)H/(C)O, if nterm/cterm are set
                if resn in _aminoacids and set_termini:


                    # If this is an N-terminal amino acid, we may have
                    # to add one or two hydrogens.
                    if nterm:
                        if resn in ("PRO","HYP") and name == "N":
                            if nt:
                                # Add one
                                out.append(("H",resn,resi,chain,got[0],got[1],got[2]))
                            else:
                                # Add two
                                out.append(("H1",resn,resi,chain,got[0],got[1],got[2]))
                                out.append(("H2",resn,resi,chain,got[0],got[1],got[2]))
                        elif want == "H":
                            if nt:
                                # Add one
                                out.append(("H2",resn,resi,chain,got[0],got[1],got[2]))
                            else:
                                # Add two
                                out.append(("H2",resn,resi,chain,got[0],got[1],got[2]))
                                out.append(("H3",resn,resi,chain,got[0],got[1],got[2]))                


                    if cterm and want == "O":
                        out.append((want,resn,resi,chain,got[0],got[1],got[2]))
                        if nt:
                            out.append(("H",resn,resi,chain,got[0],got[1],got[2]))                        


        # Now add a random kick whenever needed to ensure that no atoms overlap
        for i in range(len(out)):
            for j in range(i):
                if out[i][4] == out[j][4] and out[i][5] == out[j][5] and out[i][6] == out[j][6]:
                    # Equal coordinates: need fix
                    x,y,z  = out[i][4:7]
                    out[i] = out[i][:4]+(_r(x,kick),_r(y,kick),_r(z,kick))


        # Create a lookup dictionary
        atoms = dict(zip([i[0] for i in out],range(len(out))))


        # Create a coordinate dictionary
        # This allows adding dummy particles for increasingly complex operations
        coord = dict([(i[0],(_r(i[4],1e-5),_r(i[5],1e-5),_r(i[6],1e-5))) for i in out])

        
        # Correct terminal amino acid N/C positions, to increase stability of 
        # subsequent corrections.
        if resn in _aminoacids and nterm:
            t = atoms.get("N")

            # Set N at 120 degree angle with respect to CA-C
            b = coord.get("CA")
            c = coord.get("C")

            # Only act if we really have backbone atoms
            if t != None and b != None and c != None:
                u = b[0]-c[0], b[1]-c[1], b[2]-c[2]
                l = math.sqrt(u[0]*u[0]+u[1]*u[1]+u[2]*u[2])
                try:
                    v = _normalize(_crossprod(u,(u[0]+1,u[1],u[2])))
                except ZeroDivisionError:
                    # Oh, so that vector was parallel. Then this must work.
                    v = _normalize(_crossprod(u,(u[0],u[1]+1,u[2])))

                coord["N"] = (b[0]+u[0]/2+.866*l*v[0],b[1]+u[1]/2+.866*l*v[1],b[2]+u[2]/2+.866*l*v[2])             
                out[t]     = out[t][:4] + coord["N"]


        if resn in _aminoacids and cterm:
            t = atoms.get("C")

            # Set N at 120 degree angle with respect to CA-N
            b = coord.get("CA")
            c = coord.get("N")
 
            # Only act if we really have backbone atoms
            if t != None and b != None and c != None:
                u = b[0]-c[0], b[1]-c[1], b[2]-c[2]
                l = math.sqrt(u[0]*u[0]+u[1]*u[1]+u[2]*u[2])
                try:
                    v = _normalize(_crossprod(u,(u[0]+1,u[1],u[2])))
                except ZeroDivisionError:
                    # Oh, so that vector was parallel. Then this must work.
                    v = _normalize(_crossprod(u,(u[0],u[1]+1,u[2])))

                coord["C"] = (b[0]+u[0]/2+.866*l*v[0],b[1]+u[1]/2+.866*l*v[1],b[2]+u[2]/2+.866*l*v[2])             
                out[t]     = out[t][:4] + coord["C"]


        # Before making modifications, save the raw projection results
        raw = [i for i in out]


        # Treat special cases: coordinate modifying operations
        for tag,i in self.mod:
            
            if tag == "trans":    

                #
                #  a              a--
                #   \                 \
                #    b--c       or     b--c--d
                #        \                 \  \
                #         d                 e--(e+d-2c)
                #
                # a = b + c - d

                # Target particle
                a = i[0]

                # Get coordinates for the other atoms
                s = [ coord.get(j,-1) for j in i[1:] ]

                # Connecting particle
                b = s.pop(0)

                if min(s) > -1:
                    # Subtract the third position from the fourth and so on, and sum the vectors
                    u = _normalize([sum(k) for k in zip(*[ _normalize(_vsub(j,s[0])) for j in s[1:] ])])

                    # Get the new position
                    x,y,z = b[0]-_normfac*u[0], b[1]-_normfac*u[1], b[2]-_normfac*u[2]

                    # Index of target atom, if in the list
                    t = atoms.get(a)

                    # Set the coordinates in the output if this is a real particle
                    if t != None:
                        out[t] = out[t][:4] + (x,y,z)

                    # Add coordinates to dictionary
                    coord[a] = (x,y,z)
                else:
                    print "Not all positions defined for [trans] operation:"
                    print [(j,coord.get(j)) for j in i]
                    

                # The 'trans' definition is also a line of atom names
                # Here the length should be four, always: a b c d
                #try:                
                #    a,b,c,d = i
                #except ValueError:
                #    print "Invalid trans bond definition in residue %s (%s). Ignoring."%(out[0][1],i)
                #    continue

                # Source coordinates
                #b = coord.get(b)
                #c = coord.get(c)
                #d = coord.get(d)

                #if b != None and c != None and d != None:
                #    # Vector c-d normalized
                #    u = [_normfac*j for j in _normalize(_vsub(c,d))]
 
                #    # New coordinates for a
                #    x,y,z = b[0]+u[0], b[1]+u[1], b[2]+u[2]

                #    # Index of target atom, if in the list
                #    t = atoms.get(a) 

                #    # Set the coordinates in the output if this is a real particle
                #    if t != None:
                #        out[t] = out[t][:4] + (x,y,z)

                #    # Add coordinates to dictionary
                #    coord[a] = (x,y,z)


            elif tag == "cis":    

                #
                #    b--c
                #   /    \
                #  a      d
                #
                # a = 2b - 2c + d

                # The 'cis' definition is also a line of atom names
                # Here the length should be four, always: a b c d
                # The difference with trans dihedrals is in the equation below
                try:                
                    a,b,c,d = i
                except ValueError:
                    print "Invalid trans bond definition in residue %s (%s). Ignoring."%(out[0][1],i)
                    continue

                # Source coordinates
                b = coord.get(b)
                c = coord.get(c)
                d = coord.get(d)

                if b != None and c != None and d != None:
                    try:
                        u = _normalize(_vsub(c,d))
                        v = _normalize(_vsub(b,c))
                        x,y,z = b[0]+_normfac*(v[0]-u[0]), b[1]+_normfac*(v[1]-u[1]), b[2]+_normfac*(v[2]-u[2])
                    except ZeroDivisionError:
                        x,y,z = 2*b[0]-2*c[0]+d[0],   2*b[1]-2*c[1]+d[1],   2*b[2]-2*c[2]+d[2]

                    # Index of target atom, if in the list
                    t = atoms.get(a) 

                    # Set the coordinates in the output if this is a real particle
                    if t != None:
                        out[t] = out[t][:4] + (x,y,z)

                    # Add coordinates to dictionary
                    coord[a] = (x,y,z)


            elif tag == "out":

                # Place a on the negative resultant vector 
                # 
                #       a
                #      /
                #  c--b
                #      \
                #       d
                #
                # a = 3b - c - d
                #

                # The 'out' definition is also a line of atom names
                # But the length can be variable

                # Target particle
                a = i[0]

                # Get coordinates for the other atoms
                s = [ coord.get(j,-1) for j in i[1:] ]

                if min(s) > -1:
                    # Subtract the center from the other atoms and sum the vectors
                    u = _normalize([sum(k) for k in zip(*[ _normalize(_vsub(j,s[0])) for j in s[1:] ])])

                    # Get the new position
                    x,y,z = s[0][0]-_normfac*u[0], s[0][1]-_normfac*u[1], s[0][2]-_normfac*u[2]

                    # Index of target atom, if in the list
                    t = atoms.get(a) 

                    # Set the coordinates in the output if this is a real particle
                    if t != None:
                        out[t] = out[t][:4] + (x,y,z)

                    # Add coordinates to dictionary
                    coord[a] = (x,y,z)
                

            elif tag == "chiral":

                # The 'chiral' definition is a list of atom names

                # Target atom
                a = i[0]

                # Get coordinates for the other atoms; the first atom in this list is the chiral center
                s = [ coord.get(j,-1) for j in i[1:] ]

                if min(s) > -1:
                    # Subtract the center from the other atoms
                    u = [ _vsub(j,s[0]) for j in s[1:] ]

                    # If there are multiple substituents given for a chiral center, determine the 
                    # average cross-product. Otherwise, use a more elaborate scheme for rebuilding.
                    if len(u) > 2:

                        # Get the resultant crossproduct normalized
                        c = _normalize([sum(m) for m in zip(*[ _crossprod(j,k) for j,k in zip([u[-1]]+u,u) ])])
    
                        # Set the new coordinates
                        # Ai, magic number here! Where does the 0.1 come from? 
                        x,y,z = s[0][0]+_normfac*c[0], s[0][1]+_normfac*c[1], s[0][2]+_normfac*c[2]

                    else:
                        #          
                        #  a   .
                        #   \ .
                        #    b--d
                        #   / 
                        #  c   
                        #        Like for CB/HA: 
                        #           CB CA N C
                        #           HA CA C N
                        #        Or (safer):
                        #           CB CA N C
                        #           HA CA C N CB 
                        #


                        # Unpack vectors
                        c, d  = u

                        # Midpoint; a vector from the center b
                        p     = [ .05*i for i in _normalize(_vadd(c,d)) ]

                        # Vector c-p
                        q     = [ .05*i for i in _normalize(_vsub(c,d)) ]
                      
                        # The vector in the direction of the target particle
                        try:
                            w   = [ _normfac*j for j in _normalize(_crossprod(q,p)) ]
                        except ZeroDivisionError:
                            trm = nterm and ", N-terminus" or (cterm and ", C-terminus" or "")
                            print "Chirality of %s (%s%s) for placing %s undefined by atoms %s. Skipping modification."%(i[1],resn,trm,i[0],repr(i[2:]))
                            continue
    
                        # The coordinates
                        x,y,z = s[0][0]+w[0]-p[0], s[0][1]+w[1]-p[1], s[0][2]+w[2]-p[2]

                    # Index of target atom, if in the list
                    t = atoms.get(a)

                    # Set the coordinates in the output if this is a real particle
                    if t != None:
                        out[t] = out[t][:4] + (x,y,z)

                    # Add coordinates to dictionary
                    coord[a] = (x,y,z)


        # Now add a random kick again whenever needed to ensure that no atoms overlap
        for i in range(len(out)):
            for j in range(i):
                if out[i][4] == out[j][4] and out[i][5] == out[j][5] and out[i][6] == out[j][6]:
                    # Equal coordinates: need fix
                    x,y,z  = out[i][4:7]
                    out[i] = out[i][:4]+(_r(x,kick),_r(y,kick),_r(z,kick))

        return out, raw
           

# These are the modifier tags. They signify a specific 
# operation on the atoms listed.
_mods = ("chiral","trans","cis","out")


# These are the default tags. Other tags should be 
# coarse grained model names.        
_tags = _mods + ("molecule","mapping","atoms")


def _init():
    molecules = []
    mapping   = {}
    cg        = []
    aa        = []
    ff        = []
    mod       = []
    cur       = []
    mol       = []
    tag       = re.compile('^ *\[ *(.*) *\]')

    # Read all .map residue definitions in the module directory
    for filename in glob.glob(os.path.dirname(__file__)+"/*.map"):

        # Default CG model is martini.
        cg_ff = "martini"

        for line in open(filename):
            
            # Strip leading and trailing spaces
            s = line.strip()

            # Check for directive
            if s.startswith("["):

                # Extract the directive name
                cur = re.findall(tag,s)[0].strip().lower()

                if not cur in _tags: # cur == "martini":
                    cg_ff = cur
                    cg    = []

                # The tag molecule starts a new molecule block
                # The tag mapping starts a new mapping block for a specific force field
                # In both cases, we need to purge the stuff that we have so far and
                # empty the variables, except 'mol'
                if cur in ("molecule","mapping"):                
                    # Check whether we have stuff
                    # If so, we purge
                    if aa:
                        for ffi in ff:
                            for m in mol:
                                try:
                                    mapping[(m,  ffi,  cg_ff )] = ResidueMap(target=cg,atoms=aa,name=m)
                                except:
                                    print "Error reading %s to %s mapping for %s (file: %s)."%(ffi,cg_ff,m,filename)
                                try:
                                    mapping[(m, cg_ff,  ffi  )] = ResidueMap(atoms=aa,mod=mod,name=m)
                                except:
                                    print "Error reading %s to %s mapping for %s (file: %s)."%(cg_ff,ffi,m,filename)
                        
                    # Reset lists
                    aa,ff,mod = [],[],[]

                    # Reset molecule name list if we have a molecule tag
                    if cur == "molecule":
                        mol = []

                continue
       
            # Remove comments
            s = s.split(';')[0].strip()

            if not s:
                # Skip empty lines
                continue
        
            elif cur == "molecule":
                mol.extend(s.split())
                
            elif not cur in _tags:  # cur == "martini":
                # Martini coarse grained beads in topology order
                cg.extend(s.split()) 

            elif cur == "mapping":
                # Multiple force fields can be specified
                ff.extend(s.split())

            elif cur == "atoms":
                # Atom list for current force field molecule definition 
                aa.append(s.split())
                
            elif cur in _mods: 
                # Definitions of modifying operations
                mod.append((cur,s.split()))


    # At the end we may still have rubbish left:
    if aa:
        for ffi in ff:
            for m in mol:
                try:
                    mapping[(m,  ffi,  cg_ff )] = ResidueMap(target=cg,atoms=aa,name=m)
                except:
                    print "Error reading %s to %s mapping for %s (file: %s)."%(ffi,cg_ff,m,filename)
                try:
                    mapping[(m, cg_ff,  ffi  )] = ResidueMap(atoms=aa,mod=mod,name=m)
                except:
                    print "Error reading %s to %s mapping for %s (file: %s)."%(cg_ff,ffi,m,filename)


    return mapping


mapping = _init()


def get(target="gromos",source="martini"): 
    D = dict([(i[0],mapping[i]) for i in mapping.keys() if i[1] == source and i[2] == target])
    print "Residues defined for transformation from %s to %s:"%(source,target)
    print D.keys()
    return D