#decompo en 1 site pour sas, vdw, elec et sumconf ok teste sur RAR-RXR=> reste a voir le gbmv et pour calc #reste a tester decompo en 2 sites et voir comment gerer le sum_conf package Leto; our $VERSION = "2.0"; #Constant gamma for SAS =0.005. #To modify this value is necessary to modify the function energy_sum (line 1912 1920) # and the function energy_sum_conf (line 2167). package Leto::Atom; # Constructor of the Leto::Atom objects. # Usage : Leto::Atom->new() or Leto::Atom->new({ATOM_NAME=>CA},{RESIDUE_NUMBER=>25}) # Sends the attributes of the newly created objects to the _init method when arguments are passed to it. # If no argument is passed to it, the _init method initializes all the attributes of the object to the undef value. sub new { my $proto = shift; my $class = ref($proto) || $proto; my $self ={}; bless ($self, $class); $self->_init(@_); return $self; } sub _init { my $self = shift; $self->{RECORD_NAME} = undef; $self->{ATOM_NUMBER} = undef; $self->{ATOM_NAME} = undef; $self->{ALT_LOCATION} = undef; $self->{RESIDUE_NAME} = undef; $self->{CHAIN} = undef; $self->{RESIDUE_NUMBER} = undef; $self->{INSERTION} = undef; $self->{X} = undef; $self->{Y} = undef; $self->{Z} = undef; $self->{OCCUPANCY} = undef; $self->{BFACTOR} = undef; $self->{SEGMENT} = undef; $self->{ELEMENT} = undef; $self->{CHARGE} = undef; $self->{RESIDUE} = undef; $self->{UP_REF} = undef; $self->{SSBOND_RES} = undef; $self->{SSBOND_CHAIN} = undef; # If some arguments have been passed to _init via new(), the corresponding attributes take their values. if (@_) { my %extra = @_; @$self{keys %extra} = values %extra; } } # Automatic generation of closure accessors. # Creates when called the methods to access each of the object attributes. # As for all the object methods corresponding to an object attribute, passing an argument to the method will set the attribute to the value given in argument. # If no argument is passed to the object method, the current value of the corresponding attribute is returned. for my $champ (qw(RECORD_NAME ATOM_NUMBER ATOM_NAME ALT_LOCATION RESIDUE_NAME CHAIN RESIDUE_NUMBER INSERTION X Y Z OCCUPANCY BFACTOR SEGMENT ELEMENT CHARGE RESIDUE UP_REF SSBOND_RES SSBOND_CHAIN)) { my $meth = __PACKAGE__ . "::" . (lc $champ); no strict "refs"; *$meth = sub { my $self = shift; $self->{$champ} = shift if @_; return $self->{$champ}; } } # Creates a new Leto::Atom object with its attributes set from a line in the CRD format. sub read_crd_atom { my $proto = shift; if (ref($proto)) { return(0); } my $string = shift; # The CRD format definition used is the one found in the CHARMM manual. # Extracts the attributes of the Atom object from the line given in argument from a CRD file. my $atom_number = substr($string, 0, 5); my $residue_number = substr($string, 5,5); my $residue_name = substr($string, 11,4); my $atom_name = substr ($string,16,4); my $x = substr($string, 20,10); my $y = substr($string, 30,10); my $z = substr($string, 40,10); my $segid = substr($string, 51, 4); my $bfactor = substr($string, 60, 10); # Creation of an Atom object with its attributes set to the values previously extracted. my $atom = $proto->new( ATOM_NAME=>$atom_name, ATOM_NUMBER=>$atom_nb, BFACTOR=>$bfactor, X=>$x, Y=>$y, Z=>$z, RESIDUE_NAME=>$res_name, RESIDUE_NUMBER=>$res_nb, RESID=>$resid, SEGMENT=>$segid); return ($atom); } # Creates a new Leto::Atom object with its attributes set from a line in the Merk format. sub read_mrk_atom { my $proto = shift; if (ref($proto)) { return(0); } my $string = shift; # The Merck format specifications were found in the CHARMM documentation. # Extracts the attributes of the Atom object from the line given in argument from a MRK file. my $x = substr($string, 0,10); my $y = substr($string, 11,10); my $z = substr($string, 22,10); my $atom_number = substr($string, 44, 5); my $atom_name = substr ($string, 50,4); my $residue_number = substr($string, 54,4); my $residue_name = substr($string, 58,4); my $bfactor = substr($string, 62, 8); # In this case, the partial charge is set here. my $segid = substr($string, 76, 4); # Creation of an Atom object with its attributes set to the values previously extracted. my $atom = $proto->new( ATOM_NAME=>$atom_name, ATOM_NUMBER=>$atom_number, BFACTOR=>$bfactor, X=>$x, Y=>$y, Z=>$z, RESIDUE_NAME=>$residue_name, RESIDUE_NUMBER=>$residue_number, # RESID=>$resid, SEGMENT=>$segid); return ($atom); } # Creates a new Leto::Atom object with its attributes set from a line in the PDB format. sub read_pdb_atom { my $proto = shift; if (ref($proto)) { return(0); } my $string = shift; # The PDB format definition used is the one found in the PDB site documentation (RCSB). # Extracts the attributes of the Atom object from the line given in argument from a PDB file. # The numbers given in argument to the substr function define a field in a PDB line. # The first one indicates where the field begins and the other one the length of that field. my $rec_name = substr($string, 0,6); # Record name of the PDB line, such as ATOM, HETATM, TER... my $atom_nb = substr($string, 6,5); my $atom_name = substr($string, 11,5); my $alt_loc = substr($string, 16,1); # Alternative location my $res_name = substr($string, 17,4); my $chain_id = substr($string, 21,1); my $res_nb = substr($string, 22,4); my $ins_id = substr($string, 26,1); # Insertion identifier my $x = substr($string, 30,8); my $y = substr($string, 38,8); my $z = substr($string, 46,8); my $occup = substr($string, 54,6); # Occupancy my $temp_fact = substr($string, 60,6); # B factor my $seg_id = substr($string, 72,4); my $atom_id = substr($string, 76,2); my $atom_charge = substr($string, 78,2); # Creation of an Atom object with its attributes set to the values previously extracted. my $atom = $proto->new( RECORD_NAME=>$rec_name, ATOM_NUMBER=>$atom_nb, ATOM_NAME=>$atom_name, ALT_LOCATION=>$alt_loc, RESIDUE_NAME=>$res_name, CHAIN=>$chain_id, RESIDUE_NUMBER=>$res_nb, INSERTION=>$ins_id, X=>$x, Y=>$y, Z=>$z, OCCUPANCY=>$occup, BFACTOR=>$temp_fact, SEGMENT=>$seg_id, ELEMENT=>$atom_id, CHARGE=>$atom_charge ); return ($atom); } # Creates a new Leto::Atom object with its attributes set from a line which simply contains the atom name and its coordinates. sub read_xyz_atom { my $proto = shift; if (ref($proto)) { return(0); } # Extracts the 3 coordinates and name attributes from a simple string containing them. my $string = shift; my @items =split (" ", $string); # Creation of an Atom object with its attributes set to the values previously extracted. my $atom = Leto::Atom->new(); $atom->atom_name($items[0]); $atom->x($items[1]); $atom->y($items[2]); $atom->z($items[3]); return ($atom); } # Returns a string containing all the attributes of the Leto::Atom object given in argument in the CRD format. sub print_crd_atom { my $self=shift; my $string = ""; $string .= sprintf("%5d%5d %4s %4s%10.5f%10.5f%10.5f %4s %4s%10.5f\n", $self->atom_number(), $self->residue_number(), $self->residue_name(), $self->atom_name(), $self->x(), $self->y(), $self->z(), $self->segment(), ($self->resid() ? $self->resid() : $self->residue()), $self->bfactor() ); return ($string); } # Returns an ATOM or HETATM line containing all the attributes of the Leto::Atom object given in argument in the PDB format. sub print_pdb_atom { my $self = shift; my $string = ""; $string .= sprintf("%-6s%5d%5s%1s%3s %1s%4d%1s %8.3f%8.3f%8.3f%6.2f%6.2f %-4s%2s%2s\n", $self->record_name(), $self->atom_number(), $self->atom_name(), $self->alt_location(), $self->residue_name(), $self->chain(), $self->residue_number(), $self->insertion(), $self->x(), $self->y(), $self->z(), $self->occupancy(), $self->bfactor(), $self->segment(), $self->element(), $self->charge() ); return ($string); } # Returns an ATOM or HETATM line containing all the attributes of the Leto::Atom object given in argument in the PDB format. sub print_charmm_atom { my $self = shift; my $string = ""; $string .= sprintf("%-6s%5d%5s%1s%3s %4d%1s %8.3f%8.3f%8.3f%6.2f%6.2f %-4s%2s%2s\n", $self->record_name(), $self->atom_number(), $self->atom_name(), $self->alt_location(), $self->residue_name(), $self->residue_number(), $self->insertion(), $self->x(), $self->y(), $self->z(), $self->occupancy(), $self->bfactor(), $self->segment() ); return ($string); } sub print_pdb_atom_ok { my $self = shift; my $string = ""; $string .= sprintf("rec_name=%-6s\natom_num%5d\natom_nam%5s\nalt-loc=%1s\nres_nam=%3s\nchain= %1s\nres_num=%4d\ninsert=%1s\n %8.3f %8.3f %8.3f %6.2f %6.2f %-4s%2s%2s\n", $self->record_name(), $self->atom_number(), $self->atom_name(), $self->alt_location(), $self->residue_name(), $self->chain(), $self->residue_number(), $self->insertion(), $self->x(), $self->y(), $self->z(), $self->occupancy(), $self->bfactor(), $self->segment(), $self->element(), $self->charge() ); return ($string); } # Returns a TER line containing the required attributes of the Leto::Atom object given in argument in the PDB format. sub print_pdb_ter { my $self = shift; my $string = ""; $string .= sprintf("TER %5d %3s %1.1s%4d \n", $self->atom_number()+1, $self->residue_name(), $self->chain(), $self->residue_number()); return ($string); } # Returns an ATOM line containing all the attributes of the Leto::Atom object given in argument # in a format based upon the PDB one and which complies with the CHARMM requirements. sub print_pdb2charmm { my $self = shift; my $string = ""; #$string .= sprintf("ATOM %5d%5s%1s%4s %4d%1s %8.3f%8.3f%8.3f %6.2f %1.1s\n", $self->atom_number(), $self->atom_name(), $self->alt_location(), $self->residue_name(), $self->residue_number(), $self->insertion(), $self->x(), $self->y(), $self->z(), $self->occupancy(), $self->chain() ); $string .= sprintf("ATOM %5d%5s%1s%4s %4d%1s %8.3f%8.3f%8.3f %6.2f %-4.4s\n", $self->atom_number(), $self->atom_name(), $self->alt_location(), $self->residue_name(), $self->residue_number(), $self->insertion(), $self->x(), $self->y(), $self->z(), $self->occupancy(), $self->chain() ); return ($string); } # Returns an ATOM line containing all the attributes of the Leto::Atom object given in argument # in a format based upon the PDB one and which complies with the CHARMM requirements. sub print_pdb_tip3 { my $self = shift; my $string = ""; $string .= sprintf("ATOM %5d%5s TIP3 %4d%1s %8.3f%8.3f%8.3f %6.2f %1s\n", $self->atom_number(), $self->atom_name(), $self->residue_number(), $self->insertion(), $self->x(), $self->y(), $self->z(), $self->occupancy(), $self->chain() ); return ($string); } # Returns a string containing the corrdinates and atom name attributes of the Leto::Atom object given in argument. sub print_xyz_atom { my $self=shift; my $string=""; $string .= sprintf ("%-8s %8.6f %8.6f %8.6f\n", $self->atom_name(), $self->x(), $self->y(), $self->z()); return ($string); } # Computes the distance between 2 Leto::Atom objects given in argument. sub distance { my $proto = shift; if (ref($proto)) { return(0); } my ($atom1,$atom2) = @_; my $distance = sqrt( (($atom1->x()-$atom2->x())*($atom1->x()-$atom2->x())) + (($atom1->y()-$atom2->y())*($atom1->y()-$atom2->y())) + (($atom1->z()-$atom2->z())*($atom1->z()-$atom2->z())) ); return ($distance); } # Takes an array of Leto::Atom objects and returns a dummy Atom object which is the mass center of the ones given in argument. sub mass_center { my $proto = shift; if (ref($proto)) { return (0); } my @atoms_array = @_; my $atom_number = $#atoms_array+1; my $x=0; my $y=0; my $z=0; # 3 variables are initializied, one for each corrdinate. foreach (@atoms_array) { $x += $_->x(); $y += $_->y(); $z += $_->z(); } # Creation of a dummy Leto::Atom object representing the mass center of the atoms given in argument # and which coordinates are the sum of those of each atom divided by the total number of atoms. my $atom = Leto::Atom->new(); $atom->x($x/$atom_number); $atom->y($y/$atom_number); $atom->z($z/$atom_number); $atom->atom_name("DUM"); return ($atom); } # Has a Leto::Atom object translated by adding to its coordinates the ones given in argument . sub trans { my $self = shift; my @move = @_; if ($#move != 2) { return(0); # The method needs 3 coordinates in argument to process the tranlation. } $self->x() += $move[0]; $self->y() += $move[1]; $self->z() += $move[2]; return ($self); } # not corrected yet =for debug sub rotate { use Math::MatrixReal; my $self = shift; my @coors1 = splice(@_,0,3); my @coors2 = splice(@_,0,3); my @this_coors = ($self->x(),$self->y(),$self->z()); my $axpoint1 = Math::MatrixReal->new_from_cols([\@coors1]); my $axpoint2 = Math::MatrixReal->new_from_cols([\@coors2]); my $this_atom = Math::MatrixReal->new_from_rows([\@this_coors]); my $angle=shift(@_); my @xaxis = (1,0,0); my @yaxis = (0,1,0); my @zaxis = (0,0,1); my $ox = Math::MatrixReal->new_from_cols([\@xaxis]); my $oy = Math::MatrixReal->new_from_cols([\@yaxis]); my $oz = Math::MatrixReal->new_from_cols([\@zaxis]); my $axis = $axpoint2 - $axpoint1; my $axisnorm = $axis->length(); # l, m and n, directine cosine on Ox, Oy et Oz my $l = ( $axis->scalar_product($ox) )/$axisnorm; my $m = ( $axis->scalar_product($oy) )/$axisnorm; my $n = ( $axis->scalar_product($oz) )/$axisnorm; my $cosphi = cos($angle); my $sinphi = sin($angle); # we've got everything we need, let us start. # first, we do a translation to bring the first point # on the origin. This translation will be undone later my $copy = $axpoint1->shadow(); $copy = $axpoint1->clone(); # now we translate everyone, that is, $axpoint1, $axpoint2 # and this atom coordinate to transform... $axpoint1 -= $copy; $axpoint2 -= $copy; $this_atom -= ~$copy; # then we create the rotation matrix, with the # help of our friend the directing cosine my $matrix = Math::MatrixReal->new(3,3); $matrix->assign(1,1, ( ($l*$l)+($m*$m + $n*$n)*$cosphi)); $matrix->assign(1,2, ( ($l*$m)*(1-$cosphi)-($n * $sinphi))); $matrix->assign(1,3, ( ($n*$l)*(1-$cosphi)+($m*$sinphi))); $matrix->assign(2,1, ( ($l*$m)*(1-$cosphi)+($n*$sinphi))); $matrix->assign(2,2, ( ($m*$m)+($l*$l + $n*$n)*$cosphi)); $matrix->assign(2,3, ( ($n*$m)*(1-$cosphi)-($l*$sinphi))); $matrix->assign(3,1, ( ($n*$l)*(1-$cosphi)-($m*$sinphi))); $matrix->assign(3,2, ( ($n*$m)*(1-$cosphi)+($l*$sinphi))); $matrix->assign(3,3, ( ($n*$n)+($m*$m + $l*$l)*$cosphi)); # we apply the transformation to our coordinates $this_atom = $this_atom*$matrix; # we apply the opposite of the previous translation $this_atom += ~$copy; # and we set this atom coordinates to the new ones, youpi $self->coordinates( ($this_atom->element(1,1), $this_atom->element(1,2), $this_atom->element(1,3)) ); return $self; } =cut 1; #-------------------------------------------------------------------------------------------------------------------------------------------# package Leto::GrpAtoms; # Constructor of the Leto::GrpAtoms objects. # Usage : Leto::GrpAtom->new() or Leto::GrpAtoms->new({ATOM_NAME=>CA},{RESIDUE_NUMBER=>25}) # Sends the attributes of the newly created objects to the _init method when arguments are passed to it. # If no argument is passed to it, the _init method initializes all the attributes of the object to the undef value. sub new { my $proto = shift; my $class = ref($proto) || $proto; my $self ={}; bless ($self, $class); $self->_init(@_); return ($self); } sub _init { my $self = shift; $self->{NAME} = undef; $self->{CHAIN} = undef; $self->{NUMBER} = undef; $self->{INSERTION} = undef; $self->{SEGMENT} = undef; $self->{UP_REF} = undef; $self->{SUB_LIST} = []; # If some arguments have been passed to _init via new(), the corresponding attributes take their values. if (@_) { my %extra = @_; @$self{keys %extra} = values %extra; } } # Automatic generation of closure accessors. # Creates when called the methods to access each of the object attributes. # As for all the object methods corresponding to an object attribute, passing an argument to the method will set the attribute to the value given in argument. # If no argument is passed to the object method, the current value of the corresponding attribute is returned. for my $champ (qw(NAME NUMBER CHAIN INSERTION SEGMENT UP_REF)) { my $meth = __PACKAGE__ . "::" . (lc $champ); no strict "refs"; *$meth = sub { my $self = shift; $self->{$champ} = shift if @_; return ($self->{$champ}); } } # Method to access the sub_list attribute which takes an inventory of the objects belonging to the Leto::GrpAtoms object given in argument. # Apart from the other accessor because returns an array, and not a scalar like the other attributes methods. sub sub_list { my $self = shift; @{$self->{SUB_LIST}} = shift if @_; return (\@{$self->{SUB_LIST}}); } # Method to add objects to the the sub_list attribute which is an inventory of the objects belonging to the Leto::GrpAtoms object given in argument. sub add_item { my $self = shift; if (@_) { my @items_to_add = @_; # Adds items to the sub_list of the current Leto::GrpAtoms object, does not replace any existing one. foreach (@items_to_add) { push (@{ $self->{SUB_LIST} }, $_); } } return (\@{$self->{SUB_LIST}}); # Returns the atom list. Could be useful. } # Method to take an inventory of items which numbers are given in argument. # Interrogates the sub_list method with the atoms numbers. sub index_item { my $self = shift; my @array = []; # Array to contain the atoms which numbers are given in argument. my @indexes = []; # Array containing the atoms numbers. if (@_) { @indexes = @_; } else { return (undef); } my @allatoms = $self->sub_list(); for (my $i=0; $i<=(@indexes-1); $i++) { $array[$i] = \%{$allatoms[$indexes[$i]]}; # Fatal error if the index is not found in the sub_list attribute. die "\nThere is no item with this index in this residue/molecule\n" unless (defined($array[$i])); } return (\@array); } # Computes the distance between two Leto::GrpAtoms objects. # What it really does is determining the mass center for each one and calculating the distance between them. sub distance_mass_center { my $proto = shift; if (ref($proto)) { return(0); } my $mol1 = shift; my $mol2 = shift; # Determines the mass center of each of the 2 GrpAtoms by calling the sub_list attribute and giving it to the Leto::Atom->mass_center method. my $mass1 = Leto::Atom->mass_center($mol1->sub_list()); my $mass2 = Leto::Atom->mass_center($mol2->sub_list()); my $distance = Leto::Atom->distance($mass1, $mass2); return ($distance); } # Has a Leto::GrpAtoms object translated by adding to its coordinates the ones given in argument . sub trans { my $self = shift; my @move = @_; # The method needs 3 coordinates in argument to process the translation. if ($#move != 2) { return (0); } # Does a tranlation of each of the Leto::Atom comprised in the GrpAtoms by calling the trans method from the Leto::Atom package. foreach ($self->sub_list()) { $_->trans(@move); } return (1); } # Computes the Root Mean Square Distance between 2 GrpAtoms. sub rmsd { my $proto = shift; if (ref($proto)) { return (0); } my $mol1 = shift; # First GrpAtoms. my $mol2 = shift; # Second GrpAtoms. my @atoms1 = $mol1->sub_list(); # All the atoms from the 1st GrpAtoms. my @atoms2 = $mol2->sub_list(); # All the atoms from the sedond GrpAtoms. # The method needs the same number of atoms in the 2 GrpAtoms to process the RMSD calculation. if ($#atoms1 != $#atoms2) { return (undef); } my $sumdist = 0.0; foreach $index (0..$#atoms1) { my $distance = Leto::Atom->distance($atoms1[$index],$atoms2[$index]); # Distance between the of the atoms from the 2 GrpAtoms 2 by 2. $sumdist += ($distance * $distance); # Sum of the distances between each atoms pairs. } # Calculation of the RMSD as the sum of the distances of the atoms pairs divided by the total number of atoms pairs. my $rmsd = sqrt ($sumdist/($#atoms1+1)); return ($rmsd); } # Reads a PDB file from the Protein Data Bank and records atoms data into arrays of objects. sub read_pdb { my $proto = shift; return (0) if (ref($proto)); my ($lines_ref,$complex) = @_; my @complexes; # Array referencing all the chains in the PDB. my @ssbond; # Array referencing the disulfide bonds. my @chains_array; # Array refering to the chains if any. my @res_array; # Array refering to all the residues. my @atoms_array; # Array refering to all the atoms. my @hetatm; # Array containing the line of the hetatoms (except water and ligand) my @het; # Array containing the name of hetatoms present in the PDB file my $line_index = 0; $water=0; # initialization of the variable that allows to know if the user want water in his system $compnd=0; # initialization of the variable that allows to know if there is a line with compnd chain in the PDB file # Checking of the state of the Complex variable which defines the chains forming the complex to be studied using an option at the execution of the script. # If so, its value is the 2 identifiers of the chosen chains. if ($complex =~ /[A-Z]+/) { ($fc,$sc) = split (//,$complex); } ## READING THE FILE A FIRST TIME # Allows to read the file a first time to extract the name of chains and hetatoms (if there are them). foreach $i(0..$#{$lines_ref}) { if ($lines_ref->[$i]=~/^ATOM/) { if (substr($lines_ref->[$i],21,1) ne $complexes[-1]) { if ((substr($lines_ref->[$i],21,1) eq " ") && ($complexes[-1] ne "NULL")){ push (@complexes,"NULL"); } if (substr($lines_ref->[$i],21,1) ne " ") { push (@complexes,substr($lines_ref->[$i],21,1)); } } } elsif (($lines_ref->[$i]=~/^HETATM/) && ($lines_ref->[$i] !~/HOH/)) { if (substr($lines_ref->[$i],17,4) ne $het[-1]) { push (@het,substr($lines_ref->[$i],17,4)); } } elsif ($lines_ref->[$i]=~/HOH/) { $presence_hoh=1; } } # this part allows to ask to user which chain he wants in his system. print "\tWhich chain would you like to form your system ? (if the name is NULL there is only one chain without name)\n"; # The user must type his choice. foreach $c (0..$#complexes) { print "\t\t$complexes[$c]\n"; } print "\tType the name of the chain separated with comma : "; chomp ($sys = ); $sys=uc ($sys); @ch=split (/,/,$sys); # the array ch contains the name of each chain of the system $nb_ch_sys=$#ch+1; # variable that contains the number of chain of the system $chaine="\\b$ch[0]"; foreach $xx(1..$#ch) { $chaine="$chaine|\\b$ch[$xx]"; # this variable contains the name of each chain separated with a pipe and with \b } # to not confuse A and AT1 for example # this part allows to ask to user which hetatom he wants in his system if there are them. if (defined @het) { $sys_het="not"; # initialization of the variable that allows to know if the system contains or not heteratom. print "\tWhich hetatom would you like to form your system ?\n"; foreach $c (0..$#het) { print "\t\t$het[$c]\n"; } # The user must type in his choice. print "\tType the name of the hetatom separated with comma (or type not if none heteroatom) :"; chomp ($sys_het = ); if ($sys_het ne "not") { # if the user want heteroatom in his system $sys_het=uc ($sys_het); @het_sys=split (/,/,$sys_het); # the array het_sys contains the name of each hetatom of the system $nb_het_sys=$#het_sys+1; # variable that contains the number of hetatom of the system $hetero="\\b$het_sys[0]"; foreach $xx(1..$#het_sys) { $hetero="$hetero|\\b$het_sys[$xx]"; # this variable contains the name of each hetatom separated with a pipe } } } # this part allows to ask to user if he wants a chain with water molecule if there are them. if ($presence_hoh==1) { print "\tWould you a chain with water molecule? (y or n) :"; chomp ($eau =); if ($eau eq "y") { $water=1; } else { $water=0; } } ## READING THE SSBOND FILE IF IT EXIST if (-e "dfsep/ssbond.dat") { open (SSBOND, "< dfsep/ssbond.dat") || die "Unable to open the file dfsep/ssbond.dat : $!\n"; @ssbond=; close(SSBOND); } ## READING THE COORDINATES IN THE PDB # Running along the PDB file. while ($line_index <= $#{$lines_ref}) { # Identification of a water molecule. if ((substr($lines_ref->[$line_index],17,3) =~ /HOH/) && ($water==1)) { my $new_chain = Leto::GrpAtoms->new(); # Creation of a new GrpAtoms object. $new_chain->name("W"); # Initialization of the chain identifier attribute. # Recording of each water molecule. while ($lines_ref->[$line_index] =~ /HOH/) { my $current_resid = substr ($lines_ref->[$line_index], 22,4); # Initialization of the residue number. # Creation of the new residue GrpAtoms object recorded in the residue array. push (@res_array, my $new_res = Leto::GrpAtoms->new()); # Creation of the new Atom object recorded in the atoms array. push (@atoms_array, my $new_atom = Leto::Atom->read_pdb_atom($lines_ref->[$line_index])); # Here the residue corresponds to the single O atom because of the abscence of hydrogen atoms. # Setting up of different attributes of the object : $new_atom->chain("W"); $new_atom->up_ref(\$new_chain); # Reference to the chain GrpAtoms comprising the Atom object. $new_res->up_ref(\$new_chain); # Reference to the chain GrpAtoms comprising the residue GrpAtoms object. $new_res->name($new_atom->residue_name()); $new_res->chain("W"); $new_res->number($current_resid); $new_chain->add_item($new_atom); $line_index++; } # Recording of the Water chain in the chains array. push (@chains_array, $new_chain); } # Identification of a HETATM line (excluded water molecule). elsif (($lines_ref->[$line_index] =~ /^HETATM/) && ($lines_ref->[$line_index] !~ /HOH/)) { if (($lines_ref->[$line_index] =~ /($hetero)/) && ($sys_het ne "not")) { my $new_chain = Leto::GrpAtoms->new(); # Creation of a new GrpAtoms object. $new_chain->name("Z"); # Initialization of the chain identifier attribute. my $new_res = Leto::GrpAtoms->new(); # Creation of a new GrpAtoms object. $new_res->name(substr ($lines_ref->[$line_index], 17,4)); # Initialization of the chain identifier attribute. $new_res->up_ref(\$new_chain); # Reference to the chain GrpAtoms comprising the residue GrpAtoms object. $new_res->number(substr ($lines_ref->[$line_index], 22,4)); # Initialization of the residue number. $new_res->chain("Z"); # Recording of each hetero-atom. while ($lines_ref->[$line_index] =~ /($hetero)/) { # Creation of the new Atom object recorded in the atoms array. push (@atoms_array, my $new_atom = Leto::Atom->read_pdb_atom($lines_ref->[$line_index])); # Here the residue corresponds to the single O atom because of the abscence of hydrogen atoms. # Setting up of different attributes of the object : $new_atom->chain("Z"); $new_atom->up_ref(\$new_chain); # Reference to the chain GrpAtoms comprising the Atom object. $new_res->add_item($new_atom); $new_chain->add_item($new_atom); $line_index++; } # Recording of the ligand chain identifier in the chains array. push (@res_array, $new_res); # Recording of the ligand chain identifier in the chains array. push (@chains_array, $new_chain); } # The HETATM isn't a water molecule nor a ligand. else { push (@hetatm,$lines_ref->[$line_index]); $line_index++; } } # Identification of an ATOM line. elsif (($lines_ref->[$line_index] =~ /^ATOM/) && ((substr($lines_ref->[$line_index],21,1) =~ /($chaine)/) || ($chaine eq "NULL"))) { my $new_chain = Leto::GrpAtoms->new(); # Creation of a chain GrpAtoms object. my $current_chainid = substr($lines_ref->[$line_index], 21,1); # Initialization with the chain identifier. # Proceeds chain by CHAIN : ## begin CHAIN ## while ((substr($lines_ref->[$line_index], 21,1)) eq $current_chainid) { my $new_res = Leto::GrpAtoms->new(); # Creation of a new residue GrpAtoms object. my $current_resid = substr($lines_ref->[$line_index], 22,4); # Initialization with the residue number. # Proceeds residue by RESIDUE : ## begin RESIDUE ## while ((substr($lines_ref->[$line_index], 22,4)) == $current_resid) { # Creation of the new Atom object recorded in the atoms array. push (@atoms_array, my $new_atom = Leto::Atom->read_pdb_atom($lines_ref->[$line_index])); # Identification of CYS which numbers have been recorded in the @ssbond array. foreach $i(1..$#ssbond) { chomp ($ssbond[$i]); @bond=split(/\t/,$ssbond[$i]); # Puts space to compare this number with the residue_number which is in 4 caracters if (length($bond[1])==1) { $bond[1]=" $bond[1]"; } if (length($bond[1])==2) { $bond[1]=" $bond[1]"; } if (length($bond[1])==3) { $bond[1]=" $bond[1]"; } if (($new_atom->residue_number() == $bond[1]) && ($new_atom->chain() eq $bond[0]) && ($new_atom->residue_name() =~ /CYS /) && ($new_atom->atom_name() =~/ C /)) { # Setting up of a new attribute representing the number of the residue the current atom makes a disulfide bond with. $new_atom->ssbond_res($bond[3]); # Setting up of a new attribute representing the chain identifier of the residue the current atom makes a disulfide bond with. $new_atom->ssbond_chain($bond[2]); last; } } # Setting up of different attributes of the object : # Identification of a single chain in the PDB file (which has consequently no identifier). if ($new_atom->chain() eq " ") { $new_atom->chain("A"); } $new_atom->up_ref(\$new_res); # Reference to the residue GrpAtoms comprising the Atom object. $new_res->up_ref(\$new_chain); # Reference to the chain GrpAtoms comprising the residue GrpAtoms object. $new_res->add_item($new_atom); $new_res->name($new_atom->residue_name()); $new_res->chain($new_atom->chain()); $new_res->number($current_resid); $new_res->insertion($new_atom->insertion()); $new_res->segment($new_atom->segment()); $line_index++; # To prevent the program to record the TER line at the end of each chain. if ($lines_ref->[$line_index] !~ /^ATOM/) { last; } } ## end RESIDUE ## # The next ATOM line no longer belongs to the same residue : $new_chain->add_item($new_res); $new_chain->name($new_res->chain()); # Recording of the residue GrpAtoms object in the residue array. push (@res_array, $new_res); # To prevent the program to record the TER line at the end of each chain. if ($lines_ref->[$line_index] !~ /^ATOM/) { last; } } ## end CHAIN ## # Last residue of the chain my $chain_id = substr($lines_ref->[$line_index-1], 21,1); my $name = "$chain_id"; $na = substr($lines_ref->[$line_index-1], 22,4); $last{$name} = $na; # Recording of the chain GrpAtoms object in the chains array. push (@chains_array, $new_chain); } # The line was not a SSBOND/ATOM/HETATM/TER line, goes to the next line. else { $line_index++; } } # Returns the chains residues and atoms arrays as well as the last residues hash. return (\@chains_array, \@res_array, \@atoms_array, \%last, \@hetatm, $nb_ch_sys, $nb_het_sys, $water); } # Reads a PDB file from the Charmm and records atoms data into arrays of objects. sub read_pdb_charmm { my $proto = shift; return (0) if (ref($proto)); my ($lines_ref,$complex) = @_; my @complexes; # Array referencing all the chains in the pdb file. my @het; # Array containing the name of hetatoms present in the pdb file. my @ssbond; # Array referencing the disulfide bonds. my @chains_array; # Array refering to the chains if any. my @res_array; # Array refering to all the residues. my @atoms_array; # Array refering to all the atoms. my $presence_hoh = 0; #Check if there is water in the file $sys_het="not"; # initialization of the variable that allows to know if the system contains or not heteratom. my $line_index = 0; # Checking of the state of the Complex variable which defines the chains forming the complex to be studied using an option at the execution of the script. # If so, its value is the 2 identifiers of the chosen chains. if ($complex =~ /[A-Z]+/) { ($fc,$sc) = split (//,$complex); } ## READING THE FILE A FIRST TIME # Allows to read the file a first time to extract the name of chains and hetatoms (if there are them). foreach $i(0..$#{$lines_ref}) { if ($lines_ref->[$i]=~/^ATOM/) { # allows to put the name NULL if the file contains a single chain without name. # when the length of the line is lower than 76 there is a single chain without name. if (((substr($lines_ref->[$i],72,4) eq " ") || (length($lines_ref->[$i])<76)) && ($complexes[-1] ne "NULL")) { push (@complexes,"NULL"); } # allows to put in the array containing the name of chain if ((substr($lines_ref->[$i],72,4) ne $complexes[-1]) && (length($lines_ref->[$i])>76)) { if (substr($lines_ref->[$i],72,4) ne " ") { push (@complexes,substr($lines_ref->[$i],72,4)); } } } elsif (($lines_ref->[$i]=~/^HETATM/) && ($lines_ref->[$i]!~/TIP3/) && ($lines_ref->[$i]!~/HOH/)) { if (substr($lines_ref->[$i],17,4) ne $het[-1]) { push (@het,substr($lines_ref->[$i],17,4)); } } elsif (($lines_ref->[$i]=~/TIP3/) || ($lines_ref->[$i]=~/HOH/)) { $presence_hoh=1; } } # this part allows to ask to user which chain he wants in his system. print "\tWhich chain would you like to form your system ? (if the name is NULL there is only one chain without name)\n"; # The user must type his choice. foreach $c (0..$#complexes) { print "\t\t$complexes[$c]\n"; } print "\tType the name of the chain separated with comma : "; chomp ($sys = ); $sys=uc ($sys); @ch=split (/,/,$sys); # the array ch contains the name of each chain of the system $nb_ch_sys=$#ch+1; # variable that contains the number of chain of the system $chaine="\\b$ch[0]"; foreach $xx(1..$#ch) { $chaine="$chaine|\\b$ch[$xx]"; # this variable contains the name of each chain separated with a pipe and with \b } # to not confuse A and AT1 for example $chaine="$chaine\\b"; # this part allows to ask to user which hetatom he wants in his system if there are them. if (defined @het) { print "\tWhich hetatom would you like to form your system ?\n"; foreach $c (0..$#het) { print "\t\t$het[$c]\n"; }# Last residue of the chain my $seg_id = substr($lines_ref->[$line_index-1], 72,4); $seg_id =~ s/\s*//g; my $name = "$seg_id"; $na = substr($lines_ref->[$line_index-1], 22,4); $last{$name} = $na; # The user must type in his choice. print "\tType the name of the hetatom separated with comma (or type not if none heteroatom) :"; chomp ($sys_het = ); if ($sys_het ne "not") { # if the user want heteroatom in his system $sys_het=uc ($sys_het); @het_sys=split (/,/,$sys_het); # the array het_sys contains the name of each hetatom of the system $nb_het_sys=$#het_sys+1; # variable that contains the number of hetatom of the system $hetero="\\b$het_sys[0]"; foreach $xx(1..$#het_sys) { $hetero="$hetero|\\b$het_sys[$xx]"; # this variable contains the name of each hetatom separated with a pipe } } } # this part allows to ask to user if he wants a chain with water molecule if there are them. if ($presence_hoh==1) { print "\tWould you a chain with water molecule? (y or n) :"; chomp ($eau =); if ($eau eq "y") { $water=1; } else { $water=0; } } ## READING THE SSBOND FILE IF IT EXISTS if (-e "dfsep/ssbond.dat") { open (SSBOND, "< dfsep/ssbond.dat") || die "Unable to open the file dfsep/ssbond.dat : $!\n"; @ssbond=; close(SSBOND); } ## READING THE COORDINATES IN THE PDB # Running along the PDB file. while ($line_index <= $#{$lines_ref}) { my $seg_id = substr($lines_ref->[$line_index], 72,4); $seg_id =~ s/\s*//g; # Identification of a water molecule. if (((substr($lines_ref->[$line_index],17,3) =~ /HOH/) || (substr($lines_ref->[$line_index],17,3) =~ /TIP/)) && ($water==1)) { my $new_chain = Leto::GrpAtoms->new(); # Creation of a new GrpAtoms object. $new_chain->name("W"); # Initialization of the chain identifier attribute. # Recording of each water molecule. while (($lines_ref->[$line_index] =~ /HOH/) || ($lines_ref->[$line_index] =~ /TIP/)) { my $current_resid = substr ($lines_ref->[$line_index], 22,4); # Initialization of the residue number. # Creation of the new residue GrpAtoms object recorded in the residue array. push (@res_array, my $new_res = Leto::GrpAtoms->new()); # Creation of the new Atom object recorded in the atoms array. push (@atoms_array, my $new_atom = Leto::Atom->read_pdb_atom($lines_ref->[$line_index])); # Here the residue corresponds to the single O atom because of the abscence of hydrogen atoms. # Setting up of different attributes of the object : $new_atom->chain("W"); $new_atom->up_ref(\$new_chain); # Reference to the chain GrpAtoms comprising the Atom object. $new_res->up_ref(\$new_chain); # Reference to the chain GrpAtoms comprising the residue GrpAtoms object. $new_res->name($new_atom->residue_name()); $new_res->chain("W"); $new_res->number($current_resid); $new_chain->add_item($new_atom); $line_index++; } # Recording of the Water chain in the chains array. push (@chains_array, $new_chain); } # Identification of a heteroatom. elsif (($lines_ref->[$line_index] =~ /^HETATM/) && (substr($lines_ref->[$line_index],17,3) =~ /($hetero)/) && ($sys_het ne "not") && (($lines_ref->[$line_index] !~ /HOH/) || ($lines_ref->[$line_index] !~ /TIP/))) { my $new_chain = Leto::GrpAtoms->new(); # Creation of a new GrpAtoms object. $new_chain->name("Z"); # Initialization of the chain identifier attribute. my $new_res = Leto::GrpAtoms->new(); # Creation of a new GrpAtoms object. $new_res->name(substr ($lines_ref->[$line_index], 17,4)); # Initialization of the chain identifier attribute. $new_res->up_ref(\$new_chain); # Reference to the chain GrpAtoms comprising the residue GrpAtoms object. $new_res->number(substr ($lines_ref->[$line_index], 22,4)); # Initialization of the residue number. $new_res->chain("Z"); # Recording of each hetero-atom. while (($lines_ref->[$line_index] =~ /($hetero)/) && ($lines_ref->[$line_index] =~ /^HETATM/)) { # Creation of the new Atom object recorded in the atoms array. push (@atoms_array, my $new_atom = Leto::Atom->read_pdb_atom($lines_ref->[$line_index])); # Removes the space in the segment $seg=$new_atom->segment(); $seg=~s/\s*//g; $new_atom->segment($seg); # Here the residue corresponds to the single O atom because of the abscence of hydrogen atoms. # Setting up of different attributes of the object : $new_atom->chain("Z"); $new_atom->up_ref(\$new_chain); # Reference to the chain GrpAtoms comprising the Atom object. $new_res->add_item($new_atom); $new_chain->add_item($new_atom); $line_index++; } # Recording of the ligand chain identifier in the chains array. push (@res_array, $new_res); # Recording of the ligand chain identifier in the chains array. push (@chains_array, $new_chain); $line_index++; } # Identification of an ATOM line. elsif (($lines_ref->[$line_index] =~ /^ATOM/) && (($seg_id =~ /($chaine)/) || ($chaine =~/NULL/)) ) { my $new_chain = Leto::GrpAtoms->new(); # Creation of a chain GrpAtoms object. my $current_segid = substr($lines_ref->[$line_index],72,4); # Initialization with the segid identifier. # Proceeds segid by SEGID : ## begin SEGID ## while ((substr($lines_ref->[$line_index], 72,4)) eq $current_segid) { my $new_res = Leto::GrpAtoms->new(); # Creation of a new residue GrpAtoms object. my $current_resid = substr($lines_ref->[$line_index],22,4); # Initialization with the residue number. # Proceeds residue by RESIDUE : ## begin RESIDUE ## # allow to not continue if there is only one residue 1 while (((substr($lines_ref->[$line_index],22,4)) == $current_resid) && (substr($lines_ref->[$line_index], 72,4) eq $current_segid)) { # Creation of the new Atom object recorded in the atoms array. push (@atoms_array, my $new_atom = Leto::Atom->read_pdb_atom($lines_ref->[$line_index])); # Removes the space in the segment $seg=$new_atom->segment(); $seg=~s/\s*//g; $new_atom->segment($seg); $new_atom->chain($seg); # Identification of CYS which numbers have been recorded in the @ssbond array.. foreach $i(1..$#ssbond) { @bond=split(/\t/,$ssbond[$i]); # Puts space to compare this number with the residue_number which is in 4 caracters if (length($bond[1])==1) { $bond[1]=" $bond[1]"; } if (length($bond[1])==2) { $bond[1]=" $bond[1]"; } if (length($bond[1])==3) { $bond[1]=" $bond[1]"; } if (($new_atom->residue_number() == $bond[1]) && ($new_atom->segment() eq $bond[0]) && ($new_atom->residue_name() =~ /CYS /) && ($new_atom->atom_name() =~/ C /)) { # Setting up of a new attribute representing the number of the residue the current atom makes a disulfide bond with. $new_atom->ssbond_res($bond[3]); # Setting up of a new attribute representing the chain identifier of the residue the current atom makes a disulfide bond with. $new_atom->ssbond_chain($bond[2]); last; } } # Identification of a single chain in the pdb file (which has consequently no identifier). if ($new_atom->segment() eq "") { $new_atom->segment("A"); $new_atom->chain("A") } $new_atom->up_ref(\$new_res); # Reference to the residue GrpAtoms comprising the Atom object. $new_res->up_ref(\$new_chain); # Reference to the chain GrpAtoms comprising the residue GrpAtoms object. $new_res->add_item($new_atom); $new_res->name($new_atom->residue_name()); $new_res->number($current_resid); $new_res->insertion($new_atom->insertion()); $new_res->segment($new_atom->segment()); $line_index++; # To prevent the program to record the TER line at the end of each chain. if ($lines_ref->[$line_index] !~ /^ATOM/) { last; } } ## end RESIDUE ## # The next ATOM line no longer belongs to the same residue : $new_chain->add_item($new_res); $new_chain->name($new_res->segment()); # Recording of the residue GrpAtoms object in the residue array. push (@res_array, $new_res); # To prevent the program to record the TER line at the end of each chain. if ($lines_ref->[$line_index] !~ /^ATOM/) { last; } } # Last residue of the chain my $seg_id = substr($lines_ref->[$line_index-1], 72,4); $seg_id =~ s/\s*//g; my $name = "$seg_id"; $na = substr($lines_ref->[$line_index-1], 22,4); $last{$name} = $na; ## end CHAIN ## # Recording of the chain GrpAtoms object in the chains array. push (@chains_array, $new_chain); } # The line was not a SSBOND/ATOM/HETATM line, goes to the next line. else { $line_index++; } } # Returns the chains residues and atoms arrays as well as the last residues hash. return (\@chains_array, \@res_array, \@atoms_array, \%last, \@hetatm, $nb_ch_sys,$nb_het_sys,$water); } # Corrects the attributes of some Atom objects to comply with the CHARMM standard. sub pdb2charmm { my $proto = shift; return (0) if (ref($proto)); my ($atoms_ref,$last_ref,$type) = @_; my $index = 0; my (@before,@after); # Arrays recording the modifications done by this function. # Runs along the atoms array. while ($index <= $#{$atoms_ref}) { # Identification of an alternative location for the current Atom object. if ($atoms_ref->[$index]->alt_location() =~ /[A-Z]/) { # First location for the Atom. if ($atoms_ref->[$index]->alt_location() =~ /A/) { $occup_max = $atoms_ref->[$index]->occupancy(); # Initialization of an occupancy value which represents the maximum probability for an alternative location to occur. $atoms_ref->[$index]->alt_location(""); # Reset of the alternative location attribute. push (@before,join(' ',$atoms_ref->[$index]->atom_number(),$atoms_ref->[$index]->atom_name(),$atoms_ref->[$index]->residue_name(),$atoms_ref->[$index]->residue_number(),$atoms_ref->[$index]->chain())); push (@after,"alt_location"); } else { # The current Atom has a greater occupancy than the last one. if ($atoms_ref->[$index]->occupancy() > $occup_max) { $occup_max = $atoms_ref->[$index]->occupancy(); # Its value is the new maximum occupancy. $atoms_ref->[$index]->alt_location(""); # Reset of the alternative location attribute. # Deletion of the previous Atom object from the atoms array. splice (@{$atoms_ref},$index-1,1); $index--; # The index must be set to the correct value to prevent the program to skip an Atom object. } # The current Atom has a lower occupancy than the last one. else { $atoms_ref->[$index]->alt_location(""); # Reset of the alternative location attribute. # Deletion of the current Atom object from the atoms array. splice (@{$atoms_ref},$index,1); $index--; # The index must be set to the correct value to prevent the program to skip an Atom object. } } } # Water atom name HOH corrected in OH2. if ($atoms_ref->[$index]->residue_name() eq "HOH ") { push (@before,join(' ',$atoms_ref->[$index]->atom_number(),$atoms_ref->[$index]->atom_name(),$atoms_ref->[$index]->residue_name(),$atoms_ref->[$index]->residue_number(),$atoms_ref->[$index]->chain())); $atoms_ref->[$index]->atom_name("OH2"); push (@after,join(' ',$atoms_ref->[$index]->atom_number(),$atoms_ref->[$index]->atom_name(),$atoms_ref->[$index]->residue_name(),$atoms_ref->[$index]->residue_number(),$atoms_ref->[$index]->chain())); } # Residue name HIS corrected in HSP. if ($atoms_ref->[$index]->residue_name() eq "HIS ") { push (@before,join(' ',$atoms_ref->[$index]->residue_name(),$atoms_ref->[$index]->residue_number(),$atoms_ref->[$index]->chain())); $atoms_ref->[$index]->residue_name("HSP "); push (@after,join(' ',$atoms_ref->[$index]->residue_name(),$atoms_ref->[$index]->residue_number(),$atoms_ref->[$index]->chain())); } # Isoleucine atom name CD1 corrected in CD. if (($atoms_ref->[$index]->residue_name() eq "ILE ") && ($atoms_ref->[$index]->atom_name() =~ /CD1/)) { push (@before,join(' ',$atoms_ref->[$index]->atom_number(),$atoms_ref->[$index]->atom_name(),$atoms_ref->[$index]->residue_name(),$atoms_ref->[$index]->residue_number(),$atoms_ref->[$index]->chain())); $atoms_ref->[$index]->atom_name("CD "); push (@after,join(' ',$atoms_ref->[$index]->atom_number(),$atoms_ref->[$index]->atom_name(),$atoms_ref->[$index]->residue_name(),$atoms_ref->[$index]->residue_number(),$atoms_ref->[$index]->chain())); } # Atom name OXT corrected in OT2. if ($atoms_ref->[$index]->atom_name =~ /OXT/) { push (@before,join(' ',$atoms_ref->[$index]->atom_number(),$atoms_ref->[$index]->atom_name(),$atoms_ref->[$index]->residue_name(),$atoms_ref->[$index]->residue_number(),$atoms_ref->[$index]->chain())); $atoms_ref->[$index]->atom_name("OT2"); push (@after,join(' ',$atoms_ref->[$index]->atom_number(),$atoms_ref->[$index]->atom_name(),$atoms_ref->[$index]->residue_name(),$atoms_ref->[$index]->residue_number(),$atoms_ref->[$index]->chain())); } # Terminal O of the last residue of a chain corrected in OT1. if (((($atoms_ref->[$index]->residue_number() == $last_ref->{$atoms_ref->[$index]->chain()}) && ($type==1)) || (($atoms_ref->[$index]->residue_number() == $last_ref->{$atoms_ref->[$index]->segment()}) && ($type==2))) && ($atoms_ref->[$index]->atom_name() =~ / O /) && (Leto::GrpAtoms->is_aa($atoms_ref->[$index]->residue_name()))) { push (@before,join(' ',$atoms_ref->[$index]->atom_number(),$atoms_ref->[$index]->atom_name(),$atoms_ref->[$index]->residue_name(),$atoms_ref->[$index]->residue_number(),$atoms_ref->[$index]->chain())); $atoms_ref->[$index]->atom_name("OT1"); push (@after,join(' ',$atoms_ref->[$index]->atom_number(),$atoms_ref->[$index]->atom_name(),$atoms_ref->[$index]->residue_name(),$atoms_ref->[$index]->residue_number(),$atoms_ref->[$index]->chain())); } $index++; } # Returns the corrected atoms array. return ($atoms_ref,\@before,\@after); } # RenumbePATCHING FIRST NONE LAST NONErs from 1 the atoms and residues of a PDB. sub renum_pdb { my $proto = shift; return (0) if (ref($proto)); my ($atoms_ref,$last_ref,$Verbose,$type) = @_; my $index = 0; my $atom_n = 1; # Initialization of the atom number. my $res_n = 1; # Initialization of the residue number. # Runs along the atoms array. # while ($index <= $#{$atoms_ref}) { while ($index < $#{$atoms_ref}+1) { my $chain_id = $atoms_ref->[$index]->chain(); my $seg_id = $atoms_ref->[$index]->segment(); # Proceeds chain by CHAIN : ## begin CHAIN ## while ((($atoms_ref->[$index]->chain() eq $chain_id) && ($type==1)) || (($atoms_ref->[$index]->segment() eq $seg_id) && ($type==2))) { my $res_id = $atoms_ref->[$index]->residue_number(); # Proceeds residue by RESIDUE : ## begin RESIDUE ## while (($atoms_ref->[$index]->residue_number() == $res_id) && ((($atoms_ref->[$index]->chain() eq $chain_id) && ($type==1)) || (($atoms_ref->[$index]->segment() eq $seg_id) && ($type==2)))) { # Identification of an Atom involved in a disulfide bond. if (defined $atoms_ref->[$index]->ssbond_res()) { my $offset = $atoms_ref->[$index]->residue_number()-$res_n; # Correction of the number of the other residue involved in the disulfide bond according to the offset of the current Atom. $atoms_ref->[$index]->ssbond_res($atoms_ref->[$index]->ssbond_res()-$offset); } $atoms_ref->[$index]->atom_number($atom_n); $atoms_ref->[$index]->residue_number($res_n); $atom_n++; $index++; # To prevent the program to run beyond the length of the atoms array. if (!defined $atoms_ref->[$index]) { last; } } ## end RESIDUE ## $res_n++; # To prevent the program to run beyond the length of the atoms array. if (!defined $atoms_ref->[$index]) { last; } } ## end CHAIN ## # Correction of the last residues %last hash. $last_ref->{$chain_id} = $res_n-1; if ($chain_id !~ /(H|W)/) { warn "\tChain $chain_id : $last_ref->{$chain_id} residues.\n" if ($Verbose == 1); } $res_n = 1; # Reset of the residue number for the next chain. } # Returns the corrected atoms array and last residues hash. return ($atoms_ref,$last_ref); } # Allows to know if a compound is an amino acid. sub is_aa { my $proto = shift; return (0) if (ref($proto)); my ($to_check) = @_; $to_check=~s/\s*//g; # remove the space in the variable foreach $RES (GLY,TYR,THR,CYS,SER,ILE,VAL,ALA,ARG,PHE,ASN,PRO,LEU,GLU,ASP,GLN,HIS,LYS,MET,TRP) { if($to_check eq $RES) { $aa=1; # return 1 if the variable is an amino acid last; } else { $aa=0; # return 0 if the variable is not an amino acid } } return ($aa); } # Split the chains from a PDB file and writes as much PDB files as the number of chains. CHARMM compliant. sub split_pdb2charmm { my $proto = shift; return (0) if (ref($proto)); my ($atoms_ref,$filin,$type) = @_; # Determination of the PDB file name. my ($fname,$ext) = split ('\.',$filin); my $index = 0; my @pdb_files; # Runs along the atoms array. while ($index < $#{$atoms_ref}+1) { $seg_id = $atoms_ref->[$index]->segment(); $chain_id = $atoms_ref->[$index]->chain(); # Generation of the output PDB file names. if (($type==1) || (($type==2) && ($chain_id eq "Z"))) { $file_name = lc("$fname\_$chain_id"); } if (($type==2) && ($chain_id ne "Z")) { $file_name = lc("$fname\_$seg_id"); } open (FILOUT, "> scr/$file_name.$ext") || die "Unable to create the file scr/$file_name.$ext : $!\n"; # Proceeds chain by CHAIN : ## begin CHAIN ## if ($chain_id eq "W") { while ($atoms_ref->[$index]->chain() eq $chain_id) { printf (FILOUT "%s", $atoms_ref->[$index]->print_pdb_tip3()); $index++; # To prevent the program to run beyond the length of the atoms array. if (!defined $atoms_ref->[$index]) { last; } } } elsif ($chain_id eq "Z") { while ($atoms_ref->[$index]->chain() eq $chain_id) { printf (FILOUT "%s", $atoms_ref->[$index]->print_pdb2charmm()); $index++; # To prevent the program to run beyond the length of the atoms array. if (!defined $atoms_ref->[$index]) { last; } } } else { while ((($atoms_ref->[$index]->chain() eq $chain_id) && ($type==1)) || (($atoms_ref->[$index]->segment() eq $seg_id) && ($type==2))) { if (($atoms_ref->[$index]->residue_name() =~ /TIP3/) && ($type==2)) { printf (FILOUT "%s", $atoms_ref->[$index]->print_pdb_tip3()); } else { printf (FILOUT "%s", $atoms_ref->[$index]->print_pdb2charmm()); } $index++; # To prevent the program to run beyond the length of the atoms array. if (!defined $atoms_ref->[$index]) { last; } } ## end CHAIN ## # Prints a TER line with the attributes of the last Atom at the end of a chain only if it is an amino acid. if (Leto::GrpAtoms->is_aa($atoms_ref->[$index-1]->residue_name())) { printf (FILOUT "%s", $atoms_ref->[$index-1]->print_pdb_ter()); } } printf (FILOUT "END\n"); close (FILOUT); # Creation of the array containing all the PDB file names created. push (@pdb_files, lc("$file_name")); } # Returns the PDB files array. return (\@pdb_files); } # Split the chains from a PDB file and writes as much PDB files as the number of chains. sub split_pdb { my $proto = shift; return (0) if (ref($proto)); my ($atoms_ref,$filin) = @_; # Determination of the PDB file name. my ($fname,$ext) = split ('\.',$filin); my $index = 0; my @pdb_files; # Runs along the atoms array. while ($index <= $#{$atoms_ref}) { $chain_id = $atoms_ref->[$index]->chain(); # Generation of the output PDB file names. my $file_name = lc("$fname\_$chain_id.$ext"); open (FILOUT, "> scr/$file_name") || die "Unable to create the file scr/$file_name : $!\n"; # Proceeds chain by chain : ## begin CHAIN ## while ($atoms_ref->[$index]->chain() eq $chain_id) { printf (FILOUT "%s", $atoms_ref->[$index]->print_pdb_atom()); $index++; # To prevent the program to run beyond the length of the atoms array. if (!defined $atoms_ref->[$index]) { last; } } ## end CHAIN ## # Prints a TER line with the attributes of the last Atom at the end of a chain except for Water. printf (FILOUT "%s", $atoms_ref->[$index-1]->print_pdb_ter()) unless ($atoms_ref->[$index-1]->chain() eq "W"); printf (FILOUT "END\n"); close (FILOUT); # Creation of the array containing all the PDB file names created. push (@pdb_files, lc("$fname\_$chain_id")); } # Returns the PDB files array. return (\@pdb_files); } # Creates an input file build.inp from template. sub print_build { my $proto = shift; return (0) if (ref($proto)); my ($atoms_ref,$pdb_ref,$path,$Verbose,$nb_ch_sys,$nb_het_sys,$water,$type,$build,$pdb_name,$top,$par,$term_ref,$patch_ref) = @_; open (BUILDTMP, "< tmpsep/build.tmp") || die "Unable to open the file tmpsep/build.tmp : $!\n"; @build = ; close (BUILDTMP); if ($build eq "base") { open (BUILDINP, "> inp/$pdb_name\_complex.build.inp") || die "Unable to create the file inp/$pdb_name\_complex.build.inp : $!\n"; } elsif ($build eq "his") { open (BUILDINP, "> inp/$pdb_name\_complex.build_psf_crd.inp") || die "Unable to create the file inp/$pdb_name\_complex.build_psf_crd.inp : $!\n"; } my $index = 0; my $chain_id = ""; # Identifier of the last chain processed (in the order it appears in the PDB file). my $seg_id=""; # Identifier of the last segid processed (in the order it appears in the PDB file). # Runs along the template file. foreach $b (0..$#build) { # Identification of the keyword $SET$ in the template file. if ($build[$b] =~ /^\$SET\$/) { warn "\tSetting up the input file ...\n" if ($Verbose == 1); printf (BUILDINP "SET path %s ! Path of the working directory\n", $path); foreach $i(0..$nb_ch_sys-1) { printf (BUILDINP "SET %i %s ! Chain %i\n", $i+1,$pdb_ref->[$i],$i+1); $ref=$i; } if ($nb_het_sys !=0) { $ref++; printf (BUILDINP "SET %i %s ! Chain of heteroatom\n", $ref+1,$pdb_ref->[$ref]); $ref=$ref; } if ($water!=0) { $ref++; printf (BUILDINP "SET %i %s ! Water molecules\n", $ref+1,$pdb_ref->[$ref]); } if ($build eq "his") { printf (BUILDINP "SET ext .%s ! To know if it's new psf and crd in which protonation state of his is good\n", $build); } elsif ($build eq "base") { printf (BUILDINP "SET ext ! To know if it's new psf and crd in which protonation state of his is good\n"); } printf (BUILDINP "SET name %s ! To know the name of files\n",$pdb_name); } elsif ($build[$b]=~ /^\$LIB\$/) { printf(BUILDINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD rtf unit 2 card\nCLOSE unit 2\n",$top); printf(BUILDINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD param unit 2 card\nCLOSE unit 2\n",$par); } # Identification of the keyword $CHAIN$ in the template file elsif ($build[$b]=~ /^\$CHAIN\$/) { foreach $i(0..$#$pdb_ref-1) { # makes a loop on each chain (heteroatom, water). if ($pdb_ref->[$i]=~/.*_w/) { # for the water chain printf (BUILDINP "\n! Initial pdb sequence\n"); printf (BUILDINP "OPEN unit 1 READ formatted name \@path/scr/@%i.pdb\@ext\n",$i+1); printf (BUILDINP "READ sequ pdb unit 1\n"); printf (BUILDINP "CLOSE unit 1\n"); printf (BUILDINP "\nGENERATE W noangle nodihedral\n"); printf (BUILDINP "\n! Initial coordinates\n"); printf (BUILDINP "OPEN unit 1 READ formatted name \@path/scr/@%i.pdb\@ext\n",$i+1); printf (BUILDINP "READ coor pdb unit 1 append\n"); printf (BUILDINP "CLOSE unit 1\n"); } else { # Allows to write in the input the good keyword for generate psf with patch for disulfide bond # Runs along the atoms array. if ($type==1) { # if the file is a true pdb, the script uses the chain identifier. printf (BUILDINP "\n! Initial pdb sequence\n"); printf (BUILDINP "OPEN unit 1 READ formatted name \@path/scr/@%i.pdb\@ext\n",$i+1); printf (BUILDINP "READ sequ pdb unit 1\n"); printf (BUILDINP "CLOSE unit 1\n"); $termok=0; # initialization of a variable to know if the chain has special terminaison while ($index <= $#{$atoms_ref}) { # The current chain hasn't been treated yet. if (($atoms_ref->[$index]->chain() ne $chain_id) && ($atoms_ref->[$index]->chain() !~ /W/)) { # The current chain identifier is recorded to indicate it has been treated. $chain_id = $atoms_ref->[$index]->chain(); # Processing the special terminaison foreach $j(0..$#{$term_ref}) { if (($atoms_ref->[$index]->residue_name() =~ /TIP3/) || ($atoms_ref->[$index]->residue_name() =~ /HOH/)) { printf (BUILDINP "\nGENERATE %1s noangle nodihedral\n", $chain_id); $termok=1; last; } if ($term_ref->[$j][0] eq $chain_id) { if (($term_ref->[$j][1] eq "") && ($term_ref->[$j][2] eq "")) { printf (BUILDINP "\nGENERATE %1s setup\n", $chain_id); } else { printf (BUILDINP "\nGENERATE %1s first %1s last %1s setup\n", $chain_id,$term_ref->[$j][1],$term_ref->[$j][2]); } $termok=1; last; } } if ($termok == 0) { # this is for the default case printf (BUILDINP "\nGENERATE %1s setup\n", $chain_id); } # Processing special patch (for example GLUP, ASPP ...) foreach $k(1..$#{$patch_ref}) { if ($patch_ref->[$k][0] =~/\b$chain_id\b/) { printf (BUILDINP "\nPATCH %1s %1s %1s setup\n", $patch_ref->[$k][1],$chain_id,$patch_ref->[$k][2]); } } } # Processing the disulfide bonds. if ((defined $atoms_ref->[$index]->ssbond_res()) && ($atoms_ref->[$index]->chain() eq $chain_id)) { warn "\tPatching the cysteins involved in disulfide bonds in chain $chain_id ...\n" if ($Verbose ==1); printf (BUILDINP "PATCH DISU %1s %3d %1s %3d\n", $atoms_ref->[$index]->chain(), $atoms_ref->[$index]->residue_number(), $atoms_ref->[$index]->ssbond_chain(), $atoms_ref->[$index]->ssbond_res()); } $index++; # All the chains (but Water) have been treated. if ($index > $#{$atoms_ref} || $atoms_ref->[$index]->chain() ne $chain_id) { last; } } } if ($type==2) { # if the file is from charmm or modeller, the script uses the segid identifier. printf (BUILDINP "\n! Initial pdb sequence\n"); printf (BUILDINP "OPEN unit 1 READ formatted name \@path/scr/@%i.pdb\@ext\n",$i+1); printf (BUILDINP "READ sequ pdb unit 1\n"); printf (BUILDINP "CLOSE unit 1\n"); $termok=0; # initialization of a variable to know if the chain has special terminaison while ($index <= $#{$atoms_ref}) { # The current chain hasn't been treated yet. if (($atoms_ref->[$index]->segment() ne $seg_id) && ($atoms_ref->[$index]->segment() !~ /W/)) { # The current chain identifier is recorded to indicate it has been treated. $seg_id = $atoms_ref->[$index]->segment(); if ($atoms_ref->[$index]->residue_name() =~ /TIP3/) { printf (BUILDINP "\nGENERATE %1s noangle nodihedral\n", $seg_id); $termok=1; last; } # Processing the special terminaison foreach $j(1..$#{$term_ref}) { if ($term_ref->[$j][0] eq $seg_id) { printf (BUILDINP "\nGENERATE %1s first %1s last %1s setup\n", $seg_id,$term_ref->[$j][1],$term_ref->[$j][2]); $termok=1; last; } } if ($termok == 0) { # this is for hte default case printf (BUILDINP "\nGENERATE %1s setup\n", $seg_id); } # Processing special patch (for example GLUP, ASPP ...) foreach $k(1..$#{$patch_ref}) { if ($patch_ref->[$k][0] eq $seg_id) { printf (BUILDINP "\nPATCH %1s %1s %1s setup\n", $patch_ref->[$k][1],$seg_id,$patch_ref->[$k][2]); } } } # Processing the disulfide bonds. if ((defined $atoms_ref->[$index]->ssbond_res()) && ($atoms_ref->[$index]->segment() eq $seg_id)) { warn "\tPatching the cysteins involved in disulfide bonds in chain $seg_id ...\n" if ($Verbose ==1); printf (BUILDINP "PATCH DISU %4s %3d %4s %3d\n", $atoms_ref->[$index]->segment(), $atoms_ref->[$index]->residue_number(), $atoms_ref->[$index]->ssbond_chain(), $atoms_ref->[$index]->ssbond_res()); } $index++; # All the chains (but Water) have been treated. if ($index > $#{$atoms_ref} || $atoms_ref->[$index]->segment() ne $seg_id) { last; } } } printf (BUILDINP "\n! Initial coordinates\n"); printf (BUILDINP "OPEN unit 1 READ formatted name \@path/scr/@%i.pdb\@ext\n",$i+1); printf (BUILDINP "READ coor pdb unit 1 append\n"); printf (BUILDINP "CLOSE unit 1\n"); } } } # All other lines without keywords. else { printf (BUILDINP "%s", $build[$b]); } } close (BUILDINP); # Returns the atoms and PDB files arrays. return ($atoms_ref,$pdb_ref,$pdb_name); } # Creates an input file mini.inp from template. sub print_mini { my $proto = shift; return (0) if (ref($proto)); my ($ext,$path,$Verbose,$nb_ch_sys,$nb_het_sys,$pdb_name,$top,$par) = @_; if ($ext eq "small") { open (MINITMP, "< tmp/small_mini.tmp") || die "Unable to open the file tmp/mini.tmp : $!\n"; @mini = ; close (MINITMP); } elsif ($ext eq "his") { open (MINITMP, "< tmp/mini.tmp") || die "Unable to open the file tmp/mini.tmp : $!\n"; @mini = ; close (MINITMP); } open (MININP, "> inp/$pdb_name\_complex.mini_$ext.inp") || die "Unable to create the file inp/$pdb_name\_complex.mini_$ext.inp : $!\n"; # Runs along the template file. foreach $m (0..$#mini) { # Identification of the keyword $SET$ in the template file. if ($mini[$m] =~ /^\$SET\$/) { warn "\tSetting up the input file ...\n" if ($Verbose == 1); printf (MININP "SET path %s ! Path of the working directory\n", $path); printf (MININP "SET ext %s ! Extension identifying the minimization\n", $ext); printf (MININP "SET out %s\_complex.mini ! Output file name\n", $pdb_name); printf (MININP "SET inp %s\_complex ! Input file name\n", $pdb_name); printf (MININP "SET name %s ! Name of files\n", $pdb_name); } elsif ($mini[$m]=~ /^\$LIB\$/) { printf(MININP "OPEN READ unit 2 card name \@path/lib/%s\nREAD rtf unit 2 card\nCLOSE unit 2\n",$top); printf(MININP "OPEN READ unit 2 card name \@path/lib/%s\nREAD param unit 2 card\nCLOSE unit 2\n",$par); } # Tests if the CRD file exists with the extension used for the minimization. elsif ((-f "scr/$pdb_name\_complex.crd.$ext") && ($mini[$m] =~ ?\@path\/scr\/\@name\_complex\.crd?)) { printf (MININP "OPEN unit 1 card READ name \@path\/scr\/\@name_complex.crd.%s\n", $ext); } # Tests if the PSF file exists with the extension used for the minimization. elsif ((-f "scr/$pdb_name\_complex.psf.$ext") && ($mini[$m] =~ ?\@path\/scr\/\@name\_complex\.psf?)) { printf (MININP "OPEN unit 1 card READ name \@path\/scr\/\@name_complex.psf.%s\n", $ext); } # All other lines without keywords. else { printf (MININP "%s", $mini[$m]); } } close (MININP); } # Creates an input file crd.inp from template. sub print_crd { my $proto = shift; return (0) if (ref($proto)); my ($ext,$path,$Verbose,$nb_ch_sys,$nb_het_sys,$pdb_name,$top,$par,$fp,$sp) = @_; open (CRDTMP, "< tmp/crd.tmp") || die "Unable to open the file tmp/crd.tmp : $!\n"; @crd = ; close (CRDTMP); if ($ext eq "his") { open (CRDINP, "> inp/$pdb_name\_complex.crd-mini.inp") || die "Unable to create the file inp/$pdb_name\_complex.crd-mini.inp : $!\n"; } else { open (CRDINP, "> inp/$pdb_name\_complex.crd.inp") || die "Unable to create the file inp/$pdb_name\_complex.crd.inp : $!\n"; } # Runs along the template file. foreach $c (0..$#crd) { # Identification of the keyword $SET$ in the template file. if ($crd[$c] =~ /^\$SET\$/) { warn "\tSetting up the input file ...\n" if ($Verbose == 1); printf (CRDINP "SET path %s ! absolute path of the working directory\n", $path); if (defined $ext) { printf (CRDINP "SET crd %s\_complex.mini_%s ! name of the crd file from the previous minimization\n", $pdb_name, $ext); printf (CRDINP "SET ext -mini ! allows to know that crd was minimized\n"); } else { printf (CRDINP "SET crd %s\_complex ! name of the crd file \n", $pdb_name); printf (CRDINP "SET ext ! allows to know that crd was minimized\n"); } printf (CRDINP "SET name %s ! \n", $pdb_name); } elsif ($crd[$c]=~ /^\$LIB\$/) { printf(CRDINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD rtf unit 2 card\nCLOSE unit 2\n",$top); printf(CRDINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD param unit 2 card\nCLOSE unit 2\n",$par); } # Identification of the keyword $CHA$ in the template file. elsif ($crd[$c] =~ /^\$CHA\$/) { warn "\tSetting up the charges ...\n" if ($Verbose == 1); printf (CRDINP "! Writing the charges in a crd file\nOPEN WRITE formatted unit 27 name \@path/scr/\@name\@ext\_cx-cha.crd\nWRITE coor card sele %s .or. %s end unit 27\n* CRD with Charges in WMAIN for \@name\@ext\_complex\n*\n", $fp, $sp); printf (CRDINP "! Writing the charges in a crd file\nOPEN WRITE formatted unit 27 name \@path/scr/\@name\@ext\_fp-cha.crd\nWRITE coor card sele %s end unit 27\n* CRD with Charges in WMAIN for \@name\@ext\_fp\n*\n", $fp); printf (CRDINP "! Writing the charges in a crd file\nOPEN WRITE formatted unit 27 name \@path/scr/\@name\@ext\_sp-cha.crd\nWRITE coor card sele %s end unit 27\n* CRD with Charges in WMAIN for \@name\@ext\_sp\n*\n", $sp); } # Identification of the keyword $RAD$ in the template file. elsif ($crd[$c] =~ /^\$RAD\$/) { warn "\tSetting up the radii ...\n" if ($Verbose == 1); printf (CRDINP "! Writing the radii in a crd file\nOPEN WRITE formatted unit 27 name \@path/scr/\@name\@ext\_cx-rad.crd\nWRITE coor card sele %s .or. %s end unit 27\n* CRD with Radii in WMAIN for \@name\@ext\_complex\n*\n", $fp, $sp); printf (CRDINP "! Writing the radii in a crd file\nOPEN WRITE formatted unit 27 name \@path/scr/\@name\@ext\_fp-rad.crd\nWRITE coor card sele %s end unit 27\n* CRD with Radii in WMAIN for \@name\@ext\_fp\n*\n", $fp); printf (CRDINP "! Writing the radii in a crd file\nOPEN WRITE formatted unit 27 name \@path/scr/\@name\@ext\_sp-rad.crd\nWRITE coor card sele %s end unit 27\n* CRD with Radii in WMAIN for \@name\@ext\_sp\n*\n", $sp); } # All other lines without keywords. else { printf (CRDINP "%s", $crd[$c]); } } close (CRDINP); } # Corrects the CRD files to comply with the UHBD standard. sub crd2uhbd { my $proto = shift; return (0) if (ref($proto)); my ($atoms_ref,$last_ref,$glob,$term_ref,$patch_ref) = @_; foreach $a (0..$#{$atoms_ref}) { # Identification of the atoms forming disulfide bonds. if (defined ($atoms_ref->[$a]->ssbond_res())) { # Creates an array for each chain. # The residue numbers for each pair of atoms forming a disulfide bond are recorded 2 by 2 in the array. # Each value from the array contains the informations of both residue number and chain (for example : 17_L). push (@ssbond,join('_',$atoms_ref->[$a]->ssbond_res(),$atoms_ref->[$a]->ssbond_chain())); push (@ssbond,join('_',$atoms_ref->[$a]->residue_number(),$atoms_ref->[$a]->chain())); # For example, the following pairs do form a disulfide bond : 0&1, 2&3, 4&5, etc... } } my (@before,@after); # Arrays recording the modifications done by this function. # Modification of the files in place. # The unmodified files have the extension .his added at the end of their name. local $^I = '.his'; # Indication of the files to be modified (given in argument). local @ARGV = glob ("$glob"); $out=join("\n",@ARGV); # Processes all the files : foreach $kk(0..$#{$term_ref}) { print "chain $term_ref->[$kk][0] nter $term_ref->[$kk][3] cter $term_ref->[$kk][4]\n"; } while (<>) { # Only processes the coordinates lines which are at least 70 characters long. if (length($_) >= 70) { # if ( (substr($_,16,4) eq "C ") || (substr($_,16,4) eq "CA ") || (substr($_,16,4) eq "HA ") || (substr($_,16,4) eq "HA1 ") || (substr($_,16,4) eq "HA2 ") || (substr($_,16,4) eq "HN ") ||(substr($_,16,4) eq "CAY ") || (substr($_,16,4) eq "HY1 ") || (substr($_,16,4) eq "HY2 ") || (substr($_,16,4) eq "HY3 ") || (substr($_,16,4) eq "CY ") || (substr($_,16,4) eq "OY ") || (substr($_,16,4) eq "HT1 ") || (substr($_,16,4) eq "HT2 ") || (substr($_,16,4) eq "HT3 ") || (substr($_,16,4) eq "N ") || (substr($_,16,4) eq "O ") || (substr($_,16,4) eq "OT1 ") || (substr($_,16,4) eq "OT2 ") ||(substr($_,16,4) eq "NT ") || (substr($_,16,4) eq "HNT ") || (substr($_,16,4) eq "CAT ") || (substr($_,16,4) eq "CAT ") ) { foreach $i(0..$#{$term_ref}) { # For the acetylated residues changes the name of the residue into ACE for few atom only. $chain_nob=substr($_,51,4); $chain_nob=~s/\s//g; # Variable containing the name of the chain without blank if (($term_ref->[$i][1] eq "ACE") && ($chain_nob =~ /$term_ref->[$i][0]/) && ( (substr($_,16,4) eq "CAY ") || (substr($_,16,4) eq "HY1 ") || (substr($_,16,4) eq "HY2 ") || (substr($_,16,4) eq "HY3 ") || (substr($_,16,4) eq "CY ") || (substr($_,16,4) eq "OY ") ) ){ push (@before,(substr($_,11,9) . substr($_,51,9))); substr($_,11,4) =~ s/[A-Z]{3} /ACE /; push (@after,(substr($_,11,9) . substr($_,51,9))); } # For the ct3 residues changes the name of the residue into CT3 for few atom only. if (($term_ref->[$i][2] eq "CT3") && ($chain_nob =~ /$term_ref->[$i][0]/) && ((substr($_,16,4) eq "NT ") || (substr($_,16,4) eq "HNT ") || (substr($_,16,4) eq "CAT ") || (substr($_,16,4) eq "HT1 ") || (substr($_,16,4) eq "HT2 ") || (substr($_,16,4) eq "HT3 ") )) { push (@before,(substr($_,11,9) . substr($_,51,9))); substr($_,11,4) =~ s/[A-Z]{3} /CT3 /; push (@after,(substr($_,11,9) . substr($_,51,9))); } # If the NTER residues is a PRO changes the name of the residue into PRPP. if ((($term_ref->[$i][1] eq "NTER") || ($term_ref->[$i][3] eq "NTER")) && (substr($_,56,4) =~ /1 /) && ($chain_nob =~ /$term_ref->[$i][0]/) && (substr($_,11,4) =~ /PRO /)) { substr($_,11,4) =~ s/[A-Z]{3} /PRPP/; } # For the NTER residues changes the name of the residue into NTER for few atom only. if ((($term_ref->[$i][1] eq "NTER") || ($term_ref->[$i][3] eq "NTER")) && (substr($_,56,4) =~ /1 /) && ($chain_nob =~ /$term_ref->[$i][0]/) && ( (substr($_,16,4) eq "C ") || (substr($_,16,4) eq "CA ") || (substr($_,16,4) eq "HA ") || (substr($_,16,4) eq "HA1 ") || (substr($_,16,4) eq "HA2 ") || (substr($_,16,4) eq "HN ") || (substr($_,16,4) eq "HT1 ") || (substr($_,16,4) eq "HT2 ") || (substr($_,16,4) eq "HT3 ") || (substr($_,16,4) eq "N ") || (substr($_,16,4) eq "O "))) { #elsif ((substr($_,56,4) =~ /1 /) && (substr($_,51,4) =~ /$term_ref->[$i][0]/) && ( (substr($_,16,4) eq "C ") || (substr($_,16,4) eq "CA ") || (substr($_,16,4) eq "HA ") || (substr($_,16,4) eq "HA1 ") || (substr($_,16,4) eq "HA2 ") || (substr($_,16,4) eq "HN ") || (substr($_,16,4) eq "HT1 ") || (substr($_,16,4) eq "HT2 ") || (substr($_,16,4) eq "HT3 ") || (substr($_,16,4) eq "N ") || (substr($_,16,4) eq "O "))) { # If a GLY is in NTER changes the name of the residue into GLPP. if (substr($_,11,4) =~ /GLY /) { substr($_,11,4) =~ s/[A-Z]{3} /GLPP/; } else { substr($_,11,4) =~ s/[A-Z]{3} /NTER/; } } # If the CTER residue is a PRO changes the name of the residue into PRCT. if ((($term_ref->[$i][2] eq "CTER") || ($term_ref->[$i][4] eq "CTER")) && (substr($_,56,4) == $last_ref->{$chain_nob}) && ($chain_nob =~ /$term_ref->[$i][0]/) && (substr($_,11,4) =~ /PRO /)) { substr($_,11,4) =~ s/[A-Z]{3} /PRCT/; } # For the CTER residues changes the name of the residue into CTER for few atom only. if ((($term_ref->[$i][2] eq "CTER") || ($term_ref->[$i][4] eq "CTER")) && (substr($_,56,4) == $last_ref->{$chain_nob}) && ($chain_nob =~ /$term_ref->[$i][0]/) && ( (substr($_,16,4) eq "C ") || (substr($_,16,4) eq "CA ") || (substr($_,16,4) eq "HA ") || (substr($_,16,4) eq "HA1 ") || (substr($_,16,4) eq "HA2 ") || (substr($_,16,4) eq "HN ") || (substr($_,16,4) eq "HT1 ") || (substr($_,16,4) eq "HT2 ") || (substr($_,16,4) eq "HT3 ") || (substr($_,16,4) eq "N ") || (substr($_,16,4) eq "OT1 ") || (substr($_,16,4) eq "OT2 "))) { # If a GLY is in CTER changes the name of the residue into GLCT. if (substr($_,11,4) =~ /GLY /) { substr($_,11,4) =~ s/[A-Z]{3} /GLCT/; } else { push (@before,(substr($_,11,9) . substr($_,51,9))); substr($_,11,4) =~ s/[A-Z]{3} /CTER/; push (@after,(substr($_,11,9) . substr($_,51,9))); } } } } # Seeks the array to check if the current atom belongs to a residue forming a disulfide bond. foreach $s (0..$#ssbond) { my ($res,$chain) = split ('_',$ssbond[$s]); # The current atom refers to a residue forming a disulfide bond. if ((substr($_,56,4) == $res) && (substr($_,51,4) eq $chain)) { push (@before,substr($_,5,10)); # The residue name is modified not to be taken into account by the script UHBD. substr($_,11,4) =~ s/CYS /CSS /; push (@after,substr($_,5,10)); } } foreach $s (1..$#{$patch_ref}) { $num_res_nob=substr($_,56,4); $num_res_nob=~s/\s//; # Variable containing the number residu without blank # The current atom refers to a residue forming a disulfide bond. if (($num_res_nob =~ /$patch_ref->[$s][2]/) && ($chain_nob =~ /$patch_ref->[$s][0]/)) { push (@before,substr($_,5,10)); # The residue name is modified not to be taken into account by the script UHBD. substr($_,11,4) =~ s/GLU /GPP /; substr($_,11,4) =~ s/ASP /APP /; push (@after,substr($_,5,10)); } } # } # Prints the lines form the file, whether they have been modified or not. print; } return (\@before,\@after); } # this fonction allows to substitute residue name in current line (see followed fonction to understand) sub substi_aa { my $proto = shift; return (0) if (ref($proto)); my ($aa,$aa_modif) = @_; # normal name of AA and name use for atom of side chain in site22 my (@before,@after); # Arrays recording the modifications done by this function. if (substr($_,11,3) =~ /$aa/) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,3) =~ s/[A-Z]{3}/$aa_modif/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } return (\@before,\@after,$_); } # this fonction allows to substitute residue name for all residue using the previous fonction # it uses in case of a UHBD calculation with one titrable site by residue, allow to change the name # of side chain atom in extemities of each chain sub substitution { my $proto = shift; return (0) if (ref($proto)); my ($aa) = @_; if ($aa =~ /ASP /) { Leto::GrpAtoms->substi_aa(ASP,ASZ); } elsif (substr($_,11,4) =~ /ASN /) { Leto::GrpAtoms->substi_aa(ASN,AZZ); } elsif (substr($_,11,4) =~ /ILE /) { Leto::GrpAtoms->substi_aa(ILE,ILZ); } elsif (substr($_,11,4) =~ /ALA /) { Leto::GrpAtoms->substi_aa(ALA,ALZ); } elsif (substr($_,11,4) =~ /NVA /) { Leto::GrpAtoms->substi_aa(NVA,NVZ); } elsif (substr($_,11,4) =~ /VAL /) { Leto::GrpAtoms->substi_aa(VAL,VAZ); } elsif (substr($_,11,4) =~ /LEU /) { Leto::GrpAtoms->substi_aa(LEU,LEZ); } elsif (substr($_,11,4) =~ /SER /) { Leto::GrpAtoms->substi_aa(SER,SEZ); } elsif (substr($_,11,4) =~ /THR /) { Leto::GrpAtoms->substi_aa(THR,THZ); } elsif (substr($_,11,4) =~ /CYS /) { Leto::GrpAtoms->substi_aa(CYS,CZZ); } elsif (substr($_,11,4) =~ /CSS /) { Leto::GrpAtoms->substi_aa(CSS,CSZ); } elsif (substr($_,11,4) =~ /MET /) { Leto::GrpAtoms->substi_aa(MET,MEZ); } elsif (substr($_,11,4) =~ /APP /) { Leto::GrpAtoms->substi_aa(APP,APZ); } elsif (substr($_,11,4) =~ /GLU /) { Leto::GrpAtoms->substi_aa(GLU,GLZ); } elsif (substr($_,11,4) =~ /GPP /) { Leto::GrpAtoms->substi_aa(GPP,GPZ); } elsif (substr($_,11,4) =~ /GLN /) { Leto::GrpAtoms->substi_aa(GLN,GZZ); } elsif (substr($_,11,4) =~ /LYS /) { Leto::GrpAtoms->substi_aa(LYS,LYZ); } elsif (substr($_,11,4) =~ /ARG /) { Leto::GrpAtoms->substi_aa(ARG,ARZ); } elsif (substr($_,11,4) =~ /HSD /) { Leto::GrpAtoms->substi_aa(HSD,HDZ); } elsif (substr($_,11,4) =~ /HSE /) { Leto::GrpAtoms->substi_aa(HSE,HEZ); } elsif (substr($_,11,4) =~ /HSP /) { Leto::GrpAtoms->substi_aa(HSP,HPZ); } elsif (substr($_,11,4) =~ /HSX /) { Leto::GrpAtoms->substi_aa(HSX,HXZ); } elsif (substr($_,11,4) =~ /PHE /) { Leto::GrpAtoms->substi_aa(PHE,PHZ); } elsif (substr($_,11,4) =~ /TYR /) { Leto::GrpAtoms->substi_aa(TYR,TYZ); } elsif (substr($_,11,4) =~ /TRP /) { Leto::GrpAtoms->substi_aa(TRP,TRZ); } return($_); } # Puts spaces at the good place so that when the program seeks for example the 1 residu id, it doesn't take all the # residus whose residu id contains one 1. sub space { my $proto = shift; return (0) if (ref($proto)); my ($extr) = @_; $length=length($extr); if ($length == 1) { $extr=" $extr"; } elsif ($length == 2) { $extr=" $extr"; } elsif ($length == 3) { $extr=" $extr"; } elsif ($length == 4) { $extr=" $extr"; } return ($extr); } # Corrects the CRD files to comply with the UHBD when the option Int is used. sub crd2uhbd_interactif { my $proto = shift; return (0) if (ref($proto)); my ($glob,$nb_cys,$nb_nter,$nb_cter,$nb_ntnr,$nb_ctnr,$nb_ace,$nb_ct3,$nb_ct2,$nb_app,$nb_gpp,$nb_sp1,$nb_sp2,$nb_citer,$nb_tter,$nb_deo) = @_; my (@before,@after); # Arrays recording the modifications done by this function. # Modification of the files in place. # The unmodified files have the extension .his added at the end of their name. local $^I = '.his'; # Indication of the files to be modified (given in argument). local @ARGV = glob ("$glob"); $out=join("\n",@ARGV); # Put the residue id of the cystein involved in SSbond in a array. $nb_cys=~s/ //g; @cys=split(',',$nb_cys); foreach $i(0..$#cys) { # Puts spaces at the good place so that when the program seeks for example the 1 residu id, it doesn't take all the # residus whose residu id contains one 1. $cys[$i]=Leto::GrpAtoms->space($cys[$i]); } # Put the residue id of the residues in Nter in a array. $nb_nter=~s/ //g; @nter=split(',',$nb_nter); foreach $i(0..$#nter) { # Puts spaces for the same reason as explained for the cys. $nter[$i]=Leto::GrpAtoms->space($nter[$i]); } # Put the residue id of the residues in Cter in a array. $nb_cter=~s/ //g; @cter=split(',',$nb_cter); foreach $i(0..$#cter) { # Puts spaces for the same reason as explained for the cys. $cter[$i]=Leto::GrpAtoms->space($cter[$i]); } # Put the residue id of the residues in Ntnr in a array. $nb_ntnr=~s/ //g; @ntnr=split(',',$nb_ntnr); foreach $i(0..$#ntnr) { # Puts spaces for the same reason as explained for the cys. $ntnr[$i]=Leto::GrpAtoms->space($ntnr[$i]); } # Put the residue id of the residues in Ctnr in a array. $nb_ctnr=~s/ //g; @ctnr=split(',',$nb_ctnr); foreach $i(0..$#ctnr) { # Puts spaces for the same reason as explained for the cys. $ctnr[$i]=Leto::GrpAtoms->space($ctnr[$i]); } # Put the residue id of the acetylated residues in a array. $nb_ace=~s/ //g; @ace=split(',',$nb_ace); foreach $i(0..$#ace) { # Puts spaces for the same reason as explained for the cys. $ace[$i]=Leto::GrpAtoms->space($ace[$i]); } # Put the residue id of the ct3 residues in a array. $nb_ct3=~s/ //g; @ct3=split(',',$nb_ct3); foreach $i(0..$#ct3) { # Puts spaces for the same reason as explained for the cys. $ct3[$i]=Leto::GrpAtoms->space($ct3[$i]); } # Put the residue id of the ct2 residues in a array. $nb_ct2=~s/ //g; @ct2=split(',',$nb_ct2); foreach $i(0..$#ct2) { # Puts spaces for the same reason as explained for the cys. $ct2[$i]=Leto::GrpAtoms->space($ct2[$i]); } # Put the residue id of the app residues in a array. $nb_app=~s/ //g; @app=split(',',$nb_app); foreach $i(0..$#app) { # Puts spaces for the same reason as explained for the cys. $app[$i]=Leto::GrpAtoms->space($app[$i]); } # Put the residue id of the gpp residues in a array. $nb_gpp=~s/ //g; @gpp=split(',',$nb_gpp); foreach $i(0..$#gpp) { # Puts spaces for the same reason as explained for the cys. $gpp[$i]=Leto::GrpAtoms->space($gpp[$i]); } # Put the residue id of the sp1 residues in a array. $nb_sp1=~s/ //g; @sp1=split(',',$nb_sp1); foreach $i(0..$#sp1) { # Puts spaces for the same reason as explained for the cys. $sp1[$i]=Leto::GrpAtoms->space($sp1[$i]); } # Put the residue id of the sp1 residues in a array. $nb_sp2=~s/ //g; @sp2=split(',',$nb_sp2); foreach $i(0..$#sp2) { # Puts spaces for the same reason as explained for the cys. $sp2[$i]=Leto::GrpAtoms->space($sp2[$i]); } # Put the nucleic acid id of the nucleic acid in 5ter in a array. $nb_citer=~s/ //g; @citer=split(',',$nb_citer); foreach $i(0..$#citer) { # Puts spaces for the same reason as explained for the cys. $citer[$i]=Leto::GrpAtoms->space($citer[$i]); } # Put the nucleic acid id of the nucleic acid in 3ter in a array. $nb_tter=~s/ //g; @tter=split(',',$nb_tter); foreach $i(0..$#tter) { # Puts spaces for the same reason as explained for the cys. $tter[$i]=Leto::GrpAtoms->space($tter[$i]); } # Put the nucleic acid id of the deoxyribose nucleic acid in a array. $nb_deo=~s/ //g; @deo=split(',',$nb_deo); foreach $i(0..$#deo) { # Puts spaces for the same reason as explained for the cys. $deo[$i]=Leto::GrpAtoms->space($deo[$i]); } # Processes all the files : while (<>) { # Only processes the coordinates lines which are at least 70 characters long. if (length($_) >= 70) { if (length($_) > 75) { # If a line of crd has more than 75 characters then it a format crd xxl. $x=2; # If charmm c31a2 and a large system are used the crd format is different all is multiplied by two the factor $x makes it possible to go to seek at the good place of the line. } else { $x=1; # For the normal crd. } # Parting DMPC in 3 if ( (substr($_,11,4) =~ /DPC /) && ( (substr($_,16,4) eq "N ") || (substr($_,16,4) eq "C13 ") || (substr($_,16,4) eq "H13A") || (substr($_,16,4) eq "H13B") || (substr($_,16,4) eq "H13C") || (substr($_,16,4) eq "C14 ") || (substr($_,16,4) eq "H14A") || (substr($_,16,4) eq "H14B") || (substr($_,16,4) eq "H14C") || (substr($_,16,4) eq "C15 ") || (substr($_,16,4) eq "H15A") || (substr($_,16,4) eq "H15B") || (substr($_,16,4) eq "H15C") || (substr($_,16,4) eq "C12 ") || (substr($_,16,4) eq "H12A") || (substr($_,16,4) eq "H12B") ) ) { substr($_,11*$x,4) =~ s/[A-Z]{3} /D1C /; } if ( (substr($_,11,4) =~ /DPC /) && ( (substr($_,16,4) eq "C11 ") || (substr($_,16,4) eq "H11A") || (substr($_,16,4) eq "H11B") || (substr($_,16,4) eq "P ") || (substr($_,16,4) eq "O11 ") || (substr($_,16,4) eq "O12 ") || (substr($_,16,4) eq "O13 ") || (substr($_,16,4) eq "O14 ") || (substr($_,16,4) eq "C1 ") || (substr($_,16,4) eq "HA ") || (substr($_,16,4) eq "HB ") ) ) { substr($_,11*$x,4) =~ s/[A-Z]{3} /D2C /; } if ( (substr($_,11,4) =~ /DPC /) ) { substr($_,11*$x,4) =~ s/[A-Z]{3} /D3C /; } # Parting DMPS in 3 if ( (substr($_,11,4) =~ /DPS /) && ( (substr($_,16,4) eq "N ") || (substr($_,16,4) eq "HT1 ") || (substr($_,16,4) eq "HT2 ") || (substr($_,16,4) eq "HT3 ") || (substr($_,16,4) eq "C ") || (substr($_,16,4) eq "OT1 ") || (substr($_,16,4) eq "OT2 ") || (substr($_,16,4) eq "C12 ") || (substr($_,16,4) eq "HCA ") ) ) { substr($_,11*$x,4) =~ s/[A-Z]{3} /D1S /; } if ( (substr($_,11,4) =~ /DPS /) && ( (substr($_,16,4) eq "C11 ") || (substr($_,16,4) eq "H11A") || (substr($_,16,4) eq "H11B") || (substr($_,16,4) eq "P ") || (substr($_,16,4) eq "O11 ") || (substr($_,16,4) eq "O12 ") || (substr($_,16,4) eq "O13 ") || (substr($_,16,4) eq "O14 ") || (substr($_,16,4) eq "C1 ") || (substr($_,16,4) eq "HA ") || (substr($_,16,4) eq "HB ") ) ) { substr($_,11*$x,4) =~ s/[A-Z]{3} /D2S /; } if ( (substr($_,11,4) =~ /DPS /) ) { substr($_,11*$x,4) =~ s/[A-Z]{3} /D3S /; } # Parting DMPS in 3 if ( (substr($_,11,4) =~ /DPG /) && ( (substr($_,16,4) eq "C13 ") || (substr($_,16,4) eq "H13A") || (substr($_,16,4) eq "H13B") || (substr($_,16,4) eq "OG3 ") || (substr($_,16,4) eq "HG3 ") || (substr($_,16,4) eq "C12 ") || (substr($_,16,4) eq "H12A") || (substr($_,16,4) eq "OG2 ") || (substr($_,16,4) eq "HG2 ") ) ) { substr($_,11*$x,4) =~ s/[A-Z]{3} /D1G /; } if ( (substr($_,11,4) =~ /DPG /) && ( (substr($_,16,4) eq "C11 ") || (substr($_,16,4) eq "H11A") || (substr($_,16,4) eq "H11B") || (substr($_,16,4) eq "P ") || (substr($_,16,4) eq "O11 ") || (substr($_,16,4) eq "O12 ") || (substr($_,16,4) eq "O13 ") || (substr($_,16,4) eq "O14 ") || (substr($_,16,4) eq "C1 ") || (substr($_,16,4) eq "HA ") || (substr($_,16,4) eq "HB ") ) ) { substr($_,11*$x,4) =~ s/[A-Z]{3} /D2G /; } if ( (substr($_,11,4) =~ /DPG /) ) { substr($_,11*$x,4) =~ s/[A-Z]{3} /D3G /; } # For the app residues (ASP protonnated) changes the name of the residue into APP. foreach $i (0..$#app) { if (substr($_,5*$x,5) =~ /$app[$i]/) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /APP /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } # For the gpp residues (GLU protonnated) changes the name of the residue into GPP. foreach $i (0..$#gpp) { if (substr($_,5*$x,5) =~ /$gpp[$i]/) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /GPP /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } # Seeks the array to check if the current atom belongs to a residue forming a disulfide bond. foreach $i (0..$#cys) { # The current atom refers to a residue forming a disulfide bond. if (substr($_,5*$x,5) == $cys[$i]) { push (@before,substr($_,5*$x,10)); # The residue name is modified not to be taken into account by the script UHBD. substr($_,11*$x,4) =~ s/CYS /CSS /; push (@after,substr($_,5*$x,10)); } } # Spots the residues NTER who have been write by the user. foreach $i (0..$#nter) { if (substr($_,5*$x,5) =~ /$nter[$i]/) { # If a GLY is in NTER changes the name of the residue into GLPP. if (substr($_,11,4) =~ /GLY /) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /GLPP/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } # If a PRO is in NTER changes the name of the residue into PRPP. elsif (substr($_,11,4) =~ /PRO /) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /PRPP/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } # For the others residues in NTER changes the name of the residue into NTER for the atom of the backbone only. # and change the name of side chain of each residue as this UHBD knows the residue in site22 else { if ( (substr($_,16*$x,4) eq "C ") || (substr($_,16*$x,4) eq "CA ") || (substr($_,16*$x,4) eq "HA ") || (substr($_,16*$x,4) eq "HN ") || (substr($_,16*$x,4) eq "HT1 ") || (substr($_,16*$x,4) eq "HT2 ") || (substr($_,16*$x,4) eq "HT3 ") || (substr($_,16*$x,4) eq "N ") || (substr($_,16*$x,4) eq "O ") || (substr($_,16*$x,4) eq "OT1 ") || (substr($_,16*$x,4) eq "OT2 ") ) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /NTER/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } else { # $_ = Leto::GrpAtoms->substitution(substr($_,11,4)); } } } } # Spots the residues NTNR (neutral NTER) who have been write by the user. foreach $i (0..$#ntnr) { if (substr($_,5*$x,5) =~ /$ntnr[$i]/) { # Changes the name of the residue into NTNR for the atom of the backbone only. if ( (substr($_,16*$x,4) eq "C ") || (substr($_,16*$x,4) eq "CA ") || (substr($_,16*$x,4) eq "HA ") || (substr($_,16*$x,4) eq "HN ") || (substr($_,16*$x,4) eq "HT1 ") || (substr($_,16*$x,4) eq "HT2 ") || (substr($_,16*$x,4) eq "HT3 ") || (substr($_,16*$x,4) eq "N ") || (substr($_,16*$x,4) eq "O ") || (substr($_,16*$x,4) eq "OT1 ") || (substr($_,16*$x,4) eq "OT2 ") ) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /NTNR/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } # and change the name of side chain of each residue as this UHBD knows the residue in site22 else { $_ = Leto::GrpAtoms->substitution(substr($_,11,4)); } } } # For the acetylated residues changes the name of the residue into ACE for few atom only. if ( (substr($_,16*$x,4) eq "CAY ") || (substr($_,16*$x,4) eq "HY1 ") || (substr($_,16*$x,4) eq "HY2 ") || (substr($_,16*$x,4) eq "HY3 ") || (substr($_,16*$x,4) eq "CY ") || (substr($_,16*$x,4) eq "OY ") ) { # Spots the residus ACE who have been write by the user foreach $i (0..$#ace) { if (substr($_,5*$x,5) =~ /$ace[$i]/) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /ACE /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } } # Spots the residues CTER who have been write by the user. foreach $i (0..$#cter) { if (substr($_,5*$x,5) =~ /$cter[$i]/) { # If a GLY is in CTER changes the name of the residue into GLCT. if (substr($_,11,4) =~ /GLY /) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /GLCT/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } # If a PRO is in CTER changes the name of the residue into PRCT. if (substr($_,11,4) =~ /PRO /) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /PRCT/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } # For the others residues in CTER else { #changes the name of the residue into CTER for the atom of the backbone only. if ( (substr($_,16*$x,4) eq "C ") || (substr($_,16*$x,4) eq "CA ") || (substr($_,16*$x,4) eq "HA ") || (substr($_,16*$x,4) eq "HN ") || (substr($_,16*$x,4) eq "HT1 ") || (substr($_,16*$x,4) eq "HT2 ") || (substr($_,16*$x,4) eq "HT3 ") || (substr($_,16*$x,4) eq "N ") || (substr($_,16*$x,4) eq "O ") || (substr($_,16*$x,4) eq "OT1 ") || (substr($_,16*$x,4) eq "OT2 ") ) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /CTER/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } # and change the name of side chain of each residue as this UHBD knows the residue in site22 else { # $_ = Leto::GrpAtoms->substitution(substr($_,11,4)); } } } } # For the residues in CTNR, changes the name of the residue into CTNR for the atom of the backbone only. foreach $i (0..$#ctnr) { if ((substr($_,5*$x,5) =~ /$ctnr[$i]/) && ((substr($_,16*$x,4) eq "C ") || (substr($_,16*$x,4) eq "CA ") || (substr($_,16*$x,4) eq "HA ") || (substr($_,16*$x,4) eq "HN ") || (substr($_,16*$x,4) eq "HT1 ") || (substr($_,16*$x,4) eq "HT2 ") || (substr($_,16*$x,4) eq "HT3 ") || (substr($_,16*$x,4) eq "N ") || (substr($_,16*$x,4) eq "O ") || (substr($_,16*$x,4) eq "OT1 ") || (substr($_,16*$x,4) eq "OT2 ") ) ){ push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /CTNR/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } # and change the name of side chain of each residue as this UHBD knows the residue in site22 else { $_ = Leto::GrpAtoms->substitution(substr($_,11,4)); } } # For the ct3 residues changes the name of the residue into CT3 for few atom only. if ( (substr($_,16*$x,4) eq "NT ") || (substr($_,16*$x,4) eq "HNT ") || (substr($_,16*$x,4) eq "CAT ") || (substr($_,16*$x,4) eq "HT1 ") || (substr($_,16*$x,4) eq "HT2 ") || (substr($_,16*$x,4) eq "HT3 ") ) { # Spots the residus ACE who have been write by the user foreach $i (0..$#ct3) { if (substr($_,5*$x,5) =~ /$ct3[$i]/) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /CT3 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } } # For the ct2 residues changes the name of the residue into CT2 for few atom only. if ( (substr($_,16*$x,4) eq "NT ") || (substr($_,16*$x,4) eq "HT1 ") || (substr($_,16*$x,4) eq "HT2 ") ) { # Spots the residus CT2 who have been write by the user foreach $i (0..$#ct2) { if (substr($_,5*$x,5) =~ /$ct2[$i]/) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /CT2 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } } # For the residues in SP1, changes the name of the residue into SP1 for the atom of the side chain only. # BE CAREFULL doesn't work for the moment because it lacks a site with just the BB of SER # To use add a fonction for change the name of BB of SER in the name of a site in site22 that contains only the BB atoms of SER foreach $i (0..$#sp1) { if ((substr($_,5*$x,5) =~ /$sp1[$i]/) && ((substr($_,16*$x,4) eq "CB ") || (substr($_,16*$x,4) eq "HB1 ") || (substr($_,16*$x,4) eq "HB2 ") || (substr($_,16*$x,4) eq "OG ") || (substr($_,16*$x,4) eq "P ") || (substr($_,16*$x,4) eq "O1P ") || (substr($_,16*$x,4) eq "O2P ") || (substr($_,16*$x,4) eq "OT ") || (substr($_,16*$x,4) eq "HT "))){ push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/SER /SP1 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } # For the residues in SP2, changes the name of the residue into SP2 for the atom of the side chain only. # BE CAREFULL doesn't work for the moment because it lacks a site with just the BB of SER foreach $i (0..$#sp2) { if ((substr($_,5*$x,5) =~ /$sp2[$i]/) && ((substr($_,16*$x,4) eq "CB ") || (substr($_,16*$x,4) eq "HB1 ") || (substr($_,16*$x,4) eq "HB2 ") || (substr($_,16*$x,4) eq "OG ") || (substr($_,16*$x,4) eq "P ") || (substr($_,16*$x,4) eq "O1P ") || (substr($_,16*$x,4) eq "O2P ") || (substr($_,16*$x,4) eq "OT "))){ push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/SER /SP2 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } # For the 5ter nucleic acid changes the name of the nucleic acid into 5TER for few atom only. if ( (substr($_,16*$x,4) eq "O5' ") || (substr($_,16*$x,4) eq "C5' ") || (substr($_,16*$x,4) eq "H5' ") || (substr($_,16*$x,4) eq "H5''") ) { # Spots the 5TER nucleic acid who have been write by the user foreach $i (0..$#citer) { if (substr($_,5*$x,5) =~ /$citer[$i]/) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /5TER/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } } # For the 3ter nucleic acid changes the name of the nucleic acid into 3TER for few atom only. if ( (substr($_,16*$x,4) eq "C3' ") || (substr($_,16*$x,4) eq "H3' ") || (substr($_,16*$x,4) eq "O3' ") || (substr($_,16*$x,4) eq "H3T ")) { # Spots the 3TER nucleic acid who have been write by the user foreach $i (0..$#tter) { if (substr($_,5*$x,5) =~ /$tter[$i]/) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /3TER/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } } # For the deoxyribose nucleic acid changes the name of the nucleic acid into DEO for few atom only. if ( (substr($_,16*$x,4) eq "C2' ") || (substr($_,16*$x,4) eq "H2''") || (substr($_,16*$x,4) eq "H2' ") ) { # Spots the 3TER nucleic acid who have been write by the user foreach $i (0..$#deo) { if (substr($_,5*$x,5) =~ /$deo[$i]/) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/[A-Z]{3} /DEO /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } } # For the drug CH30 changes the name of the residue in function of the atom. if (substr($_,11,4) =~ /CH30/) { if ( (substr($_,16*$x,4) eq "C01 ") || (substr($_,16*$x,4) eq "H01 ") || (substr($_,16*$x,4) eq "C02 ") || (substr($_,16*$x,4) eq "NP3 ") || (substr($_,16*$x,4) eq "HP1 ") || (substr($_,16*$x,4) eq "HP2 ") || (substr($_,16*$x,4) eq "C04 ") || (substr($_,16*$x,4) eq "H04 ") || (substr($_,16*$x,4) eq "C23 ") || (substr($_,16*$x,4) eq "O25 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/CH30/CH1 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } if ( (substr($_,16*$x,4) eq "NP0 ") || (substr($_,16*$x,4) eq "O ") || (substr($_,16*$x,4) eq "O62 ") || (substr($_,16*$x,4) eq "C05 ") || (substr($_,16*$x,4) eq "H05 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/CH30/CH2 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } if ( (substr($_,16*$x,4) eq "N24 ") || (substr($_,16*$x,4) eq "H24 ") || (substr($_,16*$x,4) eq "C27 ") || (substr($_,16*$x,4) eq "H71 ") || (substr($_,16*$x,4) eq "H72 ") || (substr($_,16*$x,4) eq "C28 ") || (substr($_,16*$x,4) eq "H81 ") || (substr($_,16*$x,4) eq "H82 ") || (substr($_,16*$x,4) eq "C30 ") || (substr($_,16*$x,4) eq "OM1 ") || (substr($_,16*$x,4) eq "O32 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/CH30/CH3 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } if ( (substr($_,16*$x,4) eq "C12 ") || (substr($_,16*$x,4) eq "C13 ") || (substr($_,16*$x,4) eq "H13 ") || (substr($_,16*$x,4) eq "C14 ") || (substr($_,16*$x,4) eq "H14 ") || (substr($_,16*$x,4) eq "C15 ") || (substr($_,16*$x,4) eq "H15 ") || (substr($_,16*$x,4) eq "C16 ") || (substr($_,16*$x,4) eq "H16 ") || (substr($_,16*$x,4) eq "C17 ") || (substr($_,16*$x,4) eq "H17 ") || (substr($_,16*$x,4) eq "C33 ") || (substr($_,16*$x,4) eq "H33 ") || (substr($_,16*$x,4) eq "O45 ") || (substr($_,16*$x,4) eq "C46 ") || (substr($_,16*$x,4) eq "H61 ") || (substr($_,16*$x,4) eq "H62 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/CH30/CH4 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } if ( (substr($_,16*$x,4) eq "C35 ") || (substr($_,16*$x,4) eq "H35 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/CH30/CH5 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } if ( (substr($_,16*$x,4) eq "C41 ") || (substr($_,16*$x,4) eq "H41 ") || (substr($_,16*$x,4) eq "H42 ") || (substr($_,16*$x,4) eq "H43 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/CH30/CH6 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } if ( (substr($_,16*$x,4) eq "C37 ") || (substr($_,16*$x,4) eq "H30 ") || (substr($_,16*$x,4) eq "H31 ") || (substr($_,16*$x,4) eq "H32 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/CH30/CH7 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } if ( (substr($_,16*$x,4) eq "C20 ") || (substr($_,16*$x,4) eq "H21 ") || (substr($_,16*$x,4) eq "H22 ") || (substr($_,16*$x,4) eq "H23 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/CH30/CH8 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } if ( (substr($_,16*$x,4) eq "C49 ") || (substr($_,16*$x,4) eq "C50 ") || (substr($_,16*$x,4) eq "HF1 ") || (substr($_,16*$x,4) eq "C51 ") || (substr($_,16*$x,4) eq "H51 ") || (substr($_,16*$x,4) eq "C52 ") || (substr($_,16*$x,4) eq "H52 ") || (substr($_,16*$x,4) eq "C53 ") || (substr($_,16*$x,4) eq "H53 ") || (substr($_,16*$x,4) eq "C54 ") || (substr($_,16*$x,4) eq "HF2 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/CH30/CH9 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } if ( (substr($_,16*$x,4) eq "CEX ") || (substr($_,16*$x,4) eq "HEX ") || (substr($_,16*$x,4) eq "HES ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/CH30/CH0 /; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } # For the component AFB or CFB changes the name of the residue in function of the atom. if ((substr($_,11,3) =~ /AFB/) ||(substr($_,11,3) =~ /CFB/)) { if ( (substr($_,16*$x,4) eq "C5 ") || (substr($_,16*$x,4) eq "C6 ") || (substr($_,16*$x,4) eq "C7 ") || (substr($_,16*$x,4) eq "C8 ") || (substr($_,16*$x,4) eq "C9 ") || (substr($_,16*$x,4) eq "H1 ") || (substr($_,16*$x,4) eq "H2 ") || (substr($_,16*$x,4) eq "H3 ") || (substr($_,16*$x,4) eq "H4 ") || (substr($_,16*$x,4) eq "H5 ") || (substr($_,16*$x,4) eq "H6 ") || (substr($_,16*$x,4) eq "H7 ") || (substr($_,16*$x,4) eq "H8 ") || (substr($_,16*$x,4) eq "OC7 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,3) =~ s/AFB/AF1/; substr($_,11*$x,3) =~ s/CFB/CF1/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } if ( (substr($_,16*$x,4) eq "C1 ") || (substr($_,16*$x,4) eq "C2 ") || (substr($_,16*$x,4) eq "C3 ") || (substr($_,16*$x,4) eq "C4 ") || (substr($_,16*$x,4) eq "OC4 ") || (substr($_,16*$x,4) eq "H22 ") || (substr($_,16*$x,4) eq "H23 ") || (substr($_,16*$x,4) eq "H24 ") || (substr($_,16*$x,4) eq "O16 ") || (substr($_,16*$x,4) eq "OC1 ") || (substr($_,16*$x,4) eq "H21 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,3) =~ s/AFB/AF2/; substr($_,11*$x,3) =~ s/CFB/CF2/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } if ( (substr($_,16*$x,4) eq "C10 ") || (substr($_,16*$x,4) eq "C11 ") || (substr($_,16*$x,4) eq "C12 ") || (substr($_,16*$x,4) eq "C13 ") || (substr($_,16*$x,4) eq "C14 ") || (substr($_,16*$x,4) eq "C15 ") || (substr($_,16*$x,4) eq "C16 ") || (substr($_,16*$x,4) eq "H9 ") || (substr($_,16*$x,4) eq "H10 ") || (substr($_,16*$x,4) eq "H11 ") || (substr($_,16*$x,4) eq "H12 ") || (substr($_,16*$x,4) eq "H13 ") || (substr($_,16*$x,4) eq "H14 ") || (substr($_,16*$x,4) eq "H15 ") || (substr($_,16*$x,4) eq "H16 ") || (substr($_,16*$x,4) eq "H17 ") || (substr($_,16*$x,4) eq "H18 ") || (substr($_,16*$x,4) eq "H19 ") || (substr($_,16*$x,4) eq "H20 ")) { push (@before,(substr($_,11*$x,9) . substr($_,51*$x,9))); substr($_,11*$x,4) =~ s/AFB/AF3/; substr($_,11*$x,4) =~ s/CFB/CF3/; push (@after,(substr($_,11*$x,9) . substr($_,51*$x,9))); } } } # Prints the lines form the file, whether they have been modified or not. print; } return (\@before,\@after); } # Creates a shell script run.com from template. sub print_run { my $proto = shift; return (0) if (ref($proto)); my ($Uhbd,$Verbose,$grid,$pdb_name) = @_; open (RUNTMP, "< tmpsep/run.tmp") || die "Unable to open the file tmpsep/run.tmp : $!\n"; @run = ; close (RUNTMP); open (RUN, "> inp/$pdb_name\_complex.run.com") || die "Unable to create the file inp/$pdb_name\_complex.run.com : $!\n"; # Runs along the template file. foreach $r (0..$#run) { # Identification of the keyword $SET$ in the template file. if ($run[$r] =~ /^\$SET\$/) { warn "\tSetting up the input file ...\n" if ($Verbose == 1); printf (RUN "set SCRIPTS=%s\n", $Uhbd); # Absolute path of the UHBD program. print (RUN "set GRID=1.0\n"); # Value of the grid. } # Identification of the keyword $PDB$ in the template file. elsif ($run[$r] =~ /^\$PDB\$/) { printf (RUN "foreach f ("); # Prints the names of the 2 chains and the one of the complex they form. foreach $p ("cx","fp","sp") { printf (RUN " %s\_%s ", $pdb_name , $p); } printf (RUN ")\n"); } # All other lines without keywords. else { printf (RUN "%s", $run[$r]); } } close (RUN); # Makes the file executable. chmod (0751,"inp/$pdb_name\_complex.run.com"); } # Corrects the residue name of the histidines according to their protonation state (find in a file write by the user). sub his_skip{ my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$pdb_name,$mini_ext) = @_; my (@his,@before,@after); # Arrays recording the modifications done by this function. open (STATE, "< dfsep/state_his.dat") || die "Unable to open the file dfsep/state_his.dat : $!\n"; my @state=; close (STATE); $hydrogen{cx}=0; foreach $i(1..$#state) { chomp($state[$i]); ($chain,$key,$value)=split(" ",$state[$i]); $length=length($chain); if ($length == 1) { $chain="$chain "; } elsif ($length == 2) { $chain="$chain "; } elsif ($length == 3) { $chain="$chain "; } foreach $part("fp","sp") { if ($mini_ext eq "small") { open (CHA, "< scr/$pdb_name-mini\_$part-cha.crd") || die "Unable to open the file scr/$pdb_name-mini\_$part-cha.crd : $!\n"; } else { open (CHA, "< scr/$pdb_name$ext\_$part-cha.crd") || die "Unable to open the file scr/$pdb_name\_$part-cha.crd : $!\n"; } my @cha=; close (CHA); foreach $line(0..$#cha) { if (((substr($cha[$line],56,5)) == $key) && ((substr($cha[$line],51,4) eq uc($chain) )) && ($value =~ /(HSD|HSE)/) && ((substr($cha[$line],16,2) eq "CA" ))) { $hydrogen{$part}++; } } } } $hydrogen{cx} = $hydrogen{fp} + $hydrogen{sp}; # Modification of the pdb files in place. # The unmodified pdb files have the extension .old added at the end of their name. local $^I = '.old'; # Indication of the files to be modified. local @ARGV = glob ("scr/*.pdb"); my $offset = 0; # Offset to properly renumber the atom, taking into account the hydrogen atoms deleted. # Processes all the files : while (<>){ # Only processes the coordinates lines which are at least 70 characters long. if (length($_) >= 70) { foreach $i(1..$#state) { chomp($state[$i]); ($chain,$key,$value)=split(/\t/,$state[$i]); $length=length($key); if ($length == 1) { $key=" $key"; } elsif ($length == 2) { $key=" $key"; } elsif ($length == 3) { $key=" $key"; } $lengthc=length($chain); if ($lengthc == 1) { $chain="$chain "; } elsif ($lengthc == 2) { $chain="$chain "; } elsif ($lengthc == 3) { $chain="$chain "; } # Spots the histidines according to their residue number and chain. if ((substr($_,22,4) == $key) && (substr($_,72,4) eq uc($chain))) { # Modifies their residue name according to their protonation state. substr($_,17,4) =~ s/(HIS|HSP)/$value/; # Spots the HE2 atom of an HSD. if ( (substr($_,17,4) eq "HSD ") && (substr($_,13,4) eq "HE2 ") ) { #push (@before,substr($_,16,4)); #push (@after,(substr($_,11,4) . substr($_,51,9))); # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } # Spots the HD1 atom of an HSE. elsif ( (substr($_,17,4) eq "HSE ") && (substr($_,13,4) eq "HD1 ") ) { #push (@before,substr($_,16,4)); #push (@after,(substr($_,11,4) . substr($_,51,9))); # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } } } # Renumbers in place each atom occurence if nedded. substr($_,6,5) = sprintf("%5d",substr($_,6,5)-$offset) unless (length($_) == 0); } # Prints the lines form the file, whether they have been modified or not. print; # Reset the offset at the end of each file for the next one. if (eof) { $offset = 0; } } # Allows to rename the pdb files # Files .pdb.his have good state for HIS @pdb = glob ("scr/*.pdb"); foreach $s(0..$#pdb) { rename ("$pdb[$s]","$pdb[$s].his"); } # Files .pdb have all HSP @pdb = glob ("scr/*.pdb.old"); foreach $s(0..$#pdb) { ($name)=($pdb[$s]=~/(.*\.pdb)\.old/); rename ("$pdb[$s]","$name"); } foreach $p ("cx","fp","sp") { # Modification of the files in place. # The unmodified files have the extension .ter temporarily added at the end of their name. local $^I = '.ter'; # Indication of the files to be modified. local @ARGV; if ($mini_ext eq "small") { @ARGV = glob ("scr/$pdb_name-mini\_$p-{cha,rad}.crd"); } else { @ARGV = glob ("scr/$pdb_name\_$p-{cha,rad}.crd"); } my $offset = 0; # Offset to properly renumber the atom, taking into account the hydrogen atoms deleted. # Processes all the files : while (<>){ # Spots the total number of atoms line. if (length($_) == 6) { # Substracts the hydrogen atoms deleted from the total number of atoms. substr($_,0,5) = sprintf("%5d",(substr($_,0,5)-$hydrogen{$p})); } # Only processes the coordinates lines which are at least 70 characters long. elsif (length($_) >= 70) { foreach $i(1..$#state) { chomp($state[$i]); ($chain,$key,$value)=split(/\t/,$state[$i]); $length=length($chain); if ($length == 1) { $chain="$chain "; } elsif ($length == 2) { $chain="$chain "; } elsif ($length == 3) { $chain="$chain "; } # Spots the histidines according to their residue number. if (substr($_,56,4) == $key) { # Modifies their residue name according to their protonation state. substr($_,11,4) =~ s/(HIS|HSP)/$value/; # Spots the HE2 atom of an HSD. if ( (substr($_,11,4) eq "HSD ") && (substr($_,16,4) eq "HE2 ") ) { push (@before,substr($_,16,4)); push (@after,(substr($_,11,4) . substr($_,51,9))); # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } # Spots the HD1 atom of an HSE. elsif ( (substr($_,11,4) eq "HSE ") && (substr($_,16,4) eq "HD1 ") ) { push (@before,substr($_,16,4)); push (@after,(substr($_,11,4) . substr($_,51,9))); # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } } } # Renumbers in place each atom occurence if nedded. substr($_,0,5) = sprintf("%5d",substr($_,0,5)-$offset) unless (length($_) == 0); } # Prints the lines form the file, whether they have been modified or not. print; # Reset the offset at the end of each file for the next one. if (eof) { $offset = 0; } } } foreach $p ("cx","fp","sp") { # Modification of the files in place. # The unmodified files have the extension .old temporarily added at the end of their name. local $^I = '.old'; # Indication of the files to be modified. local @ARGV; if ($mini_ext eq "small") { @ARGV = glob ("scr/$pdb_name-mini\_$p-{cha,rad}.crd.his"); } else { @ARGV = glob ("scr/$pdb_name\_$p-{cha,rad}.crd.his"); } $offset = 0; # Offset to properly renumber the atom, taking into account the hydrogen atoms deleted. # Processes all the files : while (<>) { # Spots the total number of atoms line. if (length($_) == 6) { # Substracts the hydrogen atoms deleted from the total number of atoms. substr($_,0,5) = sprintf("%5d",(substr($_,0,5)-$hydrogen{$p})); } # Only processes the coordinates lines which are at least 70 characters long. elsif (length($_) >= 70) { foreach $i(1..$#state) { chomp($state[$i]); ($chain,$key,$value)=split(/\t/,$state[$i]); $length=length($chain); if ($length == 1) { $chain="$chain "; } elsif ($length == 2) { $chain="$chain "; } elsif ($length == 3) { $chain="$chain "; } # Spots the histidines according to their residue number. if (substr($_,56,4) == $key) { # Modifies their residue name according to their protonation state. substr($_,11,4) =~ s/(HIS|HSP)/$value/; # Spots the HE2 atom of an HSD. if ( (substr($_,11,4) eq "HSD ") && (substr($_,16,4) eq "HE2 ") ) { # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } # Spots the HD1 atom of an HSE. elsif ( (substr($_,11,4) eq "HSE ") && (substr($_,16,4) eq "HD1 ") ) { # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } } } # Renumbers in place each atom occurence if nedded. substr($_,0,5) = sprintf("%5d",substr($_,0,5)-$offset) unless (length($_) == 0); } # Prints the lines form the file, whether they have been modified or not. print; # Reset the offset at the end of each file for the next one. if (eof) { $offset = 0; } } # Renames the CRD files to to properly comply with the standard used otherwise by the script. Done to be user friendly. # The extension at the end of the file name indicates what corrections have been done to the file. # The extension .his stands for the correction of the histidines according to their protonation state. # The extension .ter stands for the correctionof backbone atoms of the first and last residue as well as the cysteines involved in disulfide bonds to comply with the UHBD standard. if ($mini_ext eq "small") { rename ("scr/$pdb_name-mini\_$p-cha.crd","scr/$pdb_name\_$p-cha.crd.his.ter"); rename ("scr/$pdb_name-mini\_$p-rad.crd","scr/$pdb_name\_$p-rad.crd.his.ter"); rename ("scr/$pdb_name-mini\_$p-cha.crd.his.old","scr/$pdb_name\_$p-cha.crd"); rename ("scr/$pdb_name-mini\_$p-rad.crd.his.old","scr/$pdb_name\_$p-rad.crd"); } else { rename ("scr/$pdb_name\_$p-cha.crd","scr/$pdb_name\_$p-cha.crd.his.ter"); rename ("scr/$pdb_name\_$p-rad.crd","scr/$pdb_name\_$p-rad.crd.his.ter"); rename ("scr/$pdb_name\_$p-cha.crd.his.old","scr/$pdb_name\_$p-cha.crd"); rename ("scr/$pdb_name\_$p-rad.crd.his.old","scr/$pdb_name\_$p-rad.crd"); } } return (\@his,\@before,\@after); } # Corrects the residue name of the histidines according to their protonation state after uhbd calculation. sub his_uhbd { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$pdb_name,$mini_ext) = @_; my (@his,@before,@after); # Arrays recording the modifications done by this function. # Runs for each of the 2 chains and the complex they form. RESULT : foreach $p ("fp","sp","cx") { # Opens the titration result file. open (PK, "< uhbd/his/$pdb_name\_$p/tmp/pk-result-UHBD-titrex.dat") || do { push(@before, "Pk $pdb_name\_$p"); push(@after, "Pk $pdb_name\_$p"); last RESULT }; my @lines = ; close (PK); my $index = 0; my @pk; # Array refering the titrated sites and their corresponding histidine residue number. # Runs along the result file. while ($index <= $#lines) { if (($lines[$index] =~ /^Site /) && ($lines[$index+1] !~ /\w+/)) { push (@before, "Chain $pdb_name\_$p"); push (@after, "Chain $pdb_name\_$p"); last RESULT; $index =$index +1; } # Spots the histidines. elsif ($lines[$index] =~ /HSP/) { # Records in an array the number of the titrated site corresponding to a single histidine and its number in the sequence. $pk[substr($lines[$index],0,4)] = substr($lines[$index],17,4); $index =$index +1; } else{ $index =$index +1; } } # Opens the protonation data file. open (XAVE, "< uhbd/his/$pdb_name\_$p/tmp/xave-UHBD-titrex.dat") || do { push(@before, "Xave $pdb_name\_$p"); push(@after, "Xave $pdb_name\_$p"); last RESULT }; @lines = ; close (XAVE); $index = 0; my @proba; # Array refering the titrated sites and their corresponding histidine protonation state. # Runs along the file. while ($index <= $#lines) { # Spots the title lines which are 8 characters long at the physiological pH of 7.4. if ( (length($lines[$index]) == 8) && (substr($lines[$index],2,5) == 7.400) ) { $index++; # Stays in the same paragraph (in other words at the same pH). while (length($lines[$index]) > 8) { # Spots the lines corresponding to the histidines which are longer than those refering to other residues. # The histines have 2 titrated sites, which means 3 combinations instead of 2 for the other residue with only one titrated site. if (length($lines[$index]) > 27) { # Determines the protonation state of each histidine according to the greatest probability between each of the 3 states. # The 3 columns refer to the HSP, HSE and HSD probabilities respectively. $proba[substr($lines[$index],4,2)] = substr($lines[$index],9,7) >= substr($lines[$index],19,7) ? ( substr($lines[$index],9,7) >= substr($lines[$index],29,7) ? "HSP" : "HSD" ) : ( substr($lines[$index],19,7) >= substr($lines[$index],29,7) ? "HSE" : "HSD" ); } $index++; } # Prevents the script to run along the file if the pH of 7.4 has already been found. last; } $index++; } foreach $k (0..$#pk) { if ($pk[$k] != undef) { $$chain{$pk[$k]} = $proba[$k]; } } } # Opens the titration result file. open (PK, "< uhbd/his/$pdb_name\_cx/tmp/pk-result-UHBD-titrex.dat") || die "Unable to open the file uhbd/his/$pdb_name\_cx/tmp/pk-result-UHBD-titrex.dat : $!\n"; my @pk; # Array refering the titrated sites and their corresponding histidine residue number. # Runs along the result file. while () { # Spots the histidines. if ($_ =~ /HSP/) { # Records in an array the number of the titrated site corresponding to a single histidine and its number in the sequence. $pk[substr($_,0,4)] = substr($_,17,4); } } close (PK); # Opens the protonation data file. open (XAVE, "< uhbd/his/$pdb_name\_cx/tmp/xave-UHBD-titrex.dat") || die "Unable to open the file uhbd/his/$pdb_name\_cx/tmp/xave-UHBD-titrex.dat : $!\n"; my @lines = ; close (XAVE); my $index = 0; my @proba; # Array refering the titrated sites and their corresponding histidine protonation state. # Runs along the file. while ($index <= $#lines) { # Spots the title lines which are 8 characters long at the physiological pH of 7.4. if ( (length($lines[$index]) == 8) && (substr($lines[$index],2,5) == 7.400) ) { $index++; # Stays in the same paragraph (in other words at the same pH). while (length($lines[$index]) > 8) { # Spots the lines corresponding to the histidines which are longer than those refering to other residues. # The histines have 2 titrated sites, which means 3 combinations instead of 2 for the other residue with only one titrated site. if (length($lines[$index]) > 27) { # Determines the protonation state of each histidine according to the greatest probability between each of the 3 states. # The 3 columns refer to the HSP, HSE and HSD probabilities respectively. $proba[substr($lines[$index],4,2)] = substr($lines[$index],9,7) >= substr($lines[$index],19,7) ? ( substr($lines[$index],9,7) >= substr($lines[$index],29,7) ? "HSP" : "HSD" ) : ( substr($lines[$index],19,7) >= substr($lines[$index],29,7) ? "HSE" : "HSD" ); } $index++; } # Prevents the script to run along the file if the pH of 7.4 has already been found. last; } $index++; } foreach $k (0..$#pk) { if ($pk[$k] != undef) { $$complex{$pk[$k]} = $proba[$k]; } } foreach $p ("fp","sp") { while (($key,$value) = each (%$p)) { my @keys = keys %$complex; my @values = values %$complex; foreach $k (0..$#keys) { if (($keys[$k] == $key) && ($values[$k] =~ /(HSD|HSE)/)) { $hydrogen{$p}++; } if (($keys[$k] == $key) && ($values[$k] ne $value)) { push (@his, $key, $p, $value, $$complex{$key}); } } } $hydrogen{$complex} += $hydrogen{$p}; } foreach $p ("fp","sp","cx") { # Modification of the files in place. # The unmodified files have the extension .ter temporarily added at the end of their name. local $^I = '.ter'; # Indication of the files to be modified. local @ARGV; if ($mini_ext eq "small") { @ARGV = glob ("scr/$pdb_name-mini\_$p-{cha,rad}.crd"); } else { @ARGV = glob ("scr/$pdb_name\_$p-{cha,rad}.crd"); } my $offset = 0; # Offset to properly renumber the atom, taking into account the hydrogen atoms deleted. # Processes all the files : while (<>){ # Spots the total number of atoms line. if (length($_) == 6) { # Substracts the hydrogen atoms deleted from the total number of atoms. substr($_,0,5) = sprintf("%5d",(substr($_,0,5)-$hydrogen{$p})); } # Only processes the coordinates lines which are at least 70 characters long. elsif (length($_) >= 70) { while (($key,$value) = each(%$complex)) { # Spots the histidines according to their residue number. if (substr($_,56,4) == $key) { # Modifies their residue name according to their protonation state. substr($_,11,4) =~ s/(HIS|HSP)/$value/; # Spots the HE2 atom of an HSD. if ( (substr($_,11,4) eq "HSD ") && (substr($_,16,4) eq "HE2 ") ) { push (@before,substr($_,16,4)); push (@after,(substr($_,11,4) . substr($_,51,9))); # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } # Spots the HD1 atom of an HSE. elsif ( (substr($_,11,4) eq "HSE ") && (substr($_,16,4) eq "HD1 ") ) { push (@before,substr($_,16,4)); push (@after,(substr($_,11,4) . substr($_,51,9))); # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } } } # Renumbers in place each atom occurence if nedded. substr($_,0,5) = sprintf("%5d",substr($_,0,5)-$offset) unless (length($_) == 0); } # Prints the lines form the file, whether they have been modified or not. print; # Reset the offset at the end of each file for the next one. if (eof) { $offset = 0; } } # Modification of the files in place. # The unmodified files have the extension .old temporarily added at the end of their name. local $^I = '.old'; # Indication of the files to be modified. local @ARGV; if ($mini_ext eq "small") { @ARGV = glob ("scr/$pdb_name-mini\_$p-{cha,rad}.crd.his"); } else { @ARGV = glob ("scr/$pdb_name\_$p-{cha,rad}.crd.his"); } $offset = 0; # Offset to properly renumber the atom, taking into account the hydrogen atoms deleted. # Processes all the files : while (<>) { # Spots the total number of atoms line. if (length($_) == 6) { # Substracts the hydrogen atoms deleted from the total number of atoms. substr($_,0,5) = sprintf("%5d",(substr($_,0,5)-$hydrogen{$p})); } # Only processes the coordinates lines which are at least 70 characters long. elsif (length($_) >= 70) { while (($key,$value) = each(%$complex)) { # Spots the histidines according to their residue number. if (substr($_,56,4) == $key) { # Modifies their residue name according to their protonation state. substr($_,11,4) =~ s/(HIS|HSP)/$value/; # Spots the HE2 atom of an HSD. if ( (substr($_,11,4) eq "HSD ") && (substr($_,16,4) eq "HE2 ") ) { # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } # Spots the HD1 atom of an HSE. elsif ( (substr($_,11,4) eq "HSE ") && (substr($_,16,4) eq "HD1 ") ) { # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } } } # Renumbers in place each atom occurence if nedded. substr($_,0,5) = sprintf("%5d",substr($_,0,5)-$offset) unless (length($_) == 0); } # Prints the lines form the file, whether they have been modified or not. print; # Reset the offset at the end of each file for the next one. if (eof) { $offset = 0; } } # Renames the CRD files to to properly comply with the standard used otherwise by the script. Done to be user friendly. # The extension at the end of the file name indicates what corrections have been done to the file. # The extension .his stands for the correction of the histidines according to their protonation state. # The extension .ter stands for the correction of backbone atoms of the first and last residue as well as the cysteines involved in disulfide bonds to comply with the UHBD standard. if ($mini_ext eq "small") { rename ("scr/$pdb_name-mini\_$p-cha.crd","scr/$pdb_name\_$p-cha.crd.his.ter"); rename ("scr/$pdb_name-mini\_$p-rad.crd","scr/$pdb_name\_$p-rad.crd.his.ter"); rename ("scr/$pdb_name-mini\_$p-cha.crd.his.old","scr/$pdb_name\_$p-cha.crd"); rename ("scr/$pdb_name-mini\_$p-rad.crd.his.old","scr/$pdb_name\_$p-rad.crd"); } else { rename ("scr/$pdb_name\_$p-cha.crd","scr/$pdb_name\_$p-cha.crd.his.ter"); rename ("scr/$pdb_name\_$p-rad.crd","scr/$pdb_name\_$p-rad.crd.his.ter"); rename ("scr/$pdb_name\_$p-cha.crd.his.old","scr/$pdb_name\_$p-cha.crd"); rename ("scr/$pdb_name\_$p-rad.crd.his.old","scr/$pdb_name\_$p-rad.crd"); } } # Modification of the pdb files in place. # The unmodified pdb files have the extension .old added at the end of their name. local $^I = '.old'; # Indication of the files to be modified. local @ARGV = glob ("scr/*.pdb"); #my $offset = 0; # Offset to properly renumber the atom, taking into account the hydrogen atoms deleted. # Processes all the files : while (<>){ # Only processes the coordinates lines which are at least 70 characters long. if (length($_) >= 70) { while (($key,$value) = each(%$complex)) { # Spots the histidines according to their residue number. if (substr($_,22,4) == $key) { # Modifies their residue name according to their protonation state. substr($_,17,4) =~ s/(HIS|HSP)/$value/; # Spots the HE2 atom of an HSD. if ( (substr($_,17,4) eq "HSD ") && (substr($_,13,4) eq "HE2 ") ) { # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } # Spots the HD1 atom of an HSE. elsif ( (substr($_,17,4) eq "HSE ") && (substr($_,13,4) eq "HD1 ") ) { # Deletes the atom line. $_ = ""; # Increases the offset to take into account this deletion for the next atoms. $offset++; } } } # Renumbers in place each atom occurence if nedded. substr($_,6,5) = sprintf("%5d",substr($_,6,5)-$offset) unless (length($_) == 0); } # Prints the lines form the file, whether they have been modified or not. print; # Reset the offset at the end of each file for the next one. if (eof) { $offset = 0; } } # Allows to rename the pdb files # Files .pdb.his have good state for HIS @pdb = glob ("scr/*.pdb"); foreach $s(0..$#pdb) { rename ("$pdb[$s]","$pdb[$s].his"); } # Files .pdb have all HSP @pdb = glob ("scr/*.pdb.old"); foreach $s(0..$#pdb) { ($name)=($pdb[$s]=~/(.*\.pdb)\.old/); rename ("$pdb[$s]","$name"); } return (\@his,\@before,\@after); } # Creates an input file build_psf_crd.inp from template. sub print_build_psf_crd { my $proto = shift; return (0) if (ref($proto)); my ($atoms_ref,$pdb_ref,$path,$Verbose) = @_; open (BUILDTMP, "< tmp/build_psf_crd.tmp") || die "Unable to open the file tmp/build_psf_crd.tmp : $!\n"; @build = ; close (BUILDTMP); open (BUILDINP, "> inp/$pdb_ref->[-1].build_psf_crd.inp") || die "Unable to create the file inp/$pdb_ref->[-1].build_psf_crd.inp : $!\n"; my $index = 0; my $chain_id = ""; # Identifier of the last chain processed (in the order it appears in the PDB file). my $seg_id=""; # Identifier of the last segid processed (in the order it appears in the PDB file). # Runs along the template file. foreach $b (0..$#build) { # Identification of the keyword $SET$ in the template file. if ($build[$b] =~ /^\$SET\$/) { warn "\tSetting up the input file ...\n" if ($Verbose == 1); printf (BUILDINP "SET path %s ! Absolute path of the working directory\n", $path); printf (BUILDINP "SET 1 %s ! First chain\n", $pdb_ref->[0]); printf (BUILDINP "SET 2 %s ! Second chain\n", $pdb_ref->[1]); printf (BUILDINP "SET 3 %s ! Water molecules\n", $pdb_ref->[-2]); printf (BUILDINP "SET 4 %s ! Complex\n", $pdb_ref->[-1]); } elsif ($build[$b]=~ /^\$LIB\$/) { printf(CRDINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD rtf unit 2 card\nCLOSE unit 2\n",$top); printf(CRDINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD param unit 2 card\nCLOSE unit 2\n",$par); } # Identification of the keyword $PATCH$ in the template file. elsif ($build[$b] =~ /^\$PATCH\$/) { # Runs along the atoms array. while ($index <= $#{$atoms_ref}) { # The current chain hasn't been treated yet. if (($atoms_ref->[$index]->chain() ne $chain_id) && ($atoms_ref->[$index]->chain() ne "W")) { # The current chain identifier is recorded to indicate it has been treated. $chain_id = $atoms_ref->[$index]->chain(); warn "\tPatching the cysteins involved in disulfide bonds in chain $chain_id ...\n" if ($Verbose == 1); printf (BUILDINP "\nGENERATE %1s setup\n", $chain_id); } # Processing the disulfide bonds. if ((defined $atoms_ref->[$index]->ssbond_res()) && ($atoms_ref->[$index]->chain() eq $chain_id)) { printf (BUILDINP "PATCH DISU %1s %3d %1s %3d\n", $atoms_ref->[$index]->chain(), $atoms_ref->[$index]->residue_number(), $atoms_ref->[$index]->ssbond_chain(), $atoms_ref->[$index]->ssbond_res()); } $index++; # All the chains (but Water) have been treated. if ($index > $#{$atoms_ref} || $atoms_ref->[$index]->chain() ne $chain_id) { last; } } } # All other lines without keywords. else { printf (BUILDINP "%s", $build[$b]); } } close (BUILDINP); # Returne the PDB files array. return ($pdb_ref); } # Creates an input file sas.inp from template. # Due to work for a regular complex energy calculation and for calculations on different conformations extracted from the molecular dynamics. sub print_sas { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$path,$Verbose,$Int,$top,$par,$fp,$sp,$ext,$togsep) = @_; $ext="-$ext"; # Opens the classic sas input when the program is used for the beginning. if ($Int == 0) { open (SASTMP, "< tmpsep/sas.tmp") || die "Unable to open the file tmpsep/sas.tmp : $!\n"; } # Opens the sas input who can be modify when the option -Int is used. else { if ($togsep eq "tog") { # allows to open the template file to calculate one value of sas per residue open (SASTMP, "< tmpsep/sasI.tmp") || die "Unable to open the file tmpsep/sasI.tmp : $!\n"; } elsif ($togsep eq "sep") { # allows to open the template file to calculate sc and bb values of sas per residue open (SASTMP, "< tmpsep/sas-sep.tmp") || die "Unable to open the file tmpsep/sasI.tmp : $!\n"; } } @sas = ; close (SASTMP); # Split the last value of the PDB files array. In all cases, $pdb is the PDB name of the complex (example : 1jtp_al). # In case of the processing of a conformation, $conf is the conformation number (example : conf_17). if ($Int == 1) { ($pdb,$conf) = split('\.',$pdb_ref->[-1]); open (SASINP, "> inp/$pdb_ref->[-1]/$pdb_ref->[-1].sas.inp") || die "Unable to create the file inp/$pdb_ref->[-1]/$pdb_ref->[-1].sas.inp : $!\n"; } else { open (SASINP, "> inp/$pdb_ref\_complex.sas.inp") || die "Unable to create the file inp/$pdb_ref\_complex.sas.inp : $!\n"; } # Runs along the template file. foreach $s (0..$#sas) { # Identification of the keyword $SET$ in the template file. if ($sas[$s] =~ /^\$SET\$/) { warn "\tSetting up the input file ...\n" if ($Verbose == 1); printf (SASINP "SET path %s ! path of the working directory\n", $path); if ($Int == 1) { printf (SASINP "SET inp %s ! input file name\n", $pdb); printf (SASINP "SET out %s ! output file name\n", $pdb_ref->[-1]); printf (SASINP "SET res %i ! residue number\n",$res_nb); } if ($Int == 0) { printf (SASINP "SET name %s ! name of the file \n", $pdb_ref); } printf (SASINP "SET fp %s ! first chain\n", $fp); printf (SASINP "SET sp %s ! second chain\n", $sp); printf (SASINP "SET ext %s ! second chain\n", $ext); } elsif ($sas[$s]=~ /^\$LIB\$/) { printf(SASINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD rtf unit 2 card\nCLOSE unit 2\n",$top); printf(SASINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD param unit 2 card\nCLOSE unit 2\n",$par); } # All other lines without keywords. else { printf (SASINP "%s", $sas[$s]); } } close (SASINP); } # Creates an input file sas.inp from template. # Due to work for a regular complex energy calculation and for calculations on different conformations extracted from the molecular dynamics. sub print_vdw { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$path,$Verbose,$Int,$top,$par,$fp,$sp,$ext,$togsep) = @_; $ext="-$ext"; # Opens the classic vdw input when the program is used for the beginning. if ($Int ==0) { open (VDWTMP, "< tmpsep/vdw.tmp") || die "Unable to open the file tmpsep/vdw.tmp : $!\n"; } # Opens the vdw input who can be modify when the option -Int is used. else { if ($togsep eq "tog") { # allows to open the template file to calculate one value of vdw per residue open (VDWTMP, "< tmpsep/vdwI.tmp") || die "Unable to open the file tmpsep/vdwI.tmp : $!\n"; } elsif ($togsep eq "sep") { # allows to open the template file to calculate sc and bb values of vdw per residue open (VDWTMP, "< tmpsep/vdw-sep.tmp") || die "Unable to open the file tmpsep/vdw-sep.tmp : $!\n"; } } @vdw = ; close (VDWTMP); # Split the last value of the PDB files array. In all cases, $pdb is the PDB name of the complex (example : 1jtp_al). # In case of the processing of a conformation, $conf is the conformation number (example : conf_17). if ($Int == 1) { ($pdb,$conf) = split('\.',$pdb_ref->[-1]); open (VDWINP, "> inp/$pdb_ref->[-1]/$pdb_ref->[-1].vdw.inp") || die "Unable to create the file inp/$pdb_ref->[-1]/$pdb_ref->[-1].vdw.inp : $!\n"; } else { open (VDWINP, "> inp/$pdb_ref\_complex.vdw.inp") || die "Unable to create the file inp/$pdb_ref\_complex.vdw.inp : $!\n"; } # Runs along the template file. foreach $v (0..$#vdw) { # Identification of the keyword $SET$ in the template file. if ($vdw[$v] =~ /^\$SET\$/) { warn "\tSetting up the input file ...\n" if ($Verbose == 1); printf (VDWINP "SET path %s ! path of the working directory\n", $path); # When the option -Int is used, the user gives the number of residues of each chain. if ($Int == 1) { printf (VDWINP "SET inp %s ! input file name\n", $pdb); printf (VDWINP "SET out %s ! output file name\n", $pdb_ref->[-1]); } if ($Int == 0) { printf (VDWINP "SET name %s ! name of the file \n", $pdb_ref); } printf (VDWINP "SET fp %s ! first chain\n", $fp); printf (VDWINP "SET sp %s ! second chain\n", $sp); printf (VDWINP "SET ext %s ! extension\n", $ext); } elsif ($vdw[$v]=~ /^\$LIB\$/) { printf(VDWINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD rtf unit 2 card\nCLOSE unit 2\n",$top); printf(VDWINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD param unit 2 card\nCLOSE unit 2\n",$par); } # All other lines without keywords. else { printf (VDWINP "%s", $vdw[$v]); } } close (VDWINP); } # Sums the solvant accessible surface, Van der Waals and electrostatic energies for each residue of the complex. sub energy_sum { my $proto = shift; return (0) if (ref($proto)); my ($f1,$f2,$f3,$atoms_ref,$pdb_name,@ref) = @_; # Records useful informations such as the PDB and the 2 chains names. # ELECTROSTATIC ENERGY DECOMPOSITION open (F1, "<$f1") || die ("Unable to open the file $f1 : $!\n"); my @lines = ; chomp (@lines); close (F1); my $t = 0; # initialization of the index of the following array. my @tab; foreach $l (0..$#lines) { # Extracts of the columns to create a 2 dimensions array if (length($lines[$l]) == 37) { $tab[$t][0] = substr($lines[$l],16,3); # Titrated site number. $tab[$t][1] = substr($lines[$l],0,3); # Residue name. $tab[$t][2] = substr($lines[$l],20,8); # Energy elec. $tab[$t][3] = substr($lines[$l],7,3); # Residue number. $tab[$t][4] = substr($lines[$l],29,8); # Energy desolv $t++; } } # Sorts out the data from the array according to column #1 my @range = sort {$$a[0] <=> $$b[0]} @tab; # open (TEST, ">out/test.out"); # permet de voir les energies electr pour chaque residu classe du premier au dernier site titrable # $av=$#range; # foreach $i(0..$#range) { # printf (TEST "$range[$i][1] $range[$i][3] $range[$i][0] $range[$i][2] $range[$i][4]\n"); # } my $j = 1; # Initialization of the index of the following arrays. my @add; # Array containing the energy for each residue. my @desolv; # Array containing the energy of desolvation for each residue. my @res; # Array containing the name of each residue. $nb_rang=$#range+1; for ($i=0;$i<$nb_rang;$i++) { # The residue number of the current line correspond to that of the next line. $add[$j] = $range[$i][2]; $desolv[$j] = $range[$i][4]; if ($range[$i][1] =~ /(NTE|5TE|ACE|CTE)/) { $res[$j] = $range[$i+1][1]; } elsif ($range[$i][1] =~ /(CT3|CT2)/) { $res[$j] = $range[$i-1][1]; } else { $res[$j] = $range[$i][1]; } $a=$i+1; while (($range[$i][3] == $range[$a][3]) && (($range[$i][1] eq $range[$a][1]) || ($range[$i][1] eq "NTE") || ($range[$i][1] eq "CTE") || ($range[$i][1] eq "ACE") || ($range[$a][1] eq "CT3") || ($range[$a][1] eq "CT2") || ($range[$i][1] eq "DEO") || ($range[$a][1] eq "DEO") || ($range[$a][1] eq "3TE") || ($range[$i][1] eq "5TE"))) { $add[$j] = $add[$j]+$range[$a][2]; $desolv[$j] = $desolv[$j]+$range[$a][4]; $a++; } $i=$a-1; $j++; } # SOLVENT ACCESSIBLE SURFACE open (F2,"<$f2") || die ("Unable to open the file $f2 : $!\n"); my @lines = ; chomp (@lines); close (F2); # Isolation of the part containing the data to be read : my $step = 0; # Initialization of a counter to record the current step number. my ($begin,$end); # Runs along the file. foreach $l (0..$#lines) { # Identification of the beginning of the data. if (($lines[$l] =~ / SUM OF INDIVIDUAL /) && ($step == 0)) { $begin = $l+1; $step++; } # Identification of the end of the data. elsif (($lines[$l] =~ / SUM OF INDIVIDUAL /) && ($step == 1)) { $end = $l; } } my $s = 1; # Initialization of the index of the following array. my @sas; # Processing the chain. foreach $l ($begin..$end) { # Extraction of the fields from the current line. my @surf = split(' ',$lines[$l]); # Isolation of the column #3 containing the solvent accessible surfaces multiplicated by the gamma factor. $sas[$s] = $surf[2]*0.005; $s++; } # CALCULATION OF THE VAN DER WAALS RADII open (F3, "<$f3") || die "Unable to open the file $f3 : $!\n"; my @lines = ; chomp (@lines); close (F3); # Isolation of the part containing the data to be read : my $v = 1; # initialization of the index of the following array. foreach $l (0..$#lines) { if ($lines[$l]=~ /^ [0-9]/) { my @radius = split(' ',$lines[$l]); # Isolation of the column #3 containing the van der Waals radii divided by 2 because 2 radii are part of the surface of interaction. $vdw[$v] = $radius[2]*0.5; $v++; } } # RESULT FILE open (OUT,"> out/$pdb_name.decomp.dat") || die "Unable to create the file out/$pdb_name.decomp.dat : $!\n"; printf (OUT "RES # ELEC SAS VDW TOTAL DESOLV INTERACTION (ELEC-DESOLV)\n"); #open (EFF,"> out/$pdb_name.amino-acid-efficiency.dat") || die "Unable to create the file out/$pdb_name.amino-acid-efficiency.dat : $!\n"; #printf (EFF "RES # ELEC SAS VDW TOTAL DESOLV INTERACTION\n"); my @ener; # Array containing the total energy for each residue. $elec1=0;$elec2=0; $sas1=0;$sas2=0; $vdw1=0;$vdw2=0; $tot1=0;$tot2=0; $desolv1=0;$desolv2=0; $inte1=0;$inte2=0; $av=0; foreach $e (1..$#res){ # Calculating the free energy by summing E(elect) + E(sas) + E(vdw) $ener[$e] = $add[$e] + $sas[$e] + $vdw[$e]; # Calculation of the sum of energies for the first part of the system if (!(($res[$e] eq $ref[0]) && ($e == $ref[1])) && ($av==0)) { $elec1=$elec1+$add[$e]; $sas1=$sas1+$sas[$e]; $vdw1=$vdw1+$vdw[$e]; $tot1=$tot1+$ener[$e]; $desolv1=$desolv1+$desolv[$e]; $inte1=$inte1+($add[$e]-$desolv[$e]); } # Allows to know when the second part of the system begins elsif (($res[$e] eq $ref[0]) && ($e == $ref[1])) { $av=1; # allows to know that is the second part of the system $elec2=$add[$e]; # adds the values of energies in a new variable for the second part of the system $sas2=$sas[$e]; $vdw2=$vdw[$e]; $tot2=$ener[$e]; $desolv2=$desolv[$e]; $inte2=($add[$e]-$desolv[$e]); } # for the second part of the system, calculation of the sum of energies elsif ($av==1) { $elec2=$elec2+$add[$e]; $sas2=$sas2+$sas[$e]; $vdw2=$vdw2+$vdw[$e]; $tot2=$tot2+$ener[$e]; $desolv2=$desolv2+$desolv[$e]; $inte2=$inte2+($add[$e]-$desolv[$e]); } ($nb_heavy[$e])=Leto::GrpAtoms->amino_heavy_atoms($res[$e]); #printf (EFF "%3s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n", $res[$e], $e,$add[$e]/$nb_heavy[$e], $sas[$e]/$nb_heavy[$e], $vdw[$e]/$nb_heavy[$e], $ener[$e]/$nb_heavy[$e],$desolv[$e]/$nb_heavy[$e], ($add[$e]-$desolv[$e])/$nb_heavy[$e] ); printf (OUT "%3s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n", $res[$e], $e, $add[$e], $sas[$e], $vdw[$e], $ener[$e], $desolv[$e], $add[$e]-$desolv[$e]); } # Calculation of the total of energies for the two parts of the system $elec3=$elec1+$elec2; $sas3=$sas1+$sas2; $vdw3=$vdw1+$vdw2; $tot3=$tot1+$tot2; $desolv3=$desolv1+$desolv2; $inte3=$inte1+$inte2; # writes in a file the results #open (OUTB,"> out/$pdb_name.sum_energies.dat") || die "Unable to create the file out/$pdb_name.sum_energies.dat : $!\n"; #printf (OUTB "%s : Sum of energies for each part of the system\n",$pdb_name); #printf (OUTB " # Sum part1 Sum part2 Sum tot\n"); #printf (OUTB "ELEC %7.4f %7.4f %7.4f\n", $elec1,$elec2,$elec3); #printf (OUTB "SAS %7.4f %7.4f %7.4f\n", $sas1,$sas2,$sas3); #printf (OUTB "VDW %7.4f %7.4f %7.4f\n", $vdw1,$vdw2,$vdw3); #printf (OUTB "TOT %7.4f %7.4f %7.4f\n", $tot1,$tot2,$tot3); #printf (OUTB "DESOLV %7.4f %7.4f %7.4f\n", $desolv1,$desolv2,$desolv3); #printf (OUTB "INTE %7.4f %7.4f %7.4f\n", $inte1,$inte2,$inte3); close (OUT); #close (OUTB); # PDB FILE open (PDB, "> scr/$pdb_name.ener.pdb") || die "Unable to create the file scr/$pdb_name.ener.pdb : $!\n"; my $res_n1 = 1; my $res_n2 = 1; # Initialization of the current residue number for each chain. my $FC = uc $fc; my $SC = uc $sc; # The chains names must be in upper character. # Runs along the atoms array. foreach $index (0..$#{$atoms_ref}) { # Processing the 1st chain. if ($atoms_ref->[$index]->chain() eq $FC) { # The current line belongs to the current residue. if ($atoms_ref->[$index]->residue_number() == $res_n1) { # Replacing the Bfactor attribute by the free ernergy sum in the PDB file. $atoms_ref->[$index]->bfactor($ener[$res_n1]); printf (PDB "%s", $atoms_ref->[$index]->print_pdb_atom()); } # Go to the next residue. else { $res_n1++; # Replacing the Bfactor attribute by the free ernergy sum in the PDB file. $atoms_ref->[$index]->bfactor($ener[$res_n1]); printf (PDB "%s", $atoms_ref->[$index]->print_pdb_atom()); } } # Processing the 2nd chain. elsif ($atoms_ref->[$index]->chain() eq $SC) { # The current line belongs to the current residue. if ($atoms_ref->[$index]->residue_number() == $res_n2) { # Replacing the Bfactor attribute by the free ernergy sum in the PDB file. $atoms_ref->[$index]->bfactor($ener[$res_n1+$res_n2]); printf (PDB "%s", $atoms_ref->[$index]->print_pdb_atom()); } # Go to the next residue. else { $res_n2++; # Replacing the Bfactor attribute by the free ernergy sum in the PDB file. $atoms_ref->[$index]->bfactor($ener[$res_n1+$res_n2]); printf (PDB "%s", $atoms_ref->[$index]->print_pdb_atom()); } } # End of file. else { printf (PDB "%s", $atoms_ref->[$index-1]->print_pdb_ter()); printf (PDB "END\n"); # No need to go further. last; } } close (PDB); return (); } # Creates a shell script decomp_run.com from template. sub print_decomp_run { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$Uhbd,$Verbose,$grid,$Int,$site,$path) = @_; # Opens the classic decomp_run input when the program is used for the beginning. if ($Int == 0) { open (RUNTMP, "< tmpsep/decomp_run.tmp") || die "Unable to open the file tmpsep/decomp_run.tmp : $!\n"; } # Opens the decomp_run input who can be modify when the option -Int is used. else { open (RUNTMP, "< tmpsep/decomp_runI.tmp") || die "Unable to open the file tmpsep/decomp_run.tmp : $!\n"; } @run = ; close (RUNTMP); # The shell script is made to be used on a conformation extracted from a trajectory. if ($Int == 1) { open (RUN, "> inp/$pdb_ref->[-1]/$pdb_ref->[-1].decomp_run.com") || die "Unable to create the file inp/$pdb_ref->[-1]/$pdb_ref->[-1].decomp_run.com : $!\n"; } # The shell script isn't made to be used on a specific conformatio else { open (RUN, "> inp/$pdb_ref\_complex.decomp_run.com") || die "Unable to create the file inp/$pdb_ref\_complex.decomp_run.com : $!\n"; } my ($pdb,$conf) = split('\.',$pdb_ref->[-1]); # Runs along the template file. foreach $r (0..$#run) { # Identification of the keyword $SET$. if ($run[$r] =~ /^\$SET\$/) { warn "\tSetting up the input file ...\n" if ($Verbose == 1); if ($Int == 1) { printf (RUN "set fc=%s.%s\n", $pdb_ref->[0], $conf); printf (RUN "set sc=%s.%s\n", $pdb_ref->[1], $conf); printf (RUN "set complex=%s\n", $pdb_ref->[-1]); } else { printf (RUN "set fc=%s\_fp\n", $pdb_ref); printf (RUN "set sc=%s\_sp\n", $pdb_ref); printf (RUN "set complex=%s\_cx\n", $pdb_ref); } printf (RUN "set SCRIPTS=%s\n", $Uhbd); printf (RUN "set GRID=1.0\n"); ($site_ok) = ($site =~ /site(.*)\.dat/); printf (RUN "set SITE=$site_ok\n"); } # Identification of the keyword $PDB$. elsif ($run[$r] =~ /^\$PDB\$/) { if ($Int == 1) { printf (RUN "cd %s\n", $pdb_ref->[-1]); } else { printf (RUN "cd %s\_complex\n", $pdb_ref); } } # All other lines without keywords. else { printf (RUN "%s", $run[$r]); } } close (RUN); # Makes the file executable. if ($Int == 1) { chmod (0751,"inp/$pdb_ref->[-1]/$pdb_ref->[-1].decomp_run.com"); } else { chmod (0751,"inp/$pdb_ref\_complex.decomp_run.com"); } } # Creates input files gbmv.inp, gbmv_fp.str, gbmv_sp.str from template. # Due to work for a regular complex energy calculation and for calculations on different conformations extracted from the molecular dynamics. sub print_gbmv { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$path,$Verbose,$Int,$top,$par,$fp,$sp) = @_; # Opens the classic gbmv input when the program is used for the beginning. if ($Int ==0) { open (GBMVTMP, "< tmpsep/gbmv.tmp") || die "Unable to open the file tmpsep/gbmv.tmp : $!\n"; } # Opens the gbmv input who can be modify when the option -Int is used. else { open (GBMVTMP, "< tmpsep/gbmvI.tmp") || die "Unable to open the file tmpsep/gbmvI.tmp : $!\n"; } @gbmv = ; close (GBMVTMP); # Split the last value of the PDB files array. In all cases, $pdb is the PDB name of the complex (example : 1jtp_al). # In case of the processing of a conformation, $conf is the conformation number (example : conf_17). if ($Int == 1) { ($pdb,$conf) = split('\.',$pdb_ref->[-1]); open (GBMVINP, "> inp/$pdb_ref->[-1]/$pdb_ref->[-1].gbmv.inp") || die "Unable to create the file inp/$pdb_ref->[-1]/$pdb_ref->[-1].gbmv.inp : $!\n"; } else { open (GBMVINP, "> inp/$pdb_ref\_complex.gbmv.inp") || die "Unable to create the file inp/$pdb_ref\_complex.gbmv.inp : $!\n"; } # Runs along the template file. foreach $v (0..$#gbmv) { # Identification of the keyword $SET$ in the template file. if ($gbmv[$v] =~ /^\$SET\$/) { warn "\tSetting up the input file ...\n" if ($Verbose == 1); # Split the 1st value of the PDB files array. The 2nd of these elements is the name of the 1st chain of the complex. my @fc = split('\_',uc($pdb_ref->[0])); # Split the 2nd value of the PDB files array. The 2nd of these elements is the name of the 2nd chain of the complex. my @sc = split('\_',uc($pdb_ref->[1])); printf (GBMVINP "SET path %s ! path of the working directory\n", $path); if ($Int == 1) { printf (GBMVINP "SET cx %s ! output file name\n", $pdb_ref->[-1]); #printf (GBMVINP "SET fp %s.%s ! output file name\n", $pdb_ref->[0],$conf); #printf (GBMVINP "SET sp %s.%s ! output file name\n", $pdb_ref->[1],$conf); printf (GBMVINP "SET fp %s ! output file name\n", $fp); printf (GBMVINP "SET sp %s ! output file name\n", $sp); } else { printf (GBMVINP "SET name %s ! output file name\n", $pdb_ref); printf (GBMVINP "SET fp %s ! output file name\n", $fp); printf (GBMVINP "SET sp %s ! output file name\n", $sp); } } elsif ($gbmv[$v]=~ /^\$LIB\$/) { printf(GBMVINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD rtf unit 2 card\nCLOSE unit 2\n",$top); printf(GBMVINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD param unit 2 card\nCLOSE unit 2\n",$par); } # Identification of the keyword $PSF$ in the template file. elsif ($gbmv[$v] =~ /^\$PSF\$/) { if (-e "$path/scr/$pdb_ref->[-1]/$pdb_ref->[-1].dyna_desolv.psf") { printf (GBMVINP "open unit 1 card read name \@path/scr/$pdb_ref->[-1]/$pdb_ref->[-1].dyna_desolv.psf\n"); } else { printf (GBMVINP "open unit 1 card read name \@path/scr/$pdb.dyna_desolv.psf\n"); } } # All other lines without keywords. else { printf (GBMVINP "%s", $gbmv[$v]); } } close (GBMVINP); if ($Int == 1) { # copy the gbmv_fp.str and modify the gbmv_sp.str (puts the good name for the psf). system `cp tmpsep/gbmv_fpI.str inp/$pdb_ref->[-1]`; open (SPTMP, "< tmpsep/gbmv_spI.str") || die "Unable to open the file tmpsep/gbmv_spI.str : $!\n"; @sp = ; close (SPTMP); open (GBMVSP, "> inp/$pdb_ref->[-1]/gbmv_spI.str") || die "Unable to create the file inp/$pdb_ref->[-1]/gbmv_spI.str : $!\n"; foreach $v (0..$#sp) { # Identification of the keyword $PSF$ in the template file. if ($sp[$v] =~ /^\$PSF\$/) { if (-e "$path/scr/$pdb_ref->[-1]/$pdb_ref->[-1].dyna_desolv.psf") { printf (GBMVSP "open unit 1 card read name \@path/scr/$pdb_ref->[-1]/$pdb_ref->[-1].dyna_desolv.psf\n"); } else { printf (GBMVSP "open unit 1 card read name \@path/scr/$pdb_ref->[-1].dyna_desolv.psf\n"); } } else { printf (GBMVSP "%s", $sp[$v]); } } } else { # copy the gbmv_fp.str and the gbmv_sp.str file. system `cp tmpsep/gbmv_fp.str inp`; system `cp tmpsep/gbmv_sp.str inp`; } } # Calculate the elec free energy from the output of previous charmm run # Sum the different terms (elec=gas + solv cx - solv fp -solv sp) per residu sub sum_gbmv { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$path,$Verbose,$Int,$conf) = @_; # FOR WHOLE SYSTEM foreach $i(0..2) { if ($i == 0) { $p="fp"; } elsif ($i == 1) { $p="sp"; } elsif ($i == 2) { $p="cx"; } # open the file that contains the charges if ($Int ==1) { open(CHA, "<$path/scr/$pdb_ref->[-1]/$pdb_ref->[$i].$conf-cha.crd.his") || die "Unable to open the file $path/scr/$pdb_ref->[-1]/$pdb_ref->[$i].$conf-cha.crd.his : $!\n"; } else { open(CHA, "<$path/scr/$pdb_ref\_$p-cha.crd.his") || die "Unable to open the file $path/scr/$pdb_ref\_$p-cha.crd.his : $!\n"; } chomp(@cha1=); close(CHA); # allow to remove the first lines of the file in the array $j=0; @cha=0; foreach $w(0..$#cha1) { if (length($cha1[$w])>=70) { $cha[$j]=$cha1[$w]; $j++; } } # open the file that contains the alpha (=Born radii) if ($Int ==1) { open(ALPHA,"<$path/gbmv/$pdb_ref->[-1]/$pdb_ref->[$i].$conf-alpha.crd") || die "Unable to open the file $path/gbmv/$pdb_ref->[-1]/$pdb_ref->[$i].$conf-alpha.crd : $!\n"; } else { open(ALPHA, "<$path/gbmv/$pdb_ref\_$p-alpha.crd") || die "Unable to open the file $path/gbmv/$pdb_ref\_$p-alpha.crd : $!\n"; } chomp(@alpha1=); close(ALPHA); # allow to remove the first lines of the file in the array $j=0; @alpha=0; foreach $w(0..$#alpha1) { if (length($alpha1[$w])>=70) { $alpha[$j]=$alpha1[$w]; $j++; } } # check if the both arrays have the same number of line if ($#cha != $#alpha) { die "The both files don't have the same number of line!\n"; } # open a new file that will contain the result in the last column if ($Int ==1) { open(OUT,">$path/gbmv/$pdb_ref->[-1]/$pdb_ref->[$i].$conf-solv.crd") || die "Unable to open the file $path/gbmv/$pdb_ref->[-1]/$pdb_ref->[$i].$conf-solv.crd : $!\n"; } else { open(OUT,">$path/gbmv/$pdb_ref\_$p-solv.crd")|| die "Unable to open the file $path/gbmv/$pdb_ref\_$p-solv.crd : $!\n"; } printf(OUT "# SOLVATION CALCULATION\n# RESNUM SEGID RESNAME SOLV\n"); foreach $x(0..$#cha) { # you can use $x(0..$#alpha) normaly it's the same thing) $qi=substr($cha[$x],60,10); # charge of atom i $ai=substr($alpha[$x],60,10); # born radius of atom i $xi=substr($alpha[$x],20,10); # X-coordinate of atom i $yi=substr($alpha[$x],30,10); # Y-coordinate of atom i $zi=substr($alpha[$x],40,10); # Z-coordinate of atom i $sum=0; foreach $y(0..$#cha) { # foreach atom except atom i if ($y == $x) { next; } $qj=substr($cha[$y],60,10); # charge of atom j $aj=substr($alpha[$y],60,10); # born radius of atom j $xj=substr($alpha[$y],20,10); # X-coordinate of atom j $yj=substr($alpha[$y],30,10); # Y-coordinate of atom j $zj=substr($alpha[$y],40,10); # Z-coordinate of atom j $rij = sqrt( (($xi-$xj)*($xi-$xj)) + (($yi-$yj)*($yi-$yj)) + (($zi-$zj)*($zi-$zj)) ); $coul= ($qi*$qj)/( sqrt( ($rij*$rij) + $ai*$aj*exp(-$rij*$rij/(8*$ai*$aj)))); $sum=$sum + $coul; } $dgi= (-166.0 *(1-1/80)) * (($qi * $qi) / $ai) + (-166.0 *(1-1/80)) * $sum; $alpha_new[$x]=$alpha[$x]; substr($alpha_new[$x],60,10)=""; printf (OUT "$alpha_new[$x]%10.5f\n",$dgi); } close(OUT); } foreach $a(0..2) { if ($a == 0) { $p="fp"; } elsif ($a == 1) { $p="sp"; } elsif ($a == 2) { $p="cx"; } # Read the solvation files if ($int == 1) { open(SOLVTOT,"<$path/gbmv/$pdb_ref->[-1]/$pdb_ref->[$a].$conf-solv.crd") || die "Unable to open the file $path/gbmv/$pdb_ref->[-1]/$pdb_ref->[$a].$conf-solv.crd : $!\n"; } else { open(SOLVTOT,"<$path/gbmv/$pdb_ref\_$p-solv.crd") || die "Unable to open the file $path/gbmv/$pdb_ref\_$p-solv.crd : $!\n"; } chomp(@solv=); close(SOLVTOT); if ($int == 1) { open(SOLVRESI,">$path/gbmv/$pdb_ref->[-1]/$pdb_ref->[$a].$conf-solv_resi.crd") || die "Unable to open the file $path/gbmv/$pdb_ref->[-1]/$pdb_ref->[$a].$conf-solv_resi.crd : $!\n"; } else { open(SOLVRESI,">$path/gbmv/$pdb_ref\_$p-solv_resi.crd") || die "Unable to open the file $path/gbmv/$pdb_ref\_$p-solv_resi.crd : $!\n"; } $j=0; $#tab=-1; $nb_solv=$#solv+1; for ($i=0;$i<$nb_solv;$i++) { if (length($solv[$i])<70) { # allows to print in the new file the first commented lines printf(SOLVRESI "$solv[$i]\n"); } else { $b=$i+1; # allows to read the next line $tab[$j][0]=substr($solv[$i],60,10); # solv # Extracts name, segid and resnum of resid $tab[$j][1]=substr($solv[$i],11,3); # name $tab[$j][2]=substr($solv[$i],51,4); # segid $tab[$j][3]=substr($solv[$i],56,4); # resnum while ((substr($solv[$b],51,4) eq substr($solv[$i],51,4)) && (substr($solv[$b],56,4) == substr($solv[$i],56,4))) { $tab[$j][0]=$tab[$j][0]+ substr($solv[$b],60,10); # solvresi $b++; } $j++; $i=$b-1; # allows to pass lines already read } } # prints the result in a new file foreach $j(0..$#tab) { printf(SOLVRESI "%4i\t%4s\t%3s\t%10.5f\n",$tab[$j][3],$tab[$j][2],$tab[$j][1],$tab[$j][0]); } close(SOLVRESI); } if ($Int == 1) { open(GAS,"<$path/gbmv/$pdb_ref->[-1]/gaz-decompo.dat") || die "Unable to open the file gaz-decompo.dat : $!\n"; } else { open(GAS,"<$path/gbmv/gaz-decompo.dat") || die "Unable to open the file gaz-decompo.dat : $!\n"; } chomp(@gas=); close(GAS); if ($Int == 1) { open(SOLV,"<$path/gbmv/$pdb_ref->[-1]/$pdb_ref->[2].$conf-solv_resi.crd") || die "Unable to open the file $path/gbmv/$pdb_ref->[-1]/$pdb_ref->[2].$conf-solv_resi.crd : $!\n"; } else { open(SOLV,"<$path/gbmv/$pdb_ref\_cx-solv_resi.crd") || die "Unable to open the file $path/gbmv/$pdb_ref\_cx-solv_resi.crd : $!\n"; } chomp(@solv_cx=); close(SOLV); if ($Int == 1) { open(SOLV,"<$path/gbmv/$pdb_ref->[-1]/$pdb_ref->[1].$conf-solv_resi.crd") || die "Unable to open the file $path/gbmv/$pdb_ref->[-1]/$pdb_ref->[1].$conf-solv_resi.crd : $!\n"; } else { open(SOLV,"<$path/gbmv/$pdb_ref\_sp-solv_resi.crd") || die "Unable to open the file $path/gbmv/$pdb_ref\_sp-solv_resi.crd : $!\n"; } chomp(@solv_sp=); close(SOLV); if ($Int == 1) { system `cp $path/gbmv/$pdb_ref->[-1]/$pdb_ref->[0].$conf-solv_resi.crd $path/gbmv/$pdb_ref->[-1]/fpsp-solv_resi.crd`; open(SOLV,">>$path/gbmv/$pdb_ref->[-1]/fpsp-solv_resi.crd") || die "Unable to open the file $path/gbmv/$pdb_ref->[-1]/fpsp-solv_resi.crd : $!\n"; } else { system `cp $path/gbmv/$pdb_ref\_fp-solv_resi.crd $path/gbmv/$pdb_ref\_fpsp-solv_resi.crd`; open(SOLV,">>$path/gbmv/$pdb_ref\_fpsp-solv_resi.crd") || die "Unable to open the file $path/gbmv/$pdb_ref\_fpsp-solv_resi.crd : $!\n"; } foreach $j(2..$#solv_sp) { print(SOLV "$solv_sp[$j]\n"); } if ($Int == 1) { open(SOLV,"<$path/gbmv/$pdb_ref->[-1]/fpsp-solv_resi.crd") || die "Unable to open the file $path/gbmv/$pdb_ref->[-1]/fpsp-solv_resi.crd : $!\n"; } else { open(SOLV,"<$path/gbmv/$pdb_ref\_fpsp-solv_resi.crd") || die "Unable to open the file $path/gbmv/$pdb_ref\_fpsp-solv_resi.crd : $!\n"; } chomp(@solv_p=); close(SOLV); if ($Int == 1) { open(OUT,">$path/gbmv/$pdb_ref->[-1]/elec.crd") || die "Unable to open the file : $!\n"; } else { open(OUT,">$path/gbmv/elec.crd") || die "Unable to open the file : $!\n"; } print(OUT "Num Segid Name Gas Solvcx Solvp Sum\n"); $total=0; foreach $i(2..$#gas-1) { ($num,$segid,$name,$gass,$gas)=split(" ",$gas[$i]); ($num,$segid,$name,$solv_cx)=split(" ",$solv_cx[$i]); ($num,$segid,$name,$solv_p)=split(" ",$solv_p[$i]); $sum=$gas+$solv_cx-$solv_p; $total=$total+$sum; printf(OUT "%3i %4s %3s %10.5f %10.5f %10.5f %10.5f\n",$num,$segid,$name,$gas,$solv_cx,$solv_p,$sum); } printf(OUT "sum elec= %10.5f\n",$total); } # Sums the solvant accessible surface, Van der Waals and electrostatic energies for each residue of the complex. sub energy_sum_conf { my $proto = shift; return (0) if (ref($proto)); my ($cas,$f1,$f2,$f3,$atoms_ref,$pdb_ref,$conf_ref,$nb_chn,$nom_chn,$togsep,@ref) = @_; # Records useful informations such as the PDB and the 2 chains names. my ($pdb,$fc) = split('\_',$pdb_ref->[0]); $fname_conf=$conf_ref; my ($fname,$sc) = split('\_',$pdb_ref->[1]); # ELECTROSTATIC ENERGY DECOMPOSITION # FROM UHBD if ($cas eq "uhbd") { open (F1, "<$f1") || die ("Unable to open the file $f1 : $!\n"); my @lines = ; chomp (@lines); close (F1); my $t = 0; # initialization of the index of the following array. my @tab; foreach $l (0..$#lines) { # Extracts of the columns to create a 2 dimensions array if (length($lines[$l]) == 37) { $tab[$t][0] = substr($lines[$l],16,3); # Titrated site number. $tab[$t][1] = substr($lines[$l],0,3); # Residue name. $tab[$t][2] = substr($lines[$l],20,8); # Energy elec. $tab[$t][3] = substr($lines[$l],7,3); # Residue number. $tab[$t][4] = substr($lines[$l],29,8); # Energy desolv $tab[$t][5] = substr($lines[$l],11,2); # SC or BB $t++; } } # Sorts out the data from the array according to column #1 my @range = sort {$$a[0] <=> $$b[0]} @tab; #file to check if the sort is correct open (TEST,"> out/$fname_conf/$fname_conf.test.dat") || die "Unable to create the file out/$fname_conf/$fname_conf.test.dat : $!\n"; foreach $kk(0..$#range) { print (TEST "$range[$kk][0]$range[$kk][5] $range[$kk][1] $range[$kk][2] $range[$kk][3] $range[$kk][4]\n"); } my $j = 1; # Initialization of the index of the following arrays. @add; # Array containing the energy for each residue. @desolv; # Array containing the energy of desolvation for each residue. @res; # Array containing the name of each residue. $nb_rang=$#range+1; for ($i=0;$i<$nb_rang;$i++) { # The residue number of the current line correspond to that of the next line. $add[$j] = $range[$i][2]; #energy elec $desolv[$j] = $range[$i][4]; # energy desolv $a=$i+1; # if residu number is same #while (($range[$i][3] == $range[$a][3]) && (($range[$i][1] eq "NTE") || ($range[$i][1] eq "CTE") || ($range[$i][1] eq "NTN") || ($range[$i][1] eq "CTN") || ($range[$i][1] eq "ACE") || ($range[$a][1] eq "CT3") || ($range[$a][1] eq "CT2") || ($range[$i][1] eq "DEO") || ($range[$a][1] eq "DEO") || ($range[$a][1] eq "3TE") || ($range[$i][1] eq "5TE"))) { # $add[$j] = $add[$j]+$range[$a][2]; # $desolv[$j] = $desolv[$j]+$range[$a][4]; # $a++; #} while (($range[$i][3] == $range[$a][3]) && (($range[$i][1] eq "ACE")|| ($range[$a][1] eq "CT3"))){ $add[$j] = $add[$j]+$range[$a][2]; $desolv[$j] = $desolv[$j]+$range[$a][4]; $a++; } $i=$a-1; $j++; } } # FROM GBMV elsif ($cas eq "gbmv"){ open (F1, "<$f1") || die ("Unable to open the file $f1 : $!\n"); my @lines = ; chomp (@lines); close (F1); my $t = 1; # initialization of the index of the following array. my @tab; foreach $l (1..$#lines-1) { # Extracts of the columns to create a 2 dimensions array $tab[$t][0] = substr($lines[$l],4,4); # Chain name. $tab[$t][1] = substr($lines[$l],9,3); # Residue name. $tab[$t][2] = substr($lines[$l],46,10); # Energy elec. $tab[$t][3] = substr($lines[$l],0,3); # Residue number. $tab[$t][4] = substr($lines[$l],24,10); # Energy desolv $t++; } @add; # Array containing the energy for each residue. @desolv; # Array containing the energy of desolvation for each residue. #@res; # Array containing the name of each residue. $nb_tab=$#tab+1; for ($i=1;$i<$nb_tab;$i++) { $add[$i] = $tab[$i][2]; $desolv[$i] = $tab[$i][4]; #$res[$i] = $tab[$i][1]; } } # SOLVENT ACCESSIBLE SURFACE open (F2,"<$f2") || die ("Unable to open the file $f2 : $!\n"); my @lines = ; chomp (@lines); close (F2); # Isolation of the part containing the data to be read : my $step = 0; # Initialization of a counter to record the current step number. my ($begin,$end); # Runs along the file. foreach $l (0..$#lines) { # Identification of the beginning of the data. if (($lines[$l] =~ / SUM OF INDIVIDUAL /) && ($step == 0)) { $begin = $l+1; $step++; } # Identification of the end of the data. elsif (($lines[$l] =~ / SUM OF INDIVIDUAL /) && ($step == 1)) { $end = $l; } } my $s = 1; # Initialization of the index of the following array. my @sas; # Processing the chain. #foreach $l ($begin..$end) { for ($l=$begin; $l<$end+1; $l++) { # Extraction of the fields from the current line. my @surf = split(' ',$lines[$l]); if ($togsep eq "tog") { $res[$s] = $surf[1]; # extraction of the name of residue $sas[$s] = $surf[2]*0.005; # Isolation of the column #3 containing the solvent accessible surfaces multiplicated by the gamma factor. $s++; } elsif ($togsep eq "sep"){ if (($surf[2] eq "PRO") || ($surf[2] eq "GLY")) { # GLY and PRO have no SC for elec calculation with uhbd @surf_next = split(' ',$lines[$l+1]); # extraction of the fields in the next line $res[$s] = $surf[2]; # extraction of the name of residue $sas[$s] = ($surf[3]*0.005) + ($surf_next[3]*0.005); # we do the sum for SAS values $bbsc[$s] = "no"; $num[$s] = $surf[1]; # extraction of number of residue $s++; $l=$l+1; } elsif ($surf[2] eq "NONE") { # NONE is a residue without BB @surf_next = split(' ',$lines[$l+1]);# extraction of the fields in the next line $res[$s] = $surf_next[2]; # extraction of the name of residue in next line $sas[$s] = ($surf[3]*0.005) + ($surf_next[3]*0.005); $bbsc[$s] = "no"; $num[$s] = $surf_next[1]; # extraction of number of residue $s++; $l=$l+1; # skip the next residue } else { $res[$s] = $surf[2]; # extraction of the name of residue $sas[$s] = $surf[3]*0.005; # Isolation of the column #3 containing the solvent accessible surfaces multiplicated by the gamma factor. $bbsc[$s] = $surf[0]; # extraction of bb or sc $num[$s] = $surf[1]; # extraction of number of residue $s++; } } } # CALCULATION OF THE VAN DER WAALS RADII open (F3, "<$f3") || die "Unable to open the file $f3 : $!\n"; my @lines = ; chomp (@lines); close (F3); # Isolation of the part containing the data to be read : my $v = 1; # initialization of the index of the following array. for ($l=0; $l<$#lines; $l++) { #foreach $l (0..$#lines) { if (($lines[$l]=~ /^BB/) ||($lines[$l]=~ /^SC/)) { my @radius = split(' ',$lines[$l]); if ($togsep eq "tog") { # Isolation of the column #3 containing the van der Waals radii divided by 2 because 2 radii are part of the surface of interaction. $vdw[$v] = $radius[2]*0.5; $v++; } elsif ($togsep eq "sep"){ if (($radius[2] eq "PRO") || ($radius[2] eq "GLY") || ($radius[2] eq "NONE")) { # GLY and PRO and NONE have no SC for elec calculation with uhbd @radius_next = split(' ',$lines[$l+1]);# extraction of the fields in the next line $vdw[$v] = ($radius[3]*0.5) + ($radius_next[3]*0.5);# so we do the sum for VDW values $v++; $l++;# skip the next residue } else { $vdw[$v] = $radius[3]*0.5;# Isolation of the column #4 containing the van der Waals radii divided by 2 because 2 radii are part of the surface of interaction. $v++; } } } } # RESULT FILES open (OUT,"> out/$fname_conf/$fname_conf.decomp.dat") || die "Unable to create the file out/$fname_conf/$fname_conf.decomp.dat : $!\n"; if ($togsep eq "tog") { printf (OUT "RES # ELEC SAS VDW TOTAL DESOLV INTERACTION (ELEC-DESOLV)\n"); } elsif ($togsep eq "sep"){ printf (OUT "RES PART # ELEC SAS VDW TOTAL DESOLV INTERACTION (ELEC-DESOLV)\n"); } #open (EFF,"> out/$fname_conf/$fname_conf.amino-acid-efficiency.dat") || die "Unable to create the file out/$fname_conf/$fname_conf.amino-acid-efficiency.dat : $!\n"; #if ($togsep eq "tog") { # printf (EFF "RES # ELEC SAS VDW TOTAL DESOLV INTERACTION\n"); #} #elsif ($togsep eq "sep"){ # printf (EFF "RES PART # ELEC SAS VDW TOTAL DESOLV INTERACTION\n"); #} my @ener; # Array containing the total energy for each residue. $elec1=0;$elec2=0; $sas1=0;$sas2=0; $vdw1=0;$vdw2=0; $tot1=0;$tot2=0; $desolv1=0;$desolv2=0; $inte1=0;$inte2=0; $av=0; foreach $e (1..$#res-1){ # Calculating the free energy by summing E(elect) + E(sas) + E(vdw) $ener[$e] = $add[$e] + $sas[$e] + $vdw[$e]; # Calculation of the sum of energies for the first part of the system if (!(($res[$e] eq $ref[0]) && ($e == $ref[1])) && ($av==0)) { $elec1=$elec1+$add[$e]; $sas1=$sas1+$sas[$e];#print "pour $e sas= $sas1\n"; $vdw1=$vdw1+$vdw[$e]; $tot1=$tot1+$ener[$e]; $desolv1=$desolv1+$desolv[$e]; $inte1=$inte1+$add[$e]-$desolv[$e]; } # Allows to know when the second part of the system begins elsif (($res[$e] eq $ref[0]) && ($e == $ref[1])) { $av=1; # allows to know that is the second part of the system $elec2=$add[$e]; # adds the values of energies in a new variable for the second part of the system $sas2=$sas[$e]; $vdw2=$vdw[$e]; $tot2=$ener[$e]; $desolv2=$desolv[$e]; $inte2=$add[$e]-$desolv[$e]; } # for the second part of the system, calculation of the sum of energies elsif ($av==1) { $elec2=$elec2+$add[$e]; $sas2=$sas2+$sas[$e]; $vdw2=$vdw2+$vdw[$e]; $tot2=$tot2+$ener[$e]; $desolv2=$desolv2+$desolv[$e]; $inte2=$inte2+$add[$e]-$desolv[$e]; } ($nb_heavy[$e])=Leto::GrpAtoms->amino_heavy_atoms($res[$e]); if ($togsep eq "tog") { #printf (EFF "%3s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n", $res[$e], $e,$add[$e]/$nb_heavy[$e], $sas[$e]/$nb_heavy[$e], $vdw[$e]/$nb_heavy[$e], $ener[$e]/$nb_heavy[$e], $desolv[$e]/$nb_heavy[$e], ($add[$e]-$desolv[$e])/$nb_heavy[$e]); printf (OUT "%3s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n", $res[$e], $e, $add[$e], $sas[$e], $vdw[$e], $ener[$e], $desolv[$e], $add[$e]-$desolv[$e]); } elsif ($togsep eq "sep"){ #printf (EFF "%3s %2s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n", $res[$e], $bbsc[$e], $num[$e],$add[$e]/$nb_heavy[$e], $sas[$e]/$nb_heavy[$e], $vdw[$e]/$nb_heavy[$e], $ener[$e]/$nb_heavy[$e], $desolv[$e]/$nb_heavy[$e], ($add[$e]-$desolv[$e])/$nb_heavy[$e]); printf (OUT "%3s %2s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n", $res[$e], $bbsc[$e], $num[$e], $add[$e], $sas[$e], $vdw[$e], $ener[$e], $desolv[$e], $add[$e]-$desolv[$e]); } } # Calculation of the total of energies for the two parts of the system $elec3=$elec1+$elec2; $sas3=$sas1+$sas2; $vdw3=$vdw1+$vdw2; $tot3=$tot1+$tot2; $desolv3=$desolv1+$desolv2; $inte3=$inte1+$inte2; # writes in a file the results #open (OUTB,"> out/$fname_conf/$fname_conf.sum_energies.dat") || die "Unable to create the file out/$fname_conf/$fname_conf.sum_energies.dat : $!\n"; #printf (OUTB "%s : Sum of energies for each part of the system\n",$fname_conf); #printf (OUTB " # Sum part1 Sum part2 Sum tot\n"); #printf (OUTB "ELEC %7.4f %7.4f %7.4f\n", $elec1,$elec2,$elec3); #printf (OUTB "SAS %7.4f %7.4f %7.4f\n", $sas1,$sas2,$sas3); #printf (OUTB "VDW %7.4f %7.4f %7.4f\n", $vdw1,$vdw2,$vdw3); #printf (OUTB "TOT %7.4f %7.4f %7.4f\n", $tot1,$tot2,$tot3); #printf (OUTB "DESOLV %7.4f %7.4f %7.4f\n", $desolv1,$desolv2,$desolv3); #printf (OUTB "INTE %7.4f %7.4f %7.4f\n", $inte1,$inte2,$inte3); close (OUT); #close (EFF); #close (OUTB); # PDB FILE # this part allows to put in the b-factor column total energy, it's from the first version of ElMo so I don't know if it works open (PDB, "> scr/$fname_conf/$fname_conf.ener.pdb") || die "Unable to create the file scr/$fname_conf/$fname_conf.ener.pdb : $!\n"; my @resn; foreach $i (0..$nb_chn-1) { # Initialization of the current residue number for each chain. $resn[$i]=1; } my $NOM_CHN= uc $nom_chn; # The chains names must be in upper character. @chn=split(',',$NOM_CHN); # Runs along the atoms array. foreach $index (0..$#{$atoms_ref}) { CHN:foreach $i (0..$nb_chn-1) { # Processing the chain. if ($atoms_ref->[$index]->chain() eq $chn[$i]) { # The current line belongs to the current residue. $resi=0; if ($atoms_ref->[$index]->residue_number() == $resn[$i]) { # Replacing the Bfactor attribute by the free ernergy sum in the PDB file. foreach $k (0..$i) { $resi=$resi + $resn[$k]; } $atoms_ref->[$index]->bfactor($ener[$resi]); printf (PDB "%s", $atoms_ref->[$index]->print_pdb_atom()); } # Go to the next residue. else { $resn[$i]++; # Replacing the Bfactor attribute by the free ernergy sum in the PDB file. foreach $k (0..$i) { $resi=$resi + $resn[$k]; } $atoms_ref->[$index]->bfactor($ener[$resi]); printf (PDB "%s", $atoms_ref->[$index]->print_pdb_atom()); } last CHN; } elsif (($i == $nb_chn-1) && ($atoms_ref->[$index]->chain() ne $chn[$i])) { printf (PDB "%s", $atoms_ref->[$index-1]->print_pdb_ter()); printf (PDB "END\n"); ## No need to go further. last; } } } close (PDB); return (); } # Creates an input file dyna.inp from template. sub print_dyna { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$ext,$path,$Verbose) = @_; # Creation of the complex PDB file name if it doesn't already exist : if ($pdb_ref->[-1] =~ /.+_[a-z]?$/) { my ($pdb1,$fc) = split('\_',$pdb_ref->[0]); my ($pdb2,$sc) = split('\_',$pdb_ref->[1]); push (@{$pdb_ref}, join('_',$pdb1,($fc . $sc))); } warn "\tSetting up the input files ...\n" if ($Verbose == 1); # Split the 1st value of the PDB files array. The 2nd of these elements is the name of the 1st chain of the complex. my @fc = split('\_',uc($pdb_ref->[0])); # Split the 2nd value of the PDB files array. The 2nd of these elements is the name of the 2nd chain of the complex. my @sc = split('\_',uc($pdb_ref->[1])); ## SOLVATION ## open (DYNATMP, "< tmpsep/dyna_solv.tmp") || die "Unable to open the file tmpsep/dyna_solv.tmp : $!\n"; @dyna = ; close (DYNATMP); open (DYNA, "> inp/$pdb_ref->[-1].dyna_solv.inp") || die "Unable to create the file inp/$pdb_ref->[-1].dyna_solv.inp : $!\n"; # Runs along the template file. foreach $d (0..$#dyna) { # Identification of the keyword $SET$. if ($dyna[$d] =~ /^\$SET\$/) { printf (DYNA "SET job dyna_solv ! name of the current job\n"); printf (DYNA "SET path %s ! path of the working directory\n", $path); printf (DYNA "SET ext %s ! extension of the previous minimization\n", $ext); printf (DYNA "SET fc %s ! first chain of the complex\n", $fc[1]); printf (DYNA "SET sc %s ! second chain of the complex\n", $sc[1]); printf (DYNA "SET pdb %s ! name of the structure\n", $pdb_ref->[-1]); } # All other lines without keywords. else { printf (DYNA "%s", $dyna[$d]); } } close (DYNA); ## HEATING ## open (DYNATMP, "< tmpsep/dyna_heat.tmp") || die "Unable to open the file tmpsep/dyna_heat.tmp : $!\n"; @dyna = ; close (DYNATMP); open (DYNA, "> inp/$pdb_ref->[-1].dyna_heat.inp") || die "Unable to create the file inp/$pdb_ref->[-1].dyna_heat.inp : $!\n"; # Runs along the template file. foreach $d (0..$#dyna) { # Records the temperature set. if ($dyna[$d] =~ /set temp/) { $temp = $'; printf (DYNA "%s", $dyna[$d]); } # Identification of the keyword $SET$. elsif ($dyna[$d] =~ /^\$SET\$/) { printf (DYNA "SET job dyna_heat ! name of the current job\n"); printf (DYNA "SET path %s ! path of the working directory\n", $path); printf (DYNA "SET fc %s ! first chain of the complex\n", $fc[1]); printf (DYNA "SET sc %s ! second chain of the complex\n", $sc[1]); printf (DYNA "SET pdb %s ! name of the structure\n", $pdb_ref->[-1]); } # All other lines without keywords. else { printf (DYNA "%s", $dyna[$d]); } } close (DYNA); ## EQUILIBRATION ## open (DYNATMP, "< tmpsep/dyna_equil.tmp") || die "Unable to open the file tmpsep/dyna_equil.tmp : $!\n"; @dyna = ; close (DYNATMP); open (DYNA, "> inp/$pdb_ref->[-1].dyna_equil.inp") || die "Unable to create the file inp/$pdb_ref->[-1].dyna_equil.inp : $!\n"; # Runs along the template file. foreach $d (0..$#dyna) { # Identification of the keyword $SET$. if ($dyna[$d] =~ /^\$SET\$/) { printf (DYNA "SET job dyna_equil ! name of the current job\n"); printf (DYNA "SET path %s ! path of the working directory\n", $path); printf (DYNA "SET fc %s ! first chain of the complex\n", $fc[1]); printf (DYNA "SET sc %s ! second chain of the complex\n", $sc[1]); printf (DYNA "SET pdb %s ! name of the structure\n", $pdb_ref->[-1]); } # All other lines without keywords. else { printf (DYNA "%s", $dyna[$d]); } } close (DYNA); ## PRODUCTION ## open (DYNATMP, "< tmpsep/dyna_prod.tmp") || die "Unable to open the file tmpsep/dyna_prod.tmp : $!\n"; @dyna = ; close (DYNATMP); open (DYNA, "> inp/$pdb_ref->[-1].dyna_prod1.inp") || die "Unable to create the file inp/$pdb_ref->[-1].dyna_prod1.inp : $!\n"; # Runs along the template file. foreach $d (0..$#dyna) { # Records the total number of steps of the molecular dynamics. if ($dyna[$d] =~ /set step_lim/) { $step_lim = $'; printf (DYNA "%s", $dyna[$d]); } # Records the number of steps between each restart. elsif ($dyna[$d] =~ /set step_n/) { $step_n = $'; printf (DYNA "%s", $dyna[$d]); } # Identification of the keyword $SET$. elsif ($dyna[$d] =~ /^\$SET\$/) { printf (DYNA "SET temp %d ! dynamics temperature\n", $temp); printf (DYNA "SET path %s ! path of the working directory\n", $path); printf (DYNA "SET pdb %s ! name of the structure\n", $pdb_ref->[-1]); printf (DYNA "SET inp dyna_equil ! name of the input file\n"); printf (DYNA "SET job dyna_prod1 ! name of the current job\n"); printf (DYNA "SET out dyna_prod1 ! name of the output file\n"); } # All other lines without keywords. else { printf (DYNA "%s", $dyna[$d]); } } close (DYNA); ## RESTARTS ## # The number of restart files equals to the total number of steps divided by the number of steps between the restarts. my $restart_n = $step_lim / $step_n; for ($i=2;$i<=$restart_n;$i++) { open (DYNA, "> inp/$pdb_ref->[-1].dyna_prod$i.inp") || die "Unable to create the file inp/$pdb_ref->[-1].dyna_prod$i.inp : $!\n"; # Runs along the template file. foreach $d (0..$#dyna) { # Identification of the keyword $SET$. if ($dyna[$d] =~ /^\$SET\$/) { printf (DYNA "SET path %s ! path of the working directory\n", $path); printf (DYNA "SET pdb %s ! name of the structure\n", $pdb_ref->[-1]); printf (DYNA "SET inp dyna_prod%d ! name of the input file\n", $i-1); printf (DYNA "SET job dyna_prod%d ! name of the current job\n", $i); printf (DYNA "SET out dyna_prod%d ! name of the output file\n", $i); } # All other lines without keywords. else { printf (DYNA "%s", $dyna[$d]); } } close (DYNA); } # Returns the PDB files array, the number of restarts and the total number of steps. return ($pdb_ref,$restart_n,$step_lim); } # Creates an input file ener.inp from template. sub print_ener { my $proto = shift; my @tab; return (0) if (ref($proto)); my ($pdb_ref,$path,$Verbose,$Int,$restart_n,$conf_n,$etat,$top,$par,$skip,$fp,$sp) = @_; # Opens the classic ener input when the program is used for the beginning. if ($Int == 0) { open (ENERTMP, "< tmpsep/ener.tmp") || die "Unable to open the file tmpsep/ener.tmp : $!\n"; } # Opens the ener input who can be modify when the option -Int is used. else { open (ENERTMP, "< tmpsep/enerI.tmp") || die "Unable to open the file tmpsep/enerI.tmp : $!\n"; } my @ener = ; close (ENERTMP); # Creation of the complex PDB file name if it doesn't already exist : if (($pdb_ref->[-1] =~ /.+_[a-z]?$/) && ($Int == 0)){ my ($pdb1,$fc) = split('\_',$pdb_ref->[0]); my ($pdb2,$sc) = split('\_',$pdb_ref->[1]); push (@{$pdb_ref}, join('_',$pdb1,($fc . $sc))); } # Creates a stream file that will contain the commands to open the coordinates files. open (STREAM, "> inp/all-traj.dat") || die "Unable to create the file inp/all-traj.dat : $!\n"; # Step making it possible to have the number of restart and duration of molecular dynamic, the values # are written into hard in the file dyna_prod.tmp # Else this values are asks to the users if ($Int == 0) { open (DYNATMP, "< tmp/dyna_prod.tmp") || die "Unable to open the file tmp/dyna_prod.tmp : $!\n"; @dyna = ; close (DYNATMP); foreach $d (0..$#dyna) { # Records the total number of steps of the molecular dynamics. if ($dyna[$d] =~ /set step_lim/) { $step_lim = $'; } # Records the number of steps between each restart. elsif ($dyna[$d] =~ /set step_n/) { $step_n = $'; } } my $restart_n = $step_lim / $step_n; } my $unitnumber=21; foreach $s (0..$restart_n-1) { printf (STREAM "open read unformatted unit %d name \@path/scr/\@pdb.dyna_prod%d.cor\n", $unitnumber+$s, $s+1); } printf (STREAM "return\n"); close (STREAM); open (ENERINP, "> inp/$pdb_ref->[-1].ener.inp") || die "Unable to create the file inp/$pdb_ref->[-1].ener.inp : $!\n"; # Split the 1st value of the PDB files array. The 2nd of these elements is the name of the 1st chain of the complex. my @fc = split('\_',uc($pdb_ref->[0])); # Split the 2nd value of the PDB files array. The 2nd of these elements is the name of the 2nd chain of the complex. my @sc = split('\_',uc($pdb_ref->[1])); # Runs along the template file. foreach $e (0..$#ener) { if ($ener[$e] =~ /set skip/) { $skip = $'; printf (ENERINP "%s", $ener[$e]); if ($Int == 0) { # The number of conformations equals to the total number of steps of the dynamics divided by the number of skips. my $conf_n = $step_lim / $skip; } } if ($ener[$e] =~ /set step_n/) { $step_n = $'; printf (ENERINP "%s", $ener[$e]); } if ($ener[$e] =~ /set restart_n/) { $restart_n = $'; printf (ENERINP "%s", $ener[$e]); } # Identification of the keyword $SET$. elsif ($ener[$e] =~ /^\$SET\$/) { warn "\tSetting up the input file ...\n" if ($Verbose == 1); printf (ENERINP "SET skip %s ! skip size for the energy calculation\n", $skip); printf (ENERINP "SET conf_n %s ! number of conformations\n", $conf_n); printf (ENERINP "SET path %s ! absolute path of the working directory\n", $path); printf (ENERINP "SET count %d ! COR files processed\n", $restart_n); printf (ENERINP "SET pdb %s ! file name\n", $pdb_ref->[-1]); printf (ENERINP "SET etat %s ! solv or desolv\n", $etat); printf (ENERINP "SET fc %s ! first chain\n", $fc[1]); printf (ENERINP "SET sc %s ! second chain\n", $sc[1]); printf (ENERINP "SET fp %s ! first part\n", $fp); printf (ENERINP "SET sp %s ! second part\n", $sp); } elsif ($ener[$e]=~ /^\$LIB\$/) { printf(ENERINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD rtf unit 2 card\nCLOSE unit 2\n",$top); printf(ENERINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD param unit 2 card\nCLOSE unit 2\n",$par); } # All other lines without keywords. else { printf (ENERINP "%s", $ener[$e]); } } close (ENERINP); # Returns the PDB files array. return ($pdb_ref,$restart_n,$conf_n); } # Analyses the energy result file and selects the conformations to be extracted. sub traj_analysis { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$inter_n,$Int) = @_; # Creation of the complex PDB file name if it doesn't already exist : if ($Int == 0) { if ($pdb_ref->[-1] =~ /.+_[a-z]?$/) { my ($pdb1,$fc) = split('\_',$pdb_ref->[0]); my ($pdb2,$sc) = split('\_',$pdb_ref->[1]); push (@{$pdb_ref}, join('_',$pdb1,($fc . $sc))); } } open (ENER, "< scr/$pdb_ref->[-1].ener.inte") || die "Unable to open the file scr/$pdb_ref->[-1].ener.inte : $!\n"; my @ener = ; close (ENER); open (DISTRIB, "> out/$pdb_ref->[-1].distrib.ener.dat") || die "Unable to create the file out/distrib.ener.dat : $!\n"; printf (DISTRIB "Conf\tNumber\n"); open (OUT, "> out/$pdb_ref->[-1].ener.dat") || die "Unable to create the file out/$pdb_ref->[-1].ener.dat : $!\n"; my @conf; printf (OUT "CONF# ENERGY\n"); # Titles of the output file. # Runs along the energy result file. foreach $e (0..$#ener) { my $conf_ni = 0; if ($ener[$e] =~ / CONFORMATIONAL NUMBER =/) { # Extracts the current conformation number. my @temp = split (" ",$'); chomp ($conf_ni = $temp[0]); if ($ener[$e+1] =~ / ELECTROSTATIC =/) { # Extracts the correponding electrostatic energy. # To each conformation number in the array corresponds its energy. chomp ($conf[$conf_ni] = $'); } # Writes the data extracted to the output file. printf (OUT "%5d %8.3f\n", $conf_ni, $conf[$conf_ni]); } } # Determination of the maximum and minimum energies from all the conformations. my $min = $conf[1]; my $max = $conf[1]; # Runs along the conformations array. foreach $c (1..$#conf) { if ($conf[$c] > $max) { # The maximum and minimum values are truncated. $max = int($conf[$c]); } if ($conf[$c] < $min) { # The minimum is substracted by 1 to ensure the overall interval doesn't lack the lowest energy value. $min = int($conf[$c])-1; } } # The length of an interval equals to the length of the overall interval divided by the number of intervals. $inter = ($max-$min)/$inter_n; my $conf_nb = 0; # Initialization of the conformation number. printf (OUT "\n--------------------------------------------------------------------------\n\n"); # Proceeds interval by interval : foreach $i (1..$inter_n) { my $sum = 0; # Initialization of the energy sum to calculate the average energy for the current inerval. printf (OUT "Interval #%d from %.3f to %.3f\nCONF# ENERGY\n", $i, $min, $min+$inter); # Runs along the conformations array. foreach $c (1..$#conf) { # The energy of the current conformation must be between the 2 limits of the current interval, not under the minimum or over the maximum. if (((($min+$inter) == ($max)) && (($conf[$c] >= $min) && ($conf[$c] <= $max))) || (($conf[$c] >= $min) && ($conf[$c] <= ($min+$inter)))) { # Creates an hash for each interval which name is the interval number. # Records the energy of the current conformation in this hash which keys are the conformation number. $$i{$c} = $conf[$c]; # Records the number of conformations for each interval in an array. $counter[$i]++; # Sums the energy of the current conformation. $sum += $conf[$c]; printf (OUT "%5d %8.3f\n", $c, $conf[$c]); } } # The average energy equals to the sum of the energies of all the conformations divided by the number of conformations for the current interval. $average[$i] = $sum / $counter[$i] unless ($counter[$i] == 0); printf (OUT "AVERAGE ENERGY = %.3f\n", $average[$i]); printf (OUT "NUMBER OF CONFORMATIONS = %d\n", $counter[$i]); printf (OUT "\n"); # The new minimum for the next interval. $min += $inter; # The total number of conformations. $conf_nb += $counter[$i]; } my $sum = 0; # Initialization of the enrgy sum to calculate the weighted average energy for all the intervals. foreach $i (1..$inter_n) { # For each interval, the sum of the energies for the current interval equals to the average energy multiplicated by the number of conformations of the interval. $sum += $average[$i] * $counter[$i] unless ($counter[$i] == 0); } # The weighted average energy equals to the sum of all the energies divided by the total number of conformations. my $w_average = $sum / $conf_nb; printf (OUT "-------------------------------------\n\nWEIGHED AVERAGE ENERGY = %.3f\n", $w_average); close (OUT); # For each interval, selects the conformation which energy is the nearest from the average energy of the interval. foreach $i (1..$inter_n) { my $diff = 100; # Initialization of the difference value. # Runs along the current interval hash. while (($key, $value) = each %$i) { # One can't guess if the energy of the conformation being checked is lower or higher than the average energyof the current interval. # That's why the difference value is compared to the square root not to worry about the sign of the differencebetween the 2 terms. if ($diff > sqrt(($average[$i]-$value)*($average[$i]-$value))) { # The conformation has an energy nearer of the average one than the previously recorded one. $selec = $key; # Records the new reference. $diff = sqrt(($average[$i]-$value)*($average[$i]-$value)); } } # Records the selected conformation in an array. push (@conf_ref,$selec); printf (DISTRIB "%d\t%d\n",$selec,$counter[$i]); } close (DISTRIB); # Returns the PDB files array and the selected conformations array. return ($pdb_ref,\@conf_ref); } # Creates an input file conf.inp from template. sub print_conf { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$conf_ref,$path,$Verbose,$Int,$conf_n,$restart_n,$etat,$top,$par,$skip,$fp,$sp) = @_; # Opens the classic conf input when the program is used for the beginning. if ($Int == 0) { open (CONFTMP, "< tmpsep/conf.tmp") || die "Unable to open the file tmpsep/conf.tmp : $!\n"; } # Opens the conf input who can be modify when the option -Int is used. else { open (CONFTMP, "< tmpsep/confI.tmp") || die "Unable to open the file tmpsep/confI.tmp : $!\n"; } @conf = ; close (CONFTMP); # Creation of the complex PDB file name if it doesn't already exist : if ($Int ==0) { if ($pdb_ref->[-1] =~ /.+_[a-z]?$/) { my ($pdb1,$fc) = split('\_',$pdb_ref->[0]); my ($pdb2,$sc) = split('\_',$pdb_ref->[1]); push (@{$pdb_ref}, join('_',$pdb1,($fc . $sc))); } } open (CONFINP, "> inp/$pdb_ref->[-1].conf.inp") || die "Unable to create the file inp/$pdb_ref->[-1].conf.inp : $!\n"; # Split the 1st value of the PDB files array. The 2nd of these elements is the name of the 1st chain of the complex. my @fc = split('\_',uc($pdb_ref->[0])); # Split the 2nd value of the PDB files array. The 2nd of these elements is the name of the 2nd chain of the complex. my @sc = split('\_',uc($pdb_ref->[1])); # Runs along the template file. foreach $c (0..$#conf) { # Records the number of skips. if ($conf[$c] =~ /set skip/) { $skip = $'; printf (CONFINP "%s", $conf[$c]); } if ($conf[$c] =~ /set step_n/) { $step_n = $'; printf (CONFINP "%s", $conf[$c]); } if ($conf[$c] =~ /set restart_n/) { $restart_n = $'; printf (CONFINP "%s", $conf[$c]); } # Identification of the keyword $SET$. if ($conf[$c] =~ /^\$SET\$/) { warn "\tSetting up the input file ...\n" if ($Verbose == 1); # The number of conformations equals to the total number of steps of the dynamics divided by the number of skips. if ($Int == 0) { my $conf_n = $step_lim / $skip; } printf (CONFINP "SET path %s ! absolute path of the working directory\n", $path); printf (CONFINP "SET pdb %s ! complex file name\n", $pdb_ref->[-1]); printf (CONFINP "SET etat %s ! solv or desolv\n", $etat); printf (CONFINP "SET skip %d ! skip size\n", $skip); printf (CONFINP "SET conf_n %d ! total number of conformations\n", $conf_n); printf (CONFINP "SET count %d ! COR files processed\n", $restart_n); printf (CONFINP "SET fc %s ! first chain\n", $fc[1]); printf (CONFINP "SET sc %s ! second chain\n", $sc[1]); printf (CONFINP "SET fc_n %s ! first chain file name\n", $pdb_ref->[0]); printf (CONFINP "SET sc_n %s ! second chain file name\n", $pdb_ref->[1]); printf (CONFINP "SET fp %s ! first part\n", $fp); printf (CONFINP "SET sp %s ! second part\n", $sp); foreach $r (0..$#{$conf_ref}) { # Creates a directory for each conformation. system ("mkdir scr/$pdb_ref->[-1].conf_$conf_ref->[$r]"); system ("mkdir inp/$pdb_ref->[-1].conf_$conf_ref->[$r]"); system ("mkdir out/$pdb_ref->[-1].conf_$conf_ref->[$r]"); # Writes the sets for the selected conformations. printf (CONFINP "SET %d %d ! conformation #%d\n", $r+1, $conf_ref->[$r], $conf_ref->[$r]); } } elsif ($conf[$c]=~ /^\$LIB\$/) { printf(CONFINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD rtf unit 2 card\nCLOSE unit 2\n",$top); printf(CONFINP "OPEN READ unit 2 card name \@path/lib/%s\nREAD param unit 2 card\nCLOSE unit 2\n",$par); } # Identification of the keyword $IF$. elsif ($conf[$c] =~ /^\$IF\$/) { # Sets the conditions to extract the selected conformations. foreach $r (0..$#{$conf_ref}) { printf (CONFINP "IF \@i EQ \@%d GOTO writing\n", $r+1); } } # All other lines without keywords. else { printf (CONFINP "%s", $conf[$c]); } } close (CONFINP); # Returns the PDB files array. return ($pdb_ref); } # Allows to change the files cha and rad.crd in order to put the ligand in last position (if it isn't it already). sub transfo_crd { my $proto = shift; return (0) if (ref($proto)); my ($cas,$numfirst_ref,$numlast_ref,$num_ref,$nam_ref,$chn_ref,$conf_files,$ref_pdb)=@_; ($conf)=($conf_files =~/.+_.+\.conf_(.+)/); my @file = glob("scr/$conf_files/$conf_files-{rad,cha}.crd.his"); foreach $i (0..$#file) { # Transformation for all the cha and rad needs to read only the cha and rad of the complex. ($mes)= ($file[$i]=~/.+\/.+_.+\..+-(.+)\..+\..+/); # Allows to know if the script is reading a cha or a rad file. $lig=0; # Counter for the number of atoms of the ligand. $ato=1; # Counter for the number of atoms. $res_avant=1; # Variable contains the number of the last residue. $res=1; # Counter for the number of residues. open (FILE, "< $file[$i]") || die "Unable to open the file $file[$i]\n"; # Opens cha or rad file of the complex. my @line = ; close (FILE); open (OUTCX, "> $file[$i].tr"); # Creates file which will contain cha or rad of complex in which ligand is in last position. open (OUTL, "> scr/$conf_files/$ref_pdb->[1].conf_$conf-$mes.crd.his.tr"); # Creates file cha or rad which contains ligand with good numbers of residues. # Allows to copy the beginning of cha or rad files for the first element of the system in a file with extension .tr. open (FILEL, "< scr/$conf_files/$ref_pdb->[1].conf_$conf-$mes.crd.his"); my @linel = ; close (FILEL); foreach $k(0..$#linel) { chomp($linel[$k]); if (length $linel[$k]<70) { printf (OUTL "$linel[$k]\n"); } } # Allows to copy the beginning of cha or rad files for the second element of the system in a file with extension .tr. open (OUTR, "> scr/$conf_files/$ref_pdb->[0].conf_$conf-$mes.crd.his.tr"); open (FILER, "< scr/$conf_files/$ref_pdb->[0].conf_$conf-$mes.crd.his"); my @liner = ; close (FILER); foreach $k(0..$#liner) { chomp($liner[$k]); if (length $liner[$k]<70) { printf (OUTR "$liner[$k]\n"); } } foreach $k(0..$#line) { chomp($line[$k]); if (length $line[$k]>=70) { # Line that contains data. @champ=split (' ',$line[$k]); # Copy line of files cha or rad in new cha or rad complex and in new cha or rad first element of the system as long as the line read does not correspond to the ligand. if ((($cas == 1) && ($champ[1]!=$num_ref) && ($champ[2] ne $nam_ref)) || (($cas == 2) && ($champ[7] ne $chn_ref)) || (($cas==3) && ($champ[1] < $numfirst_ref))) { printf (OUTCX "$line[$k]\n"); printf (OUTR "$line[$k]\n"); $ato++; # One is added to counter of atoms. if ($champ[1]!=$res_avant) { # If we change residue one is added to counter of residues. $res++; } $res_avant=$champ[1]; # Puts the number of the residue in memory. } # Increments the counter of number of atoms of the ligand if the line corresponds to the ligand. elsif ((($cas == 1) && ($champ[1]==$num_ref) && ($champ[2] eq $nam_ref)) || (($cas == 2) && ($champ[7] eq $chn_ref)) || (($cas==3) && ($champ[7] eq $chn_ref) && ($champ[1] >= $numfirst_ref) && ($champ[1] <= $numlast_ref))) { chomp($line[$k+1]); $lig++; # Looks the next line. @suivant=split(' ',$line[$k+1]); if ((($cas == 1) && ($suivant[1]==$num_ref) && ($suivant[2] eq $nam_ref)) || (($cas == 2) && ($suivant[7] eq $chn_ref)) || (($cas==3) && ($suivant[7] eq $chn_ref) && ($suivant[1] >= $numfirst_ref) && ($suivant[1] <= $numlast_ref))) { next; } # Puts in memory the number of the line which corresponds at the end of the ligand. else { $apres=$k+1; last; } } } # Copy the beginning of the file for the new cha or rad complex. else { printf (OUTCX "$line[$k]\n"); } } # Reads the line after the ligand. foreach $ii($apres..$#line) { chomp($line[$k]); @champ=split(' ',$line[$ii]); if ($champ[1]!=$res_avant) { # If we change residue one is added to counter of residues. $res++; } $champ[8]=$res; # Puts the new good number of residue in the good field. foreach $aa(2,3,7,8) { # Puts spaces at the good place so that alignment is respected in cha or rad.crd files. if (length $champ[$aa]==3) { $champ[$aa]="$champ[$aa] "; } if (length $champ[$aa]==2) { $champ[$aa]="$champ[$aa] "; } if (length $champ[$aa]==1) { $champ[$aa]="$champ[$aa] "; } } # Prints line in cha or rad file with the good format and the good number of atom and residue. printf (OUTCX "%5d%5d %4s %4s%10.5f%10.5f%10.5f %4s %4s%10.5f\n",$ato,$res,$champ[2],$champ[3],$champ[4],$champ[5],$champ[6],$champ[7],$champ[8],$champ[9]); printf (OUTR "%5d%5d %4s %4s%10.5f%10.5f%10.5f %4s %4s%10.5f\n",$ato,$res,$champ[2],$champ[3],$champ[4],$champ[5],$champ[6],$champ[7],$champ[8],$champ[9]); $ato++; # One is added to counter of atoms. $res_avant=$champ[1]; # Puts the number of the residue in memory. } # Reads again the lines which correpond to the ligand. foreach $ii($apres-$lig..$apres-1) { chomp($line[$k]); @champ=split(' ',$line[$ii]); if ($champ[1]!=$res_avant) { # If we change residue one is added to counter of residues. $res++; } # Puts spaces at the good place so that alignment is respected in cha or rad.crd files. $champ[8]=$res; foreach $aa(2,3,7,8) { if (length $champ[$aa]==3) { $champ[$aa]="$champ[$aa] "; } if (length $champ[$aa]==2) { $champ[$aa]="$champ[$aa] "; } if (length $champ[$aa]==1) { $champ[$aa]="$champ[$aa] "; } } # Prints line in cha or rad file with the good format and the good number of atom and residue. printf (OUTCX "%5d%5d %4s %4s%10.5f%10.5f%10.5f %4s %4s%10.5f\n",$ato,$res,$champ[2],$champ[3],$champ[4],$champ[5],$champ[6],$champ[7],$champ[8],$champ[9]); printf (OUTL "%5d%5d %4s %4s%10.5f%10.5f%10.5f %4s %4s%10.5f\n",$ato,$res,$champ[2],$champ[3],$champ[4],$champ[5],$champ[6],$champ[7],$champ[8],$champ[9]); $ato++; # One is added to counter of atoms. $res_avant=$champ[1]; # Puts the number of the residue in memory. } system `mv scr/$conf_files/$ref_pdb->[1].conf_$conf-$mes.crd.his.tr scr/$conf_files/$ref_pdb->[1].conf_$conf-$mes.crd.his`; system `mv scr/$conf_files/$ref_pdb->[0].conf_$conf-$mes.crd.his.tr scr/$conf_files/$ref_pdb->[0].conf_$conf-$mes.crd.his`; system `mv $file[$i].tr $file[$i]`; } close (OUTCX); close (OUTL); close (OUTR); return $conf; } # Allows to make the 3 crd necessary for the case of Cedric or Nicolas to the creation of a psf in which the ligand is in the good place. # If you are not Cedric or Nicolas and your ligand is not placed at the beginning or the end of your psf and crd you must change this function. # This function allows to create the crd necessary to make an new psf in which the ligand is well placed. sub create_crd_for_psf { my $proto = shift; return (0) if (ref($proto)); my ($conf_files,$cas)=@_; ($conf)=($conf_files =~/.+_.+\.conf_(.+)/); %lon = 0; # opens cha file of the complex. open (FILE, "< scr/$conf_files/$conf_files-cha.crd.his") || die "Unable to open the file scr/$conf_files/$conf_files-cha.crd.his\n"; my @line = ; close (FILE); # puts the good name in the variable $perso (you can change this name). if ($cas==1) { $perso="jeremy"; } elsif ($cas==2) { $perso="nicolas"; } elsif ($cas==3) { $perso="michael"; } # opens 3 files because in the case of Cedric and Nicolas 3 crd are necessary to make the psf # in your case you must open as many files as crd necessary to make the psf open (OUTP, "> scr/$conf_files/$perso-p.conf_$conf-cha.crd.his.tr"); open (OUTS, "> scr/$conf_files/$perso-s.conf_$conf-cha.crd.his.tr"); open (OUTT, "> scr/$conf_files/$perso-t.conf_$conf-cha.crd.his.tr"); open (OUTQ, "> scr/$conf_files/$perso-q.conf_$conf-cha.crd.his.tr"); # for the case of Michael if ($cas==3) { # reads every lines of the crd. foreach $k(0..$#line) { chomp($line[$k]); if (length $line[$k]>=70) { # puts every fields of the line in an array. @champ=split (' ',$line[$k]); foreach $i (1..226) { if ($champ[1]==$i) { # if the number of the residue lies between 1 and 226 printf (OUTP "$line[$k]\n"); # copies the line in the first crd (this corresponds to the first part) $lon{"p"}++; # increments a counter to know the number of atoms of the first part to write it at tne beginning of the crd } } foreach $i (227..451) { if ($champ[1]==$i) { # if the number of the residue lies between 227and 451 printf (OUTS "$line[$k]\n"); # copies the line in the second crd (this corresponds to the second part) $lon{"s"}++; # increments a counter to know the number of atoms of the second part to write it at tne beginning of the crd } } foreach $i (452..462) { # if the number of the residue lies between 452 and 462 if ($champ[1]==$i) { # copies the line in the third crd (this corresponds to the third part) printf (OUTT "$line[$k]\n"); $lon{"t"}++; # increments a counter to know the number of atoms of the third part to write it at tne beginning of the crd } } foreach $i (463..472) { # if the number of the residue lies between 463 and 472 if ($champ[1]==$i) { # copies the line in the third crd (this corresponds to the third part) printf (OUTQ "$line[$k]\n"); $lon{"q"}++; # increments a counter to know the number of atoms of the third part to write it at tne beginning of the crd } } } } } # for the case of Nicolas if ($cas==2) { # reads every lines of the crd. foreach $k(0..$#line) { chomp($line[$k]); if (length $line[$k]>=70) { # puts every fields of the line in an array. @champ=split (' ',$line[$k]); foreach $i (1..226) { if ($champ[1]==$i) { # if the number of the residue lies between 1 and 366 printf (OUTP "$line[$k]\n"); # copies the line in the first crd (this corresponds to the first part) $lon{"p"}++; # increments a counter to know the number of atoms of the first part to write it at tne beginning of the crd } } foreach $i (367..373) { if ($champ[1]==$i) { # if the number of the residue lies between 367 and 373 printf (OUTS "$line[$k]\n"); # copies the line in the second crd (this corresponds to the second part) $lon{"s"}++; # increments a counter to know the number of atoms of the second part to write it at tne beginning of the crd } } foreach $i (374..379) { # if the number of the residue lies between 367 and 373 if ($champ[1]==$i) { # copies the line in the third crd (this corresponds to the third part) printf (OUTT "$line[$k]\n"); $lon{"t"}++; # increments a counter to know the number of atoms of the third part to write it at tne beginning of the crd } } } } } # for the case of Jeremy elsif ($cas==1) { # reads every lines of the crd. foreach $k(0..$#line) { chomp($line[$k]); if (length $line[$k]>=70) { # puts every fields of the line in an array. @champ=split (' ',$line[$k]); foreach $i (1..271) { if ($champ[1]==$i) { printf (OUTP "$line[$k]\n"); $lon{"p"}++; } } foreach $i (272..284) { if ($champ[1]==$i) { printf (OUTS "$line[$k]\n"); $lon{"s"}++; } } foreach $i (285..285) { if ($champ[1]==$i) { printf (OUTT "$line[$k]\n"); $lon{"t"}++; } } } } } close (OUTP); close (OUTS); close (OUTT); # this part allows to put the good number of atoms at the beginning of each crd. # for each crd (in the cases of Cedric and Nicolas there are 3 crd p, s and t). foreach $a ("p","s","t","q") { # opens the temporary crd p,s,t or q. open (CRD, "< scr/$conf_files/$perso-$a.conf_$conf-cha.crd.his.tr"); my @crd= ; close (CRD); # opens the final crd. open (OUT, "> scr/$conf_files/$perso-$a.conf_$conf-cha.crd.his"); printf (OUT "* CRD WITH RADII IN WMAIN FOR $perso-$a\n* CONFORMATION = $conf TOTAL ENERGY = X\n* VAN DER WAALS = X ELECTROSTATIC = X \n*CREATED BY USER: elyette\n*\n"); # this part allows to put space in the variable containing the number of atom in order to conserve the alignement in the file. if (length $lon{$a}==1) { $lon{$a}=" $lon{$a}"; } if (length $lon{$a}==2) { $lon{$a}=" $lon{$a}"; } if (length $lon{$a}==3) { $lon{$a}=" $lon{$a}"; } if (length $lon{$a}==4) { $lon{$a}=" $lon{$a}"; } printf (OUT "$lon{$a}\n"); # copies each lines of the temporary crd. foreach $i (0..$#crd) { printf (OUT "$crd[$i]"); } # removes the temporary crd. system `rm scr/$conf_files/$perso-$a.conf_$conf-cha.crd.his.tr`; } } # Allows to create the input file to make the new psf in the case 1 and 2. sub print_join { my $proto = shift; return (0) if (ref($proto)); ($cas,$conf_files->[$c],$path,$conf,$Verbose,$top,$par) = @_; # for the case 1 of Cedric, reads the join_crd_ced.tmp. if ($cas==1) { open (JOINTMP, "< tmpsep/join_crd_ced.tmp") || die "Unable to open the file tmpsep/join_crd_ced.tmp : $!\n"; # opens a template for join_crd and make a psf @join=; close (JOINTMP); open (JOININP, "> inp/$conf_files->[$c]/$conf_files->[$c].join_crd_ced.inp") || die "Unable to create the file inp/$conf_files->[$c]/$conf_files->[$c].join_crd_ced.inp : $!\n"; } # for the case 2 of Nicolas, reads the join_crd_ced.tmp. if ($cas==2) { open (JOINTMP, "< tmpsep/join_crd_nico.tmp") || die "Unable to open the file tmpsep/join_crd_nico.tmp : $!\n"; # opens a template for join_crd and make a psf @join=; close (JOINTMP); open (JOININP, "> inp/$conf_files->[$c]/$conf_files->[$c].join_crd_nico.inp") || die "Unable to create the file inp/$conf_files->[$c]/$conf_files->[$c].join_crd_nico.inp : $!\n"; } # for the case 3 of Michael, reads the join_crd_ced.tmp. if ($cas==3) { open (JOINTMP, "< tmpsep/join_crd_michael.tmp") || die "Unable to open the file tmpsep/join_crd_michael.tmp : $!\n"; # opens a template for join_crd and make a psf @join=; close (JOINTMP); open (JOININP, "> inp/$conf_files->[$c]/$conf_files->[$c].join_crd_michael.inp") || die "Unable to create the file inp/$conf_files->[$c]/$conf_files->[$c].join_crd_michael.inp : $!\n"; } # Runs along the template file. foreach $a (0..$#join) { # creates a join_crd.inp with good path directory, good number of conformation and good name of directory. if ($join[$a]=~ /^\$SET\$/) { printf (JOININP "SET path %s ! absolute path of the working directory\n", $path); printf (JOININP "SET conf %s ! absolute conf\n", $conf); printf (JOININP "SET rep %s ! name of the directory\n",$conf_files->[$c]) } elsif ($join[$a]=~ /^\$LIB\$/) { printf(JOININP "OPEN READ unit 2 card name \@path/lib/%s\nREAD rtf unit 2 card\nCLOSE unit 2\n",$top); printf(JOININP "OPEN READ unit 2 card name \@path/lib/%s\nREAD param unit 2 card\nCLOSE unit 2\n",$par); } else { printf (JOININP $join[$a]); } } close (JOININP); } # Checks whether a CHARMM process terminated normally or not. sub out_verif { my $proto = shift; return (0) if (ref($proto)); my ($out_file,$path,$Verbose) = @_; open (OUT, "< out/$out_file.out") || die "Unable to open the file out/$out_file.out : $!\n"; @out = ; close (OUT); my $result; my @lines; # Runs along the output file. foreach $o (0..$#out) { # Detection of a fatal error for charmm output. if ($out[$o] =~ /Execution terminated due to the detection of a fatal error./) { # Records the intereting data. push (@lines, $out[$o], $out[$o+2], $out[$o+5]); # The process failed. $result = 0; # No need to search further. last; } # Normal termination. elsif ($out[$o] =~ /NORMAL TERMINATION BY NORMAL STOP/) { # Records the interesting data. push (@lines, $out[$o], $out[$o+3]); # The process is successful. $result = 1; # No need to search further. last; } # Detection of a smallest grid for uhbd. elsif ($out[$o] =~ /check: ERROR, requested grid resolution too high!/) { #Records the interesting data. push (@lines, $out[$o], $out[$o+2]); #The run.com must be modified with the good value of grid. $result = 2; # No need to search further. last; } else { $result = 1; } } # Returns the result variable and the log lines array. return($result, \@lines); } # Allows to calculate weighted (or no weighted) average and standard deviation. sub average_stdev { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$ave,$std,$ave_nonpond,$togsep) = @_; my ($pdb1,$fc) = split('\_',$pdb_ref->[0]); my ($pdb2,$sc) = split('\_',$pdb_ref->[1]); push (@{$pdb_ref}, join('_',$pdb1,($fc . $sc))); # opens the file which contains the number (identifier) of each extracted conformation and the number of members of its family. open (OUT, "< out/$pdb_ref->[-1].distrib.ener.dat") || die "Unable to open the file out/distrib.ener.dat : $!\n"; @distrib=; close (OUT); foreach $lines (1..$#distrib){ # split each line of the file in order to extract the identifier of the conformation and the number of members of its family. ($conf,$number)=split("\t", $distrib[$lines]); # opens the file which contains every values of energies. # energies per amino acid open (CONFINP, "< out/$pdb_ref->[-1].conf_$conf/$pdb_ref->[-1].conf_$conf.decomp.dat") || die "Unable to open the file out/$pdb_ref->[-1].conf_$conf/$pdb_ref->[-1].conf_$conf.decomp.dat: $!\n"; my @confinp = ; close(CONFINP); # amino acid efficiency #open (EFF,"< out/$pdb_ref->[-1].conf_$conf/$pdb_ref->[-1].conf_$conf.amino-acid-efficiency.dat") || die "Unable to create the file out/$pdb_ref->[-1].conf_$conf/$pdb_ref->[-1].conf_$conf.amino-acid-efficiency.dat : $!\n"; #my @eff = ; #close(EFF); # energies per parts #open (SUM, "< out/$pdb_ref->[-1].conf_$conf/$pdb_ref->[-1].conf_$conf.sum_energies.dat") || die "Unable to open the file out/$pdb_ref->[-1].conf_$conf/$pdb_ref->[-1].conf_$conf.sum_energies.dat: $!\n"; #my @sum = ; #close(SUM); # the variable $totalconf counts the number of conformation being in the trajectory. $totalconf += $number; # This part allows to extract the data of the sum_energies.dat files. foreach $i(2..$#sum) { #print "$sum[$i]\n"; ($energy,$sum_1,$sum_2,$sum_T)=split(" ",$sum[$i]); # extracts the values of sum for each energy $array[$i]->{'ENERGY'} = $energy; # name of the energy $array[$i]->{'ONE'} += $sum_1*$number; # sum_energy of the part 1 of the system * number of member of the family $array[$i]->{'TWO'} += $sum_2*$number; # sum_energy of the part 2 of the system * number of member of the family $array[$i]->{'TOT'} += $sum_T*$number; # sum_energy of the total system * number of member of the family # calculation for the standard deviation if ($std == 1) { $array[$i]->{'ONE2'} += ($sum_1*$sum_1)*$number; # sum_energy of the part 1˛ * number of member of the family $array[$i]->{'TWO2' } += ($sum_2*$sum_2)*$number; # sum_energy of the part 2˛ * number of member of the family $array[$i]->{'TOT2' } += ($sum_T*$sum_T)*$number; # sum_energy of the total system˛ * number of member of the family } } # This part allows to extract the data of the decomp.dat files. foreach $i (1..$#confinp){ # Extracts and sums the values for the calculation of average energy for each residue if ($togsep eq "tog") { ($resname,$res,$uhbd,$sas,$vdw,$total,$desolv,$interact)=split(" ",$confinp[$i]); } elsif ($togsep eq "sep") { ($resname,$scbb,$res,$uhbd,$sas,$vdw,$total,$desolv,$interact)=split(" ",$confinp[$i]); $tab[$i]->{'SCBB' } = $scbb; # SC or BB } $tab[$i]->{'RES' } = $res; # number of the residue $tab[$i]->{'RESNAME' } = $resname; # name of the residue $tab[$i]->{'UHBD'} += $uhbd*$number; # elec * number of member of the family $tab[$i]->{'SAS' } += $sas*$number; # sas * number of member of the family $tab[$i]->{'VDW' } += $vdw*$number; # vdw * number of member of the family $tab[$i]->{'DESOLV'} += $desolv*$number; # desolv * number of member of the family $tab[$i]->{'INTERACT'} += $interact*$number; # interact * number of member of the family if ($ave_nonpond ==1) { $tab[$i]->{'UHBD_NON'} += $uhbd; $tab[$i]->{'SAS_NON' } += $sas; $tab[$i]->{'VDW_NON' } += $vdw; $tab[$i]->{'DESOLV_NON'} += $desolv; $tab[$i]->{'INTERACT_NON'} += $interact; } # calculation for the standard deviation if ($std == 1) { $tab[$i]->{'UHBD2'} += ($uhbd*$uhbd)*$number; # elec˛ * number of member of the family $tab[$i]->{'SAS2' } += ($sas*$sas)*$number; # sas˛ * number of member of the family $tab[$i]->{'VDW2' } += ($vdw*$vdw)*$number; # vdw˛ * number of member of the family $tab[$i]->{'DESOLV2'} += ($desolv*$desolv)*$number; # desolv˛ * number of member of the family $tab[$i]->{'INTERACT2'} += ($interact*$interact)*$number; # desolv˛ * number of member of the family } } # This part allows to extract the data of the amino-acid-efficiency.dat files. foreach $i (1..$#eff){ # Extracts and sums the values for the calculation of average energy for each residue if ($togsep eq "tog") { ($resname,$res,$uhbd,$sas,$vdw,$total,$desolv,$interact)=split(" ",$eff[$i]); } elsif ($togsep eq "sep") { ($resname,$scbb,$res,$uhbd,$sas,$vdw,$total,$desolv,$interact)=split(" ",$eff[$i]); $arr[$i]->{'SCBB' } = $scbb; # SC or BB } $arr[$i]->{'RES' } = $res; # number of the residue $arr[$i]->{'RESNAME' } = $resname; # name of the residue $arr[$i]->{'UHBD'} += $uhbd*$number; # elec * number of member of the family $arr[$i]->{'SAS' } += $sas*$number; # sas * number of member of the family $arr[$i]->{'VDW' } += $vdw*$number; # vdw * number of member of the family $arr[$i]->{'DESOLV'} += $desolv*$number; # desolv * number of member of the family $arr[$i]->{'INTERACT'} += $interact*$number; # interact * number of member of the family if ($ave_nonpond ==1) { $arr[$i]->{'UHBD_NON'} += $uhbd; $arr[$i]->{'SAS_NON' } += $sas; $arr[$i]->{'VDW_NON' } += $vdw; $arr[$i]->{'DESOLV_NON'} += $desolv; $arr[$i]->{'INTERACT_NON'} += $interact; } # calculation for the standard deviation if ($std == 1) { $arr[$i]->{'UHBD2'} += ($uhbd*$uhbd)*$number; # elec˛ * number of member of the family $arr[$i]->{'SAS2' } += ($sas*$sas)*$number; # sas˛ * number of member of the family $arr[$i]->{'VDW2' } += ($vdw*$vdw)*$number; # vdw˛ * number of member of the family $arr[$i]->{'DESOLV2'} += ($desolv*$desolv)*$number; # desolv˛ * number of member of the family $arr[$i]->{'INTERACT2'} += ($interact*$interact)*$number; # desolv˛ * number of member of the family } } } if ($ave == 1) { # Results of average of decomp.dat files open (AVE, "> out/$pdb_ref->[-1].average.ener.dat") || die "Unable to open the file out/$pdb_ref->[-1].average.ener.dat : $!\n"; if ($togsep eq "tog") { printf (AVE "RES # ELEC SAS VDW TOTAL DESOLV INTERACTION => AVERAGE PONDERE\n"); } elsif ($togsep eq "sep") { printf (AVE "RES PART # ELEC SAS VDW TOTAL DESOLV INTERACTION => AVERAGE PONDERE\n"); } # Results of average of amino-acid-efficiency.dat files #open (AVEEFF, "> out/$pdb_ref->[-1].efficiency-average.ener.dat") || die "Unable to open the file out/$pdb_ref->[-1].efficiency-average.ener.dat : $!\n"; #if ($togsep eq "tog") { # printf (AVEEFF "RES # ELEC SAS VDW TOTAL DESOLV INTERACTION => AVERAGE PONDERE\n"); #} #elsif ($togsep eq "sep") { # printf (AVEEFF "RES PART # ELEC SAS VDW TOTAL DESOLV INTERACTION => AVERAGE PONDERE\n"); #} # Results of average of sum_energies.dat files #open (AVESUM, "> out/$pdb_ref->[-1].sum_energies_average.dat") || die "Unable to open the file out/$pdb_ref->[-1].sum_energies_average.dat : $!\n"; #printf (AVESUM "Average of sum of energies for each part of the system\n"); #printf (AVESUM " # Sum part1 Sum part2 Sum tot\n"); } if ($ave_nonpond == 1) { # Results of average of decomp.dat files open (AVENON, "> out/$pdb_ref->[-1].average_nonpondere.ener.dat") || die "Unable to open the file out/average_nonpondere.ener.out : $!\n"; if ($togsep eq "tog") { printf (AVENON "RES # ELEC SAS VDW TOTAL DESOLV INTERACTION => AVERAGE NON PONDERE\n"); } elsif ($togsep eq "sep") { printf (AVENON "RES PART # ELEC SAS VDW TOTAL DESOLV INTERACTION => AVERAGE NON PONDERE\n"); } # Results of average of amino-acid-efficiency.dat files #open (AVEEFFNON, "> out/$pdb_ref->[-1].efficiency-average_nonpondere.ener.dat") || die "Unable to open the file out/$pdb_ref->[-1].efficiency-average_nonpondere.ener.dat : $!\n"; #if ($togsep eq "tog") { # printf (AVEEFFNON "RES # ELEC SAS VDW TOTAL DESOLV INTERACTION => AVERAGE NON PONDERE\n"); #} #elsif ($togsep eq "sep") { # printf (AVEEFFNON "RES PART # ELEC SAS VDW TOTAL DESOLV INTERACTION => AVERAGE NON PONDERE\n"); #} } if ($std == 1) { # Results of average of decomp.dat files open (STD, "> out/$pdb_ref->[-1].stdev.ener.dat") || die "Unable to open the file out/$pdb_ref->[-1].stdev.ener.dat : $!\n"; if ($togsep eq "tog") { printf (STD "RES # ELEC SAS VDW TOTAL DESOLV INTERACTION => STANDARD DEVIATION\n"); } elsif ($togsep eq "sep") { printf (STD "RES PART # ELEC SAS VDW TOTAL DESOLV INTERACTION => STANDARD DEVIATION\n"); } # Results of average of amino-acid-efficiency.dat files #open (STDEFF, "> out/$pdb_ref->[-1].efficiency-stdev.ener.dat") || die "Unable to open the file out/$pdb_ref->[-1].efficiency-stdev.ener.dat : $!\n"; #if ($togsep eq "tog") { # printf (STDEFF "RES # ELEC SAS VDW TOTAL DESOLV INTERACTION => STANDARD DEVIATION\n"); #} #elsif ($togsep eq "sep") { # printf (STDEFF "RES PART # ELEC SAS VDW TOTAL DESOLV INTERACTION => STANDARD DEVIATION\n"); #} # Results of average of sum_energies.dat files #open (STDSUM, "> out/$pdb_ref->[-1].sum_energies_stdev.dat") || die "Unable to open the file out/$pdb_ref->[-1].sum_energies_stdev.dat : $!\n"; #printf (STDSUM "Standard deviation of sum of energies for each part of the system\n"); #printf (STDSUM " # Sum part1 Sum part2 Sum tot\n"); } foreach $l (1..$#tab){ $uhbd1 = $tab[$l]->{'UHBD'}/$totalconf; $sas1 = $tab[$l]->{'SAS'}/$totalconf; $vdw1 = $tab[$l]->{'VDW'}/$totalconf; $tot1=$uhbd1+$sas1+$vdw1; $desolv1 = $tab[$l]->{'DESOLV'}/$totalconf; $interact1 = $tab[$l]->{'INTERACT'}/$totalconf; if ($ave_nonpond ==1) { $uhbd_non = $tab[$l]->{'UHBD_NON'}/$#distrib; $sas_non = $tab[$l]->{'SAS_NON'}/$#distrib; $vdw_non = $tab[$l]->{'VDW_NON'}/$#distrib; $tot_non=$uhbd_non+$sas_non+$vdw_non; $desolv_non = $tab[$l]->{'DESOLV_NON'}/$#distrib; $interact_non = $tab[$l]->{'INTERACT_NON'}/$#distrib; } if ($std == 1) { $uhbd2 = $tab[$l]->{'UHBD2'}/$totalconf; $sas2 = $tab[$l]->{'SAS2'}/$totalconf; $vdw2 = $tab[$l]->{'VDW2'}/$totalconf; $desolv2 = $tab[$l]->{'DESOLV2'}/$totalconf; $interact2 = $tab[$l]->{'INTERACT2'}/$totalconf; $uhbd3 = (abs($uhbd2 - ($uhbd1*$uhbd1))); $sas3 = (abs($sas2 - ($sas1*$sas1))); $vdw3 = (abs($vdw2 - ($vdw1*$vdw1))); $desolv3 = (abs($desolv2 - ($desolv1*$desolv1))); $interact3 = (abs($interact2 - ($interact1*$interact1))); $uhbd = sqrt ($uhbd3); $sas = sqrt ($sas3); $vdw = sqrt ($vdw3); $desolv = sqrt ($desolv3); $interact = sqrt ($interact3); $tot =$uhbd+$sas+$vdw; if ($togsep eq "tog") { printf (STD "%3s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$tab[$l]->{'RESNAME' },$tab[$l]->{'RES' },$uhbd,$sas,$vdw,$tot,$desolv,$interact ); } elsif ($togsep eq "sep") { printf (STD "%3s %2s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$tab[$l]->{'RESNAME' },$tab[$l]->{'SCBB' },$tab[$l]->{'RES' },$uhbd,$sas,$vdw,$tot,$desolv,$interact ); } } if ($ave == 1) { if ($togsep eq "tog") { printf (AVE "%3s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$tab[$l]->{'RESNAME' }, $tab[$l]->{'RES'},$uhbd1,$sas1,$vdw1,$tot1,$desolv1,$interact1 ); } elsif ($togsep eq "sep") { printf (AVE "%3s %2s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$tab[$l]->{'RESNAME' },$tab[$l]->{'SCBB' }, $tab[$l]->{'RES'},$uhbd1,$sas1,$vdw1,$tot1,$desolv1,$interact1 ); } } if ($ave_nonpond == 1) { if ($togsep eq "tog") { printf (AVENON "%3s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$tab[$l]->{'RESNAME' }, $tab[$l]->{'RES'},$uhbd_non,$sas_non,$vdw_non,$tot_non,$desolv_non,$interact_non ); } if ($togsep eq "sep") { printf (AVENON "%3s %2s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$tab[$l]->{'RESNAME' },$tab[$l]->{'SCBB' }, $tab[$l]->{'RES'},$uhbd_non,$sas_non,$vdw_non,$tot_non,$desolv_non,$interact_non ); } } } foreach $l (1..$#arr){ $uhbd1 = $arr[$l]->{'UHBD'}/$totalconf; $sas1 = $arr[$l]->{'SAS'}/$totalconf; $vdw1 = $arr[$l]->{'VDW'}/$totalconf; $tot1=$uhbd1+$sas1+$vdw1; $desolv1 = $arr[$l]->{'DESOLV'}/$totalconf; $interact1 = $arr[$l]->{'INTERACT'}/$totalconf; if ($ave_nonpond ==1) { $uhbd_non = $arr[$l]->{'UHBD_NON'}/$#distrib; $sas_non = $arr[$l]->{'SAS_NON'}/$#distrib; $vdw_non = $arr[$l]->{'VDW_NON'}/$#distrib; $tot_non=$uhbd_non+$sas_non+$vdw_non; $desolv_non = $arr[$l]->{'DESOLV_NON'}/$#distrib; $interact_non = $arr[$l]->{'INTERACT_NON'}/$#distrib; } if ($std == 1) { $uhbd2 = $arr[$l]->{'UHBD2'}/$totalconf; $sas2 = $arr[$l]->{'SAS2'}/$totalconf; $vdw2 = $arr[$l]->{'VDW2'}/$totalconf; $desolv2 = $arr[$l]->{'DESOLV2'}/$totalconf; $interact2 = $arr[$l]->{'INTERACT2'}/$totalconf; $uhbd3 = (abs($uhbd2 - ($uhbd1*$uhbd1))); $sas3 = (abs($sas2 - ($sas1*$sas1))); $vdw3 = (abs($vdw2 - ($vdw1*$vdw1))); $desolv3 = (abs($desolv2 - ($desolv1*$desolv1))); $interact3 = (abs($interact2 - ($interact1*$interact1))); $uhbd = sqrt ($uhbd3); $sas = sqrt ($sas3); $vdw = sqrt ($vdw3); $desolv = sqrt ($desolv3); $interact = sqrt ($interact3); $tot =$uhbd+$sas+$vdw; if ($togsep eq "tog") { printf (STDEFF "%3s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$arr[$l]->{'RESNAME' },$arr[$l]->{'RES' },$uhbd,$sas,$vdw,$tot,$desolv,$interact); } elsif ($togsep eq "sep") { printf (STDEFF "%3s %2s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$arr[$l]->{'RESNAME' },$arr[$l]->{'SCBB' },$arr[$l]->{'RES' },$uhbd,$sas,$vdw,$tot,$desolv,$interact); } } if ($ave == 1) { if ($togsep eq "tog") { printf (AVEEFF "%3s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$arr[$l]->{'RESNAME' }, $arr[$l]->{'RES'},$uhbd1,$sas1,$vdw1,$tot1,$desolv1,$interact1); } elsif ($togsep eq "sep") { printf (AVEEFF "%3s %2s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$arr[$l]->{'RESNAME' }, $arr[$l]->{'SCBB' },$arr[$l]->{'RES'},$uhbd1,$sas1,$vdw1,$tot1,$desolv1,$interact1); } } if ($ave_nonpond == 1) { if ($togsep eq "tog") { printf (AVEEFFNON "%3s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$arr[$l]->{'RESNAME' }, $arr[$l]->{'RES'},$uhbd_non,$sas_non,$vdw_non,$tot_non,$desolv_non,$interact_non); } elsif ($togsep eq "sep") { printf (AVEEFFNON "%3s %2s %3d %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f\n",$arr[$l]->{'RESNAME' },$arr[$l]->{'SCBB' }, $arr[$l]->{'RES'},$uhbd_non,$sas_non,$vdw_non,$tot_non,$desolv_non,$interact_non); } } } foreach $l (2..$#array){ $one1 = $array[$l]->{'ONE'}/$totalconf; $two1 = $array[$l]->{'TWO'}/$totalconf; $total1 = $array[$l]->{'TOT'}/$totalconf; if ($std == 1) { $one2 = $array[$l]->{'ONE2'}/$totalconf; $two2 = $array[$l]->{'TWO2'}/$totalconf; $total2 = $array[$l]->{'TOT2'}/$totalconf; $one3 = (abs($one2 - ($one1*$one1))); $two3 = (abs($two2 - ($two1*$two1))); $total3 = (abs($total2 - ($total1*$total1))); $one = sqrt ($one3); $two = sqrt ($two3); $total = sqrt ($total3); printf (STDSUM "%3s %7.4f %7.4f %7.4f\n",$array[$l]->{'ENERGY' },$one,$two,$total); } if ($ave == 1) { printf (AVESUM "%3s %7.4f %7.4f %7.4f\n",$array[$l]->{'ENERGY' },$one1,$two1,$total1); } } close(AVESUM); close(STDSUM); close(AVE); close(STD); close(AVENON); close(STDEFF); close(AVEEFF); close(AVEEFFNON); return(); } # Allows to change the offset. sub offset { my $proto = shift; return (0) if (ref($proto)); my ($res) = @_; if ((0 < $res) && ($res < 236)) { $res=$res+223; } elsif ((235 < $res) && ($res < 247)) { $res=$res-235; } elsif (247 == $res) { $res=$res-246; } elsif ((247 < $res) && ($res < 482)) { $res=$res-66; } elsif ((481 < $res) && ($res < 492)) { $res=$res-481; } elsif (492 == $res) { $res=$res-491; } return ($res); } # Allows to write the names of amino acid in monoletter code from the triletter code. sub amino_monoletter { my $proto = shift; return (0) if (ref($proto)); my ($res) = @_; if ($res=~/GLY/) { $res=~s/GLY/G/; } elsif ($res=~/GLP/) { $res=~s/GLP/G/; } elsif ($res=~/VAL/) { $res=~s/VAL/V/; } elsif ($res=~/ALA/) { $res=~s/ALA/A/; } elsif ($res=~/LEU/) { $res=~s/LEU/L/; } elsif ($res=~/ILE/) { $res=~s/ILE/I/; } elsif ($res=~/ARG/) { $res=~s/ARG/R/; } elsif ($res=~/LYS/) { $res=~s/LYS/K/; } elsif ($res=~/ASP/) { $res=~s/ASP/D/; } elsif ($res=~/ASN/) { $res=~s/ASN/N/; } elsif ($res=~/GLU/) { $res=~s/GLU/E/; } elsif ($res=~/GLN/) { $res=~s/GLN/Q/; } elsif ($res=~/PRO/) { $res=~s/PRO/P/; } elsif ($res=~/PRP/) { $res=~s/PRP/P/; } elsif ($res=~/PHE/) { $res=~s/PHE/F/; } elsif ($res=~/TRP/) { $res=~s/TRP/W/; } elsif ($res=~/TYR/) { $res=~s/TYR/Y/; } elsif ($res=~/SER/) { $res=~s/SER/S/; } elsif ($res=~/THR/) { $res=~s/THR/T/; } elsif ($res=~/CYS/) { $res=~s/CYS/C/; } elsif ($res=~/CSS/) { $res=~s/CSS/C/; } elsif ($res=~/MET/) { $res=~s/MET/M/; } elsif ($res=~/HIS/) { $res=~s/HIS/H/; } elsif ($res=~/HS[E,P,D]/) { $res=~s/HS[E,P,D]/H/; } return ($res); } # Allows to count the number of heavy atoms of each amino acid. sub amino_heavy_atoms { my $proto = shift; return (0) if (ref($proto)); my ($res) = @_; if ($res=~/GLY/) { $nb_heavy=4; } elsif ($res=~/GLP/) { $nb_heavy=4; } elsif ($res=~/VAL/) { $nb_heavy=7; } elsif ($res=~/ALA/) { $nb_heavy=5; } elsif ($res=~/LEU/) { $nb_heavy=8; } elsif ($res=~/ILE/) { $nb_heavy=8; } elsif ($res=~/ARG/) { $nb_heavy=11; } elsif ($res=~/LYS/) { $nb_heavy=9; } elsif ($res=~/ASP/) { $nb_heavy=8; } elsif ($res=~/ASN/) { $nb_heavy=8; } elsif ($res=~/GLU/) { $nb_heavy=9; } elsif ($res=~/GLN/) { $nb_heavy=9; } elsif ($res=~/PRO/) { $nb_heavy=7; } elsif ($res=~/PRP/) { $nb_heavy=7; } elsif ($res=~/PHE/) { $nb_heavy=11; } elsif ($res=~/TRP/) { $nb_heavy=14; } elsif ($res=~/TYR/) { $nb_heavy=12; } elsif ($res=~/SER/) { $nb_heavy=6; } elsif ($res=~/THR/) { $nb_heavy=7; } elsif ($res=~/CYS/) { $nb_heavy=6; } elsif ($res=~/CSS/) { $nb_heavy=6; } elsif ($res=~/MET/) { $nb_heavy=8; } elsif ($res=~/HIS/) { $nb_heavy=10; } elsif ($res=~/HS[E,P,D]/) { $nb_heavy=10; } else { $nb_heavy=1; } return ($nb_heavy); } # Allows to create the data files for gnuplot and makes postscripts representations for the average and standard deviation of several conformations. sub firstplot { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref) = @_; my ($pdb1,$fc) = split('\_',$pdb_ref->[0]); foreach $x("decomp","amino-acid-efficiency") { # opens the file which contains energies for each residue. open (AVE, "< out/$pdb1.$x.dat") || die "Unable to open the file out/$pdb1.$x.dat : $!\n"; @ave=; close (AVE); # opens the new file. open (OUT, "> plot/first/$pdb1.$x.signi.dat"); undef @name_res; # array containing the name of residue undef @num_res; # array containing the number of residue $lig=0; # counter of line foreach $i(1..$#ave) { # for each line of decomp.dat file @ener=split(' ',$ave[$i]); # split the line in a ener array in function of space. chomp ($ave[$i]); # if a energy (elec, sas, vdw or total) is lower than -$cut or superior than $cut if ($x eq "decomp") { $cut=0.1; # variable of the cut energy } elsif ($x eq "amino-acid-efficiency") { $cut=0.05; # variable of the cut energy } if (($ener[2]>$cut) || ($ener[3]>$cut) || ($ener[4]>$cut) || ($ener[5]>$cut) || ($ener[2]<-$cut) || ($ener[3]<-$cut) || ($ener[4]<-$cut) || ($ener[5]<-$cut)) { $lig++; # adds one to the counter $name_res[$lig]=$ener[0]; # puts the name of residue in an array ($name_res[$lig])=Leto::GrpAtoms->amino_monoletter($name_res[$lig]); $num_res[$lig]=$ener[1]; # puts the number of residue in an array ($num_res[$lig])=Leto::GrpAtoms->offset($num_res[$lig]); print (OUT "$lig $ave[$i]\n"); # writes the line of the average file followed by the line of stdev file and preceded by the number of line } } $xtics=0; # variable that contains the xtics line $x1=0; $x2=0; $xtics="set xtics nomirror rotate ("; foreach $i(1..$#name_res-1) { # for each residu in the new file, puts the number and name in the xtics variable $x1="\"$num_res[$i] $name_res[$i]\" $i, "; $xtics=$xtics . $x1; } $p=$#name_res; $x2="\"$num_res[$#name_res] $name_res[$#name_res]\" $p)\n"; # for the end of the xtics $xtics=$xtics . $x2; # opens the template file for gnuplot open (TMP, "; close(TMP); # Allows to write the gnuplot data_file with good data. foreach $ener ("elec","sas","vdw","tot","all","desolv","interact") { open (TXT,">plot/first/plot_$ener-$x.txt"); foreach $i (0..$#tmp) { # Identification of the keyword TITLE if ($tmp[$i]=~/TITLE/) { print (TXT "set title \"$ener energy for important residues for $pdb1\" font \"Times-Roman,18\"\n"); } # Identification of the keyword RANGE elsif ($tmp[$i]=~/RANGE/) { $der=$lig+1; print (TXT "set xrange [0:$der]\n"); } # Identification of the keyword XTICS elsif ($tmp[$i]=~/XTICS/) { print (TXT "$xtics"); } # Identification of the keyword YTICS elsif ($tmp[$i]=~/YTICS/) { print (TXT "set ylabel \"energy $ener (kcal/mol)\" font \"Times-Roman,18\"\n"); if ($ener eq "all") { print (TXT "set boxwidth 0.2"); } } # Identification of the keyword PLOT elsif ($tmp[$i]=~/PLOT/) { if ($ener eq "elec") { print (TXT "plot 'plot/first/$pdb1.$x.signi.dat' using (\$1):4 title \"ELECTROSTATIC\" with boxes linestyle 1\n"); } elsif ($ener eq "sas") { print (TXT "plot 'plot/first/$pdb1.$x.signi.dat' using (\$1):5 title \"SOLVENT ACCESSIBLE SURFACE\" with boxes linestyle 2\n"); } elsif ($ener eq "vdw") { print (TXT "plot 'plot/first/$pdb1.$x.signi.dat' using (\$1):6 title \"VAN DER WAALS\" with boxes linestyle 3\n"); } elsif ($ener eq "tot") { print (TXT "plot 'plot/first/$pdb1.$x.signi.dat' using (\$1):7 title \"TOTAL\" with boxes linestyle 4\n"); } elsif ($ener eq "all") { print (TXT "plot 'plot/first/$pdb1.$x.signi.dat' using (\$1):4 title \"ELECTROSTATIC\" with boxes linestyle 1,\\\n"); print (TXT "'plot/first/$pdb1.$x.signi.dat' using (\$1+0.2):5 title \"SOLVENT ACCESSIBLE SURFACE\" with boxes linestyle 2,\\\n"); print (TXT "'plot/first/$pdb1.$x.signi.dat' using (\$1+0.4):6 title \"VAN DER WAALS\" with boxes linestyle 3,\\\n"); print (TXT "'plot/first/$pdb1.$x.signi.dat' using (\$1+0.6):7 title \"TOTAL\" with boxes linestyle 4\n"); } elsif ($ener eq "desolv") { print (TXT "plot 'plot/first/$pdb1.$x.signi.dat' using (\$1):8 title \"DESOLV\" with boxes linestyle 3\n"); } elsif ($ener eq "interact") { print (TXT "plot 'plot/first/$pdb1.$x.signi.dat' using (\$1):9 title \"INTERACT\" with boxes linestyle 4\n"); } } # Identification of the keyword OUT elsif ($tmp[$i]=~/OUT/) { print (TXT "set output \"plot/first/$ener-$x.ps\"\n"); } # All others lines without keyword else { print (TXT "$tmp[$i]"); } } # Runs gnuplot (creates the postscript representations system `gnuplot plot/first/plot_$ener-$x.txt`; } } } # Allows to create the data files for gnuplot and makes postscripts representations for the average and standard deviation of several conformations. sub confplot { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref,$togsep) = @_; my ($pdb1,$fc) = split('\_',$pdb_ref->[0]); my ($pdb2,$sc) = split('\_',$pdb_ref->[1]); push (@{$pdb_ref}, join('_',$pdb1,($fc . $sc))); foreach $x("average","efficiency-average") { # opens the file which contains the average of energies for each residue. open (AVE, "< out/$pdb_ref->[-1].$x.ener.dat") || die "Unable to open the file out/$pdb_ref->[-1].$x.ener.dat : $!\n"; @ave=; close (AVE); if ($x eq "average") { $stdev="stdev"; } elsif ($x eq "efficiency-average") { $stdev="efficiency-stdev"; } # opens the file which contains the standard deviation of energies for each residue. open (STD, "< out/$pdb_ref->[-1].$stdev.ener.dat") || die "Unable to open the file out/$pdb_ref->[-1].$stdev.ener.dat : $!\n"; @std=; close (STD); # opens the new file. open (OUT, "> plot/$x/$pdb_ref->[-1].$x.signi.dat"); undef @name_res; # array containing the name of residue undef @num_res; # array containing the number of residue $lig=0; # counter of line foreach $i(1..$#ave) { # for each line of average file @energy=split(' ',$ave[$i]); # split the line in a ener array in function of space. chomp ($ave[$i]); chomp ($std[$i]); # if a energy (elec, sas, vdw or total) is lower than -$cut or superior than $cut if ($x eq "average") { $cut=0.5; # variable of the cut energy } elsif ($x eq "efficiency-average") { $cut=0.25; # variable of the cut energy } if ($togsep eq "tog") { if (($energy[2]>$cut) || ($energy[3]>$cut) || ($energy[4]>$cut) || ($energy[5]>$cut) || ($energy[2]<-$cut) || ($energy[3]<-$cut) || ($energy[4]<-$cut) || ($energy[5]<-$cut)) { $lig++; # adds one to the counter $name_res[$lig]=$energy[0]; # puts the name of residue in an array ($name_res[$lig])=Leto::GrpAtoms->amino_monoletter($name_res[$lig]); $num_res[$lig]=$energy[1]; # puts the number of residue in an array ($num_res[$lig])=Leto::GrpAtoms->offset($num_res[$lig]); print (OUT "$lig $ave[$i] $std[$i]\n"); # writes the line of the average file followed by the line of stdev file and preceded by the number of line } } elsif ($togsep eq "sep") { if (($energy[3]>$cut) || ($energy[4]>$cut) || ($energy[5]>$cut) || ($energy[6]>$cut) || ($energy[3]<-$cut) || ($energy[4]<-$cut) || ($energy[5]<-$cut) || ($energy[6]<-$cut)) { $lig++; # adds one to the counter $name_res[$lig]=$energy[0]; # puts the name of residue in an array ($name_res[$lig])=Leto::GrpAtoms->amino_monoletter($name_res[$lig]); $num_res[$lig]=$energy[2]; # puts the number of residue in an array ($num_res[$lig])=Leto::GrpAtoms->offset($num_res[$lig]); $scbb[$lig]=$energy[1]; # puts bb or sc print (OUT "$lig $ave[$i] $std[$i]\n"); # writes the line of the average file followed by the line of stdev file and preceded by the number of line } } } $xtics=0; # variable that contains the xtics line $x1=0; $x2=0; $xtics="set xtics nomirror rotate ("; if ($togsep eq "tog") { foreach $i(1..$#name_res-1) { # for each residu in the new file, puts the number and name in the xtics variable $x1="\"$num_res[$i] $name_res[$i]\" $i, "; $xtics=$xtics . $x1; } $p=$#name_res; $x2="\"$num_res[$#name_res] $name_res[$#name_res]\" $p)\n"; # for the end of the xtics $xtics=$xtics . $x2; } elsif ($togsep eq "sep") { foreach $i(1..$#name_res-1) { # for each residu in the new file, puts the number and name in the xtics variable $x1="\"$num_res[$i] $name_res[$i] $scbb[$i]\" $i, "; $xtics=$xtics . $x1; } $p=$#name_res; $x2="\"$num_res[$#name_res] $name_res[$#name_res] $scbb[$#name_res]\" $p)\n"; # for the end of the xtics $xtics=$xtics . $x2; } # opens the template file for gnuplot open (TMP, "; close(TMP); # Allows to write the gnuplot data_file with good data. @list=("elec","sas","vdw","tot","all","desolv","interact"); foreach $ener(0..$#list) { open (TXT,">plot/$x/plot_$list[$ener].txt"); foreach $i (0..$#tmp) { # Identification of the keyword TITLE if ($tmp[$i]=~/TITLE/) { print (TXT "set title \"$list[$ener] energy for important residues for $pdb_ref->[-1]\" font \"Times-Roman,18\"\n"); } # Identification of the keyword RANGE elsif ($tmp[$i]=~/RANGE/) { $der=$lig+1; print (TXT "set xrange [0:$der]\n"); } # Identification of the keyword XTICS elsif ($tmp[$i]=~/XTICS/) { print (TXT "$xtics"); } # Identification of the keyword YTICS elsif ($tmp[$i]=~/YTICS/) { print (TXT "set ylabel \"energy $list[$ener] (kcal/mol)\" font \"Times-Roman,18\"\n"); if ($list[$ener] eq "all") { print (TXT "set boxwidth 0.2"); } } # Identification of the keyword PLOT elsif ($tmp[$i]=~/PLOT/) { if ($togsep eq "tog") { if ($list[$ener] eq "elec") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):4 title \"ELECTROSTATIC\" with boxes linestyle 1,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):4:12 notitle with yerrorbars linestyle 1\n"); } elsif ($list[$ener] eq "sas") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):5 title \"SOLVENT ACCESSIBLE SURFACE\" with boxes linestyle 2,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):5:13 notitle with yerrorbars linestyle 2\n"); } elsif ($list[$ener] eq "vdw") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):6 title \"VAN DER WAALS\" with boxes linestyle 3,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):6:14 notitle with yerrorbars linestyle 3\n"); } elsif ($list[$ener] eq "tot") { print (TXT "plot 'plot/average/$pdb_ref->[-1].average.signi.dat' using (\$1):7 title \"TOTAL\" with boxes linestyle 4,\\\n"); print (TXT "'plot/average/$pdb_ref->[-1].average.signi.dat' using (\$1):7:15 notitle with yerrorbars linestyle 4\n"); } elsif ($list[$ener] eq "all") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):4 title \"ELECTROSTATIC\" with boxes linestyle 1,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1+0.2):5 title \"SOLVENT ACCESSIBLE SURFACE\" with boxes linestyle 2,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1+0.4):6 title \"VAN DER WAALS\" with boxes linestyle 3,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1+0.6):7 title \"TOTAL\" with boxes linestyle 4\n"); } elsif ($list[$ener] eq "desolv") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):8 title \"DESOLVATATION\" with boxes linestyle 1,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):8:16 notitle with yerrorbars linestyle 1\n"); } elsif ($list[$ener] eq "interact") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):9 title \"INTERACT\" with boxes linestyle 1,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):9:17 notitle with yerrorbars linestyle 1\n"); } } elsif ($togsep eq "sep") { if ($list[$ener] eq "elec") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):5 title \"ELECTROSTATIC\" with boxes linestyle 1,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):5:14 notitle with yerrorbars linestyle 1\n"); } elsif ($list[$ener] eq "sas") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):6 title \"SOLVENT ACCESSIBLE SURFACE\" with boxes linestyle 2,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):6:15 notitle with yerrorbars linestyle 2\n"); } elsif ($list[$ener] eq "vdw") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):7 title \"VAN DER WAALS\" with boxes linestyle 3,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):7:16 notitle with yerrorbars linestyle 3\n"); } elsif ($list[$ener] eq "tot") { print (TXT "plot 'plot/average/$pdb_ref->[-1].average.signi.dat' using (\$1):8 title \"TOTAL\" with boxes linestyle 4,\\\n"); print (TXT "'plot/average/$pdb_ref->[-1].average.signi.dat' using (\$1):8:17 notitle with yerrorbars linestyle 4\n"); } elsif ($list[$ener] eq "all") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):5 title \"ELECTROSTATIC\" with boxes linestyle 1,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1+0.2):6 title \"SOLVENT ACCESSIBLE SURFACE\" with boxes linestyle 2,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1+0.4):7 title \"VAN DER WAALS\" with boxes linestyle 3,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1+0.6):8 title \"TOTAL\" with boxes linestyle 4\n"); } elsif ($list[$ener] eq "desolv") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):9 title \"DESOLVATATION\" with boxes linestyle 1,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):9:18 notitle with yerrorbars linestyle 1\n"); } elsif ($list[$ener] eq "interact") { print (TXT "plot 'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):10 title \"INTERACT\" with boxes linestyle 1,\\\n"); print (TXT "'plot/$x/$pdb_ref->[-1].$x.signi.dat' using (\$1):10:19 notitle with yerrorbars linestyle 1\n"); } } } # Identification of the keyword OUT elsif ($tmp[$i]=~/OUT/) { print (TXT "set output \"plot/$x/$list[$ener].ps\"\n"); } # All others lines without keyword else { print (TXT "$tmp[$i]"); } } # Runs gnuplot (creates the postscript representations system `gnuplot plot/$x/plot_$list[$ener].txt`; } } } # This function was used to make a test (allows to compare the energies of the largest familly and the average energies). sub supplement { my $proto = shift; return (0) if (ref($proto)); my ($pdb_ref) = @_; my ($pdb1,$fc) = split('\_',$pdb_ref->[0]); my ($pdb2,$sc) = split('\_',$pdb_ref->[1]); push (@{$pdb_ref}, join('_',$pdb1,($fc . $sc))); open (OUT, "< out/$pdb_ref->[-1].distrib.ener.dat") || die "Unable to open the file out/distrib.ener.dat : $!\n"; @distrib=; close (OUT); $number_max=0; $conf_max=0; foreach $lines (1..$#distrib){ ($conf,$number)=split("\t", $distrib[$lines]); if ($number > $number_max) { $number_max=$number; $conf_max=$conf; } } open (MAX, "< out/$pdb_ref->[-1].conf_$conf_max/$pdb_ref->[-1].conf_$conf_max.decomp.dat") || die "Unable to open the file out/$pdb_ref->[-1].conf_$conf_max/$pdb_ref->[-1].conf_$conf_max.decomp.dat: $!\n"; my @max = ; close(MAX); open (MOY, "< out/$pdb_ref->[-1].average.ener.dat") || die "Unable to open the file out/$pdb_ref->[-1].average.ener.dat: $!\n"; my @moy = ; close(MOY); open (DIF, "> out/$pdb_ref->[-1].diff.ener.dat") || die "Unable to open the file out/$pdb_ref->[-1].diff.ener.dat : $!\n"; printf (DIF "RES # ELEC SAS VDW TOTAL => DIFFERENCE\n"); foreach $i (1..$#max){ ($res,$resname,$uhbd_max,$sas_max,$vdw_max,$tot_max)=split(" ",$max[$i]); ($res,$resname,$uhbd_moy,$sas_moy,$vdw_moy,$tot_moy)=split(" ",$moy[$i]); $dif[$i]->{'RES' } = $res; $dif[$i]->{'RESNAME' } = $resname; $dif[$i]->{'UHBD'} = $uhbd_max-$uhbd_moy; $dif[$i]->{'SAS' } = $sas_max-$sas_moy; $dif[$i]->{'VDW' } = $vdw_max-$vdw_moy; $dif[$i]->{'TOT' } = $tot_max-$tot_moy; printf (DIF "%3s %3d %7.4f %7.4f %7.4f %7.4f\n",$dif[$i]->{'RES' },$dif[$i]->{'RESNAME' },$dif[$i]->{'UHBD'},$dif[$i]->{'SAS' },$dif[$i]->{'VDW' },$dif[$i]->{'TOT' }); } close(DIF); return(); } 1;