find.interchanging.residues <- function(dm1, dm2, dcm=NULL, occupancy=.5, diff.cut=.5) { if ( is.null(dcm) ) { my.dcm <- dcm(dm1,dm2, occupancy=occupancy, diff.cut=diff.cut) } else { my.dcm <- dcm } diff.inds <- which(my.dcm!=0, arr.ind=T) avg.dists.1 <- dm1$dmat.avg[diff.inds] avg.dists.2 <- dm2$dmat.avg[diff.inds] ## table of difference contacts (dc) dc <- cbind(diff.inds, avg.dists.1, avg.dists.2, avg.dists.1-avg.dists.2) colnames(dc)=c("row", "col", "avg.dists.1", "avg.dists.2", "avg.dist.diff") dc=dc[ order(dc[,"avg.dist.diff"]), ] ## destinguish the contacts a.inds <- which(dc[,"avg.dist.diff"]<0) b.inds <- which(dc[,"avg.dist.diff"]>0) a <- dc[a.inds,] b <- dc[b.inds,] ## b will now also hold indices for a in which ## interchangind residues are found a.inds <- c(1:nrow(b))*0 b=data.frame(cbind(b, a.inds)) b=transform(b, a.inds=as.character(a.inds)) for ( i in 1:nrow(b) ) { resa <- b[i,"row"] resb <- b[i,"col"] k=which( a[,"row"]==resa | a[,"row"]==resb | a[,"col"]==resa | a[,"col"]==resb, arr.ind=T ) b[i, "a.inds"]= paste(k, collapse="|") } out <- list(a=a, b=b, dcm=my.dcm) return( out ) } trunc.mat <- function(mat, xlim=NULL, ylim=NULL) { mat[,ylim[2]:ncol(mat)] = 0 mat[xlim[2]:nrow(mat),] = 0 mat[,1:ylim[1]] = 0 mat[1:xlim[1],] = 0 return(mat) } plot.dcm <- function(mat, pdb, xlim=NULL, ylim=NULL, sse.grid=TRUE, sse.grid.col="gray80", sse.grid.lty="dotted", helix.col="gray20", sheet.col="gray80", xlab='Residue No', ylab='Residue No', cex=0.6, pch=c(2,20), col=c('green', 'blue'), labels.cex=NULL, labels.offset=5, sse=NULL, top=FALSE, bot=TRUE, left=TRUE, ... ) { library(grid) library(gridBase) library(maptools) seq <- seq.pdb(pdb) s <- array(seq) g<-which(mat==1, arr.ind=TRUE) plot.bio3d(g[,1], g[,2], type='p', cex=cex, pch=pch[1], col=col[1], xlim=xlim, ylim=ylim, ylim2zero=FALSE, sse=sse, sse.border=TRUE, top=top, bot=bot, xlab=xlab, ylab=ylab, ...) g<-which(mat==-1, arr.ind=TRUE) lines(g[,1], g[,2], type='p', pch=pch[2], col=col[2]) g<-which(mat!=0, arr.ind=TRUE) if(!is.null(labels.cex)) { labels <- c() for ( i in 1:nrow(g) ) { label <- paste( paste(s[g[i,1]], g[i,1]+1, sep=""), paste( s[g[i,2]], g[i,2]+1, sep=""), sep="-") labels=c(labels, label) } pointLabel(g[,1], g[,2], labels, cex=labels.cex, offset=labels.offset) } ##sse.grid=FALSE if (!is.null(sse)) { if(is.list(sse)) { gridpoints <- c(sse$sheet$start[sse$sheet$length>4], ##sse$sheet$end[sse$sheet$length>4], ##sse$helix$end[sse$helix$length>4]) sse$helix$start[sse$helix$length>4] ) } else { gridpoints <- sse } left=TRUE if(left){ left <- min(xlim) - (diff(xlim)*0.06) right <- min(xlim) - (diff(xlim)*0.01) if(!is.null(sse$helix$start)) rect(xleft=left, xright=right, ybottom=sse$helix$start, ytop=sse$helix$end, col=helix.col, border=TRUE) if(!is.null(sse$sheet$start)) rect(xleft=left, xright=right, ybottom=sse$sheet$start, ytop=sse$sheet$end, col=sheet.col, border=TRUE) } if ( sse.grid==TRUE ) { abline(h=sse$helix$start, col=sse.grid.col, lty=sse.grid.lty) abline(v=sse$helix$start, col=sse.grid.col, lty=sse.grid.lty) } } } ## parallel version dm.xyz.pdbs <- function(pdbs, ids, scut=4, dcut=4, threads=2 ) { gaps.res <- gap.inspect(pdbs$ali) dim <- length(gaps.res$f.inds) dmat <- matrix(0,nrow=dim, ncol=dim) dmat.avg <- matrix(0,nrow=dim, ncol=dim) if ( "rparallel" %in% names( getLoadedDLLs()) && threads > 1 ) { runParallel( resultVar=c("dmat", "dmat.avg"), resultOp=c("dmat.merge", "dmat.merge"), verbose="normal", nWorkers=threads ) } else{ for ( i in 1:length(ids)) { library(bio3d) id <- ids[i] filename <- pdbs$id[id] pdb <- read.pdb(filename) print(paste("Calcualting ", i, " of ", length(ids), " files...")) print(paste("This is file",filename)) res <- pdbs$resno[id, gaps.res$f.inds] res <- paste("^", res, "$", sep="") #res <- paste(res, "$", sep="") pdbres <- grep(paste(res,collapse="|"), pdb$atom[,"resno"]) new <- pdb$atom[pdbres,] xyz <- as.numeric( t(new[,c("x","y","z")]) ) internalMat <- dm.xyz(xyz, grpby=new[,"resno"], scut=scut) dmat.avg = dmat.merge(dmat.avg, internalMat) tmpMat <- dmat.reduce( internalMat, dim, dcut ) dmat <- dmat.merge(tmpMat, dmat) } } dmat <- dmat/length(ids) dmat.avg=dmat.avg/length(ids) out <- list(dmat=dmat, dmat.avg=dmat.avg) return( out ) } ## only works for two residues dm.inds <- function(trj.xyz, pdb, inds) { dst <- c() for ( i in 1:nrow(trj.xyz) ) { d = dm.xyz(trj.xyz[i,inds$xyz], grpby=pdb$atom[inds$atom,"resno"]) dst = c(dst, d[1,2]) } return (dst) } ## seriel version dm.xyz.pdbs2 <- function(pdbs, ids, scut=4, dcut=4 ) { gaps.res <- gap.inspect(pdbs$ali) dim <- length(gaps.res$f.inds) dmat <- matrix(0,nrow=dim, ncol=dim) dmat.avg <- matrix(0,nrow=dim, ncol=dim) for ( i in 1:length(ids)) { id <- ids[i] filename <- pdbs$id[id] pdb <- read.pdb(filename) print(paste("Calcualting ", i, " of ", length(ids), " files...")) print(paste("This is file",filename)) res <- pdbs$resno[id, gaps.res$f.inds] #print("These residues are used:") #print(res) #res <- paste("^", res, sep="") #res <- paste(res, "$", sep="") res <- paste("^", res, "$", sep="") pdbres <- grep(paste(res,collapse="|"), pdb$atom[,"resno"]) print("These atoms are used") print(head(pdb$atom[pdbres,])) print(tail(pdb$atom[pdbres,])) new <- pdb$atom[pdbres,] #print(nrow(new)) xyz <- as.numeric( t(new[,c("x","y","z")]) ) internalMat <- dm.xyz(xyz, grpby=new[,"resno"], scut=scut) #print(internalMat[which(internalMat>0)]) dmat.avg=dmat.avg+internalMat tmpMat <- dmat.reduce( internalMat, dim, dcut ) dmat <- tmpMat + dmat } dmat <- dmat/length(ids) dmat.avg=dmat.avg/length(ids) out <- list(dmat=dmat, dmat.avg=dmat.avg) return( out ) } if(FALSE) { ## old not to be used... dm.xyz.pdbs <- function(files, scut=4, dcut=4 ) { aa <- aa123(c("G","A","V","F","P","L","I","R","D","E","S","T","C","N","Q","H","K","Y","M","W")) aa <- paste(aa, collapse="|") aa <- paste(aa, "HID", "HIE", sep="|") pdb <- read.pdb(files[1]) ca.inds <- atom.select(pdb, "calpha") dim <- length(ca.inds$atom) dmat <- matrix(0,nrow=dim, ncol=dim) dmat.avg <- matrix(0,nrow=dim, ncol=dim) for ( i in 1:length(files)) { print(paste("Calcualting ", i, " of ", length(files), " files...")) pdb <- read.pdb(files[i]) prot.inds <- grep(aa, pdb$atom[,"resid"]) new <- pdb$atom[prot.inds,] xyz <- as.numeric( t(new[,c("x","y","z")]) ) internalMat <- dm.xyz(xyz, grpby=new[,"resno"], scut=scut) dmat.avg=dmat.avg+internalMat tmpMat <- dmat.reduce( internalMat, dim, dcut ) dmat <- tmpMat + dmat } dmat <- dmat/length(files) dmat.avg=dmat.avg/length(files) out <- list(dmat=dmat, dmat.avg=dmat.avg) return( out ) } } dmat.calc <- function(trj.xyz, pdb, scut=4, dcut=4 ) { library(bio3d) ca.inds <- atom.select(pdb, "calpha") dim <- length(ca.inds$atom) ##dat <- array(0.00, dim=c(dim, dim, nrow(trj.xyz))) dmat <- matrix(0.00,nrow=dim, ncol=dim) dmat.avg <- matrix(0.00,nrow=dim, ncol=dim) for ( i in 1:nrow(trj.xyz) ) { internalMat <- dm.xyz(trj.xyz[i,], grpby=pdb$atom[,"resno"], scut=scut) dmat.avg = dmat.merge(dmat.avg, internalMat) tmpMat <- dmat.reduce( internalMat, dim, dcut ) dmat <- dmat.merge(tmpMat, dmat) ##dat[, ,i] = internalMat } out <- list(dmat=dmat, dmat.avg=dmat.avg ) return( out ) } # new version dm.xyz.trj3 <- function( trj.xyz, pdb, threads=2, scut=4, dcut=4 ) { library(bio3d) library(multicore) #threads=threads-1 ca.inds <- atom.select(pdb, "calpha") dim <- length(ca.inds$atom) dmat <- matrix(0.00,nrow=dim, ncol=dim) dmat.avg <- matrix(0.00,nrow=dim, ncol=dim) ptm <- proc.time() rnames=rep(1:threads, each=(nrow(trj.xyz)/threads)) print(rnames) jobs <- list() for ( i in 1:threads ) { rinds=which(rnames==i) trj=trj.xyz[rinds,] q=parallel(dmat.calc(trj, pdb)) jobs[[i]]=q } res=collect(jobs, wait=TRUE) print(proc.time() - ptm) for ( job in res ) { dmat=dmat+job$dmat dmat.avg=dmat.avg+job$dmat.avg } dmat <- dmat/nrow(trj.xyz) dmat.avg = dmat.avg/nrow(trj.xyz) out <- list(dmat=dmat, dmat.avg=dmat.avg) return(out) } # old version dm.xyz.trj <- function( trj.xyz, pdb, threads=2, scut=4, dcut=4 ) { library(bio3d) ca.inds <- atom.select(pdb, "calpha") dim <- length(ca.inds$atom) dmat <- matrix(0.00,nrow=dim, ncol=dim) dmat.avg <- matrix(0.00,nrow=dim, ncol=dim) done <- c() ptm <- proc.time() if ( "rparallel" %in% names( getLoadedDLLs()) && threads > 1 ) { runParallel( resultVar=c("dmat", "dmat.avg"), resultOp=c("dmat.merge", "dmat.merge"), verbose="normal", nWorkers=threads ) } else{ for ( i in 1:nrow(trj.xyz) ) { library(bio3d) done = c(i, done) print(length(done)) internalMat <- dm.xyz(trj.xyz[i,], grpby=pdb$atom[,"resno"], scut=scut) dmat.avg = dmat.merge(dmat.avg, internalMat) tmpMat <- dmat.reduce( internalMat, dim, dcut ) dmat <- dmat.merge(tmpMat, dmat) } } print(proc.time() - ptm) dmat <- dmat/nrow(trj.xyz) dmat.avg = dmat.avg/nrow(trj.xyz) out <- list(dmat=dmat, dmat.avg=dmat.avg) return( out ) } dmat.reduce <- function( tmpMat, dim, dcut ) { newTmp <- matrix(0,nrow=dim, ncol=dim) g <- which( tmpMat <= dcut, arr.ind = TRUE) a <- as.vector(g[,1]) b <- as.vector(g[,2]) for ( j in 1:length(a) ) { newTmp[a[j],b[j]] = 1 } return( newTmp ) } dmat.merge <- function( tmpMat, resultMat ) { return( tmpMat+resultMat ) } dcm <- function( A, B, occupancy=0.5, diff.cut=1 ){ dim <- dim(A$dmat) new1 <- matrix(0,nrow=dim[1], ncol=dim[2]) g <- which( A$dmat >= occupancy, arr.ind = TRUE) a <- as.vector(g[,1]) b <- as.vector(g[,2]) for ( j in 1:length(a) ) { new1[a[j],b[j]] = 1 } new2 <- matrix(0,nrow=dim, ncol=dim) g <- which( B$dmat >= occupancy, arr.ind = TRUE) a <- as.vector(g[,1]) b <- as.vector(g[,2]) for ( j in 1:length(a) ) { new2[a[j],b[j]] = 1 } c <- new1-new2 avg <- abs(A$dmat.avg-B$dmat.avg) avg=avg*abs(c) new <- matrix(0, nrow=nrow(c), ncol=ncol(c)) new[which(avg>=diff.cut)]=1 c=c*new return(c) } dm.xyz2 <- function(xyz, grpby=NULL, scut=NULL, mask.lower=TRUE, inds=NULL) { ##-- New distance matrix function with 'grpby' option ## ndm(pdb$xyz, grpby=pdb$atom[,"resno"], scut=3) if( !is.vector(xyz) ) stop(" Input 'xyz' should be a numeric vector" ) ##- Full Distance matrix (could use 'dm' or 'dist.xyz') d <- as.matrix(dist(matrix(xyz, ncol = 3, byrow = TRUE))) ##- Mask lower.tri if( mask.lower ) d[lower.tri(d)] = NA ##- Mask concetive atoms if( is.null(grpby) ) { if (!is.null(scut)) d[diag.ind(d, n = scut)] = NA return(d) } else { ##- Group by concetive numbers in 'grpby' if( length(xyz) != (length(grpby)*3) ) stop("dimension miss-match in 'xyz' and 'grpby', check lengths") ##- Bounds of 'grpby' numbers inds <- bounds(grpby, dup=TRUE) nres <- nrow(inds) ##- Per-residue matrix m <- matrix(, ncol=nres, nrow=nres) ij <- pairwise(nres) ## Ignore concetive groups (residues) if (!is.null(scut)) ij <- ij[ij[,2]-ij[,1] > (scut-1),] ##- Min per residue for(k in 1 : nrow(ij) ) { m[ij[k,1],ij[k,2]] <- min( d[ (inds[ij[k,1],"start"]:inds[ij[k,1],"end"]), (inds[ij[k,2],"start"]:inds[ij[k,2],"end"])], na.rm=TRUE ) } if( !mask.lower ) m[lower.tri(m)] = t(m)[lower.tri(m)] return(m) } }