#datapath <- "/Users/edvinfuglebakk/projects/aromatic_residues_peripherial_binding/code/opm_statistics/results/simple_protrusion_analyzer"
datapath <- "/Users/edvinfuglebakk/projects/manuscripts/protrusions_overleaf_plos/figures"
outpath <- "/Users/edvinfuglebakk/projects/manuscripts/protrusions_overleaf_plos/"
#sse_data <- "/Users/edvinfuglebakk/projects/aromatic_residues_peripherial_binding/dbs/sse_data/opm_dssp_data.csv"
sse_data <- "/Users/edvinfuglebakk/projects/aromatic_residues_peripherial_binding/dbs/sse_data/opm_dssp_data.csv"
sse_data_alternate_set <- "/Users/edvinfuglebakk/projects/aromatic_residues_peripherial_binding/dbs/sse_data/alternate_sets_dssp_data.csv"

exp_classification_data <- "/Users/edvinfuglebakk/projects/aromatic_residues_peripherial_binding/dbs/exprimental_ibs/classification.csv"

load <- function(name){
	name <- paste(datapath, name, sep="/")
	return(data.frame(read.csv(name, strip=T, na.strings=c("None"))))
}

load_sse <- function(simplify=T, dssp=sse_data){
	sse <- data.frame(read.csv(dssp, strip=T, stringsAsFactors=F, header=T))
	sse[sse[,"sse"]=="T",][,"sse"] <- "Turn"
	sse[sse[,"sse"]=="S",][,"sse"] <- "Bend"
	sse[sse[,"sse"]=="None",][,"sse"] <- "Loop"
	sse[sse[,"sse"]=="B",][,"sse"] <- "Beta bridge"
	sse[sse[,"sse"]=="E",][,"sse"] <- "Beta ladder / sheet"
	sse[sse[,"sse"]=="G",][,"sse"] <- "Helix (3)"
	sse[sse[,"sse"]=="H",][,"sse"] <- "Helix"
	sse[sse[,"sse"]=="I",][,"sse"] <- "Helix (5)"
	
	if (simplify){
		sse[sse[,"sse"]=="Beta bridge",][,"sse"] <- "Beta"
		sse[sse[,"sse"]=="Beta ladder / sheet",][,"sse"] <- "Beta"
		sse[sse[,"sse"]=="Helix (3)",][,"sse"] <- "Helix"
		#sse[sse[,"sse"]=="Helix",][,"sse"] <- "Helix"
		sse[sse[,"sse"]=="Helix (5)",][,"sse"] <- "Helix"
	}
	
	sse[,"sse"] <- as.factor(sse[,"sse"])
	return(sse)
}

classification_periph <- load("classification_periph.csv")
classification_tm  <- load("classification_tm.csv")
structure_data_periph <- load("pdb_periph.csv")
structure_data_tm <- load("pdb_tm.csv")
residue_data_periph <- load("protrusions_periph.csv")
residue_data_tm <- load("protrusions_tm.csv")

experimental_periph_ref <- load("comp_exp_ref.csv")
experimental_periph <- load("comp_exp.csv")
manual_inspection_periph <- load("manual_inspection_set.csv")
classification_exp <- data.frame(read.csv(exp_classification_data, strip=T, stringsAsFactors=F, header=T, comment.char="#"))

classification_alternate_periph <- load("classification_periph_alternate.csv")
classification_alternate_reference  <- load("classification_reference_alternate.csv")
structure_data_alternate_periph <- load("pdb_periph_alternate.csv")
structure_data_alternate_reference <- load("pdb_reference_alternate.csv")
residue_data_alternate_periph <- load("protrusions_periph_alternate.csv")
residue_data_alternate_reference <- load("protrusions_reference_alternate.csv")

oligomerisation <- load("oligomerisation.csv")

get_classes <- function(classification, level){
	return(unique(classification[,level]))
}

get_pdbs <- function(classification, level, value){
	return(unique(classification[classification[,level]==value,][,"pdb"]))
}

distance <- function(r1, r2){
	return(as.numeric(((r1["x"]-r2["x"])**2 + (r1["y"]-r2["y"])**2 + (r1["z"]-r2["z"])**2)**.5))
}
min_distance <- function(r, data){
	diffs <- c()
	d <- data[data[,"structure"]==data[r,"structure"],]
	for(rn in rownames(d)){
		if (r!=rn){
			diffs <- c(diffs, distance(d[rn,], d[r,]))
		}
	}		
	return(min(diffs))
}
min_distances <- function(data){
	rr <- c()
	for(r in rownames(data)){
		rr <- c(rr, min_distance(r, data))
	}
	return(rr)
}

data_set_stats <- function(classification){
	print(length(unique(classification[,"family"])))
	print(length(unique(classification[,"pdb"])))
}
print("Families Peripheral")
data_set_stats(classification_periph)
print("Families Non bindin surfaces")
data_set_stats(classification_tm)
print("Families Peripheral P")
print(length(classification_alternate_periph[,1]))
print("Families Reference proteins")
print(length(classification_alternate_reference[,1]))

protrusion <- function(convhull=NULL, exposed=NULL, max_neighbours=NULL, min_neighbours=NULL, interface=NULL, max_min_pair_distance=NULL, type=NULL, min_clusterness=NULL, pre=NULL, min_facet_neighbours_ww_oct=NULL, min_facet_neighbours_ww_if=NULL, max_facet_neighbours_ww_if=NULL){
	f <- function(data){
		r <- data
		if(!is.null(pre)){
			r <- pre(data)
		}
		
		if(!is.null(type)){
			d <- r[rep(FALSE, length(r)),]
			for (t in type){
				d <- rbind(d, r[r[,"type"]==t,])
			}
			r <- d
		}
		
		if(!is.null(convhull)){
			if(convhull){
				r <- r[r[,"convhull"]==1,]
			}
			if(!convhull){
				r <- r[r[,"convhull"]==0,]
			}
			
		}
		if(!is.null(exposed)){
			if(exposed){
				r <- r[r[,"exposed"]==1,]
			}
			if(!exposed){
				r <- r[r[,"exposed"]==0,]
			}
			
		}
		
		if(!is.null(max_neighbours)){
			r <- r[r[,"neighbours"]<=max_neighbours,]
		}
		if(!is.null(min_neighbours)){
			r <- r[r[,"neighbours"]>=min_neighbours,]
		}
		if(!is.null(interface)){
			r <- r[r[,"interface"]==interface,]
		}

		if(!is.null(min_clusterness)){
			r <- r[r[,"clustnerness"]>=min_clusterness,]
		}

		if(!is.null(max_min_pair_distance)){
			md <- min_distances(r)
			r <- r[md <= max_min_pair_distance,]
		}
		if(!is.null(min_facet_neighbours_ww_oct)){
			r <- r[r[,"facet_neighbours_ww_oct"]>=min_facet_neighbours_ww_oct,]
		}
		if(!is.null(min_facet_neighbours_ww_if)){
			r <- r[r[,"facet_neighbours_ww_if"]>=min_facet_neighbours_ww_if,]
		}
		if(!is.null(max_facet_neighbours_ww_if)){
			r <- r[r[,"facet_neighbours_ww_if"]<=max_facet_neighbours_ww_if,]
		}
		return(r)
	}
	return(f)
}

facet_neighbours_filter <- function(column, min_neighbours){
	return(function(d){
		
		d <- d[d[,column]>=min_neighbours,]
		return(d)
	})
}

size_select <- function(size_bin, struct_data){
	sel <- struct_data[struct_data[,"size"]>size_bin[1],]
	sel <-sel[sel[,"size"]<=size_bin[2],]
	return(sel[,"structure"])
}

size_filter <- function(size_bin){
	if (is.null(size_bin)){
		return(function(x){return(x)})
	}
	pdbs <- unlist(size_select(size_bin, rbind(structure_data_periph, structure_data_tm)))
	filt <- function(residue_data){
		return(residue_data[residue_data[,"structure"] %in% pdbs,])
	}
	return(filt)
}

dssp_annotate <- function(data, dssp=sse_data){
	sse_data <- load_sse(dssp=dssp)
	one_chain <- data[is.na(data[,"chain"]),]
	rest <- data[!is.na(data[,"chain"]),]

	if (length(intersect(one_chain[,"structure"], rest[,"structure"]))){
		print("Error: Single chain filtering does some mistake")
		quit()
	}

	one_chain_merge <- merge(one_chain, sse_data[,c("pdbid","resnr","resid","aa","sse")], by.x=c("structure", "resid"), by.y=c("pdbid", "resid"))
	rest_merge <- merge(rest, sse_data, by.x=c("structure", "chain", "resid"), by.y=c("pdbid", "chain", "resid"))

	return(rbind(one_chain_merge, rest_merge))
}

dssp_select <- function(dssp_codes, dssp=sse_data){
	sse_data <- load_sse(dssp=dssp)
	f <- function(data){
		sse <- merge(data, sse_data, by.x=c("structure", "resid"), by.y=c("pdbid", "resid"))
		d <- sse[rep(F, length(sse[,1])),]
		for (cd in dssp_codes){
			d <- rbind(d, sse[sse[,"sse"]==cd,])
		}
		return(d)
	}
	return(f)
}

classification_aggregate <- function(classification, data, class, column, fun){
	d <- merge(classification, data, by.x="pdb", by.y="structure")
	groups = list(as.factor(d[,class]))
	agg <- aggregate(d[,column], by=groups,FUN=fun)
	colnames(agg) <- c(class, paste("aggregate(", column ,")", sep=""))
	return(agg)
}

classification_apply <- function(data, column, fun, level="family", funname="agg", altclass=NULL){

	if (is.null(altclass)){
		cl <- rbind(classification_periph, classification_tm)
	}
	else{
		cl <- altclass
	}
		
	agg <- classification_aggregate(cl, data, level, column, fun)
	colnames(agg) <- c(level, paste(funname, "(",column,")"))
	return(agg)
}

family_mean <- function(data, column){
	return(classification_apply(data, column, mean, "family", "mean"))
}

pdb_mean_alt <- function(data, column){
	return(classification_apply(data, column, mean, "pdb", "mean", altclass=rbind(classification_alternate_periph, classification_alternate_reference)))
}

family_mean_exp <- function(data, column){
	return(classification_apply(data, column, mean, "family", "mean", altclass=classification_exp))
}

family_median <- function(data, column){
	return(classification_apply(data, column, median, "family", "median"))
}

superfamily_mean <- function(data, column){
	return(classification_apply(data, column, mean, "superfamily", "mean"))
}

class_mean <- function(data, column){
	return(classification_apply(data, column, mean, "class", "mean"))
}

representative_selector <- function(level, n=1, altclass=NULL){
	set.seed(112)
	
	if(is.null(altclass)){
		cl <- rbind(classification_periph, classification_tm)
	}
	else{
		cl <- altclass
	}
	
	cl[,"pdb"] <- as.character(cl[,"pdb"])
	pdbs <- c()
	for (a in unique(cl[,level])){
		pdbs <- c(pdbs, sample(cl[cl[,level]==a,][,"pdb"],n)[1])
	}
	sel <- function(set){
		return(set[set[,"structure"] %in% pdbs,])
	}
	return(sel)
}

strat_level <- "family"
strat_mean <- family_mean
strat_median <- family_median
strat_sel <- representative_selector(strat_level)
#strat_avv <- function(data, column){return(family_mean(strat_sel(data), column))}

strat_sel_alt <- representative_selector("pdb", altclass=rbind(classification_alternate_periph, classification_alternate_reference))
strat_avg_alt <- pdb_mean_alt

strat_mean_exp <- family_mean_exp

family_median <- function(data, column){
	return(classification_apply(data, column, median, "family", "median"))
}
count_classes <- function(residue_data, level){
	cl <- rbind(classification_periph, classification_tm)
	d <- merge(cl, residue_data, by.x="pdb", by.y="structure")
	return(length(unique(d[,level])))
}
counts <- function(vec, column, name="counts"){
	t <- data.frame(table(vec[,column]))
	colnames(t) <- c(column, name)
	return(data.frame(t))
}

surface_density <- function(residue_data, protrusions, ref){
	d <- counts(protrusions(residue_data), "structure")
	ref <- counts(ref(residue_data), "structure", "refcounts")
	d <- merge(d, ref, by="structure")
	d[,"surface_density"] <- d[,"counts"] / d[,"refcounts"]
	d[is.nan(d[,"surface_density"]),][,"surface_density"] <- d[is.nan(d[,"surface_density"]),][,"refcounts"]
	return(d)
}

color_tm <- function(transparent=T){
	if(transparent){
		return("#ca002099")
	}
	else{
		return("#ca0020FF")
	}
}
color_periph <- function(transparent=T){
	if(transparent){
		return("#0571b099")
	}
	else{
		return("#0571b0FF")
	}
}
color_ppi <- function(transparent=T){
	if(transparent){
		return("#0000ff22")
	}
	else{
		return("#0000ff00")
	}
}
aa_pch <- function(x){
	if (x %in% c("TRP")){
		return(0)
	}
	else if (x %in% c("LEU")){
		return(16)
	}
	else if (x %in% c("ILE")){
		return(17)
	}
	else if (x %in% c("TYR")){
		return(1)
	}	
	else if(x %in% c("CYS")){
		return(2)
	}
	else if(x %in% c("MET")){
		return(5)
	}
	else if(x %in% c("PHE")){
		return(15)
	}
	else if(x %in% c("VAL")){
		return(4)
	}
	
	else{
		print("unreq. aa")
		exit
	}
}



compare_prot_densities <- function(protrusion, ref, set1, set2, strat_stat=strat_mean){
	densities1 <- surface_density(set1,  protrusion, ref)
	densities2 <- surface_density(set2,  protrusion, ref)
	print(ks.test(unlist(strat_stat(densities2, "surface_density")[,2]), unlist(strat_stat(densities1, "surface_density")[,2]), alternative="greater"))
}

binned_protrusions_counts <- function(protrusions, ref, lower, upper, set1, set2){
	tp <- counts(protrusions(set2), "structure")
	tr <- counts(ref(set2), "structure", "selsize")
	tm <- merge(tr, tp, by="structure")

	pp <- counts(protrusions(set1), "structure")
	pr <- counts(ref(set1), "structure", "selsize")
	periph <- merge(pr, pp, by="structure")
	
	pp <- periph[periph[,"selsize"]>=lower,]
	pp <- pp[pp[,"selsize"]<upper,]
		
	tp <- tm[tm[,"selsize"]>=lower,]
	tp <- tp[tp[,"selsize"]<upper,]
	
	return(list(periph=pp, tm=tp))

}

protrusion_exists_barplot <- function(protrusions, ref, title="", start=0, end=110, inc=30, limit=0, set1=residue_data_periph, set2=residue_data_tm, strat_stat=strat_mean){

	lower <- seq(start, end-inc, inc)
	upper <- seq(start+inc, end, inc)
	labels <- c()
	frac_periph <- c()
	u_ci_periph <- c()
	l_ci_periph <- c()
	frac_tm <- c()
	u_ci_tm <- c()
	l_ci_tm <- c()
	for (i in 1:length(lower)){
		labels <- c(labels, paste(lower[i], "-", upper[i]))
		r <- binned_protrusions_counts(protrusions, ref, lower[i], upper[i], set1, set2)
		fmp <- strat_stat(r$periph, "counts")[,2]
		fmt <- strat_stat(r$tm, "counts")[,2]

		t_p <- prop.test(length(fmp[fmp>limit]), length(fmp))
		t_t <- prop.test(length(fmt[fmt>limit]), length(fmt))
		
		frac_periph <- c(frac_periph, t_p$estimate)
		u_ci_periph <- c(u_ci_periph, t_p$conf.int[2])
		l_ci_periph <- c(l_ci_periph, t_p$conf.int[1])
		frac_tm <- c(frac_tm, t_t$estimate)
		u_ci_tm <- c(u_ci_tm, t_t$conf.int[2])
		l_ci_tm <- c(l_ci_tm, t_t$conf.int[1])
		
		
		#l <- list(fmt, fmp)
		#boxplot(l, col=c(color_tm(), color_periph()), main=paste(lower[i], "-", upper[i]))
		
	}
	barcenters <- barplot(t(as.matrix(data.frame(list(periph=frac_periph, tm=frac_tm)))), beside=T, col=c(color_periph(), color_tm()), names.arg=labels, ylim=c(0,1), xlab="# globally accessible resiudes", ylab="fraction containing hydrophobic protrusions")
	segments(barcenters[1,], u_ci_periph, barcenters[1,], l_ci_periph)
	segments(barcenters[2,], u_ci_tm, barcenters[2,], l_ci_tm)
}

coins_dependence <- function(co_ins_protrusions_generator, ref, title="", start=10, end=91, inc=40, coins_min=0, coins_max=5, limit=0.5, set1=residue_data_periph, set2=residue_data_tm, strat_stat=strat_mean, legend_title="hull size"){
	
	lower <- seq(start, end-inc, inc)
	upper <- seq(start+inc, end, inc)
	labels <- c()
	
	coins_range <- coins_min:coins_max	

	get_data <- function(i){
		frac_periph <- c()
		u_ci_periph <- c()
		l_ci_periph <- c()
		frac_tm <- c()
		u_ci_tm <- c()
		l_ci_tm <- c()

		for (coins in coins_range){
			protrusions <- co_ins_protrusions_generator(coins)
			
			r <- binned_protrusions_counts(protrusions, ref, lower[i], upper[i], set1, set2)

			r$periph[,"counts"] <- r$periph[,"counts"]>=1
			r$tm[,"counts"] <- r$tm[,"counts"]>=1

			fmp <- strat_stat(r$periph, "counts")[,2]
			fmt <- strat_stat(r$tm, "counts")[,2]

			t_p <- prop.test(length(fmp[fmp>limit]), length(fmp))
			t_t <- prop.test(length(fmt[fmt>limit]), length(fmt))
		
			frac_periph <- c(frac_periph, t_p$estimate)
			u_ci_periph <- c(u_ci_periph, t_p$conf.int[2])
			l_ci_periph <- c(l_ci_periph, t_p$conf.int[1])
			frac_tm <- c(frac_tm, t_t$estimate)
			u_ci_tm <- c(u_ci_tm, t_t$conf.int[2])
			l_ci_tm <- c(l_ci_tm, t_t$conf.int[1])
			
		}
		
		l <- list(frac_periph=frac_periph, frac_tm=frac_tm, u_ci_periph=u_ci_periph, l_ci_periph=l_ci_periph, u_ci_tm=u_ci_tm, l_ci_tm=l_ci_tm)
		return(l)
	}

	l <- get_data(1)
	tot_upper_tm <- l$u_ci_tm
	tot_upper_periph <- l$u_ci_periph
	tot_lower_tm <- l$l_ci_tm
	tot_lower_periph <- l$l_ci_periph

	for (i in 2:length(lower)){

		l <- get_data(i)
		tot_upper_tm <- pmax(l$u_ci_tm, tot_upper_tm)
		tot_upper_periph <- pmax(l$u_ci_periph, tot_upper_periph)
		tot_lower_tm <- pmin(l$l_ci_tm, tot_lower_tm)
		tot_lower_periph <- pmin(l$l_ci_periph, tot_lower_periph)

	}

	plot(coins_range, coins_range, ylim=c(0,1), xlab="min # co-insertable", ylab="fraction of families", type="n", main=title)

	polygon(c(coins_range, rev(coins_range)), c(tot_lower_periph, rev(tot_upper_periph)), col="lightgrey", border=F)
	polygon(c(coins_range, rev(coins_range)), c(tot_lower_tm, rev(tot_upper_tm)), col="lightgrey", border=F)


	for (i in 1:length(lower)){
		
		labels <- c(labels, paste(lower[i], "-", upper[i]))

		l <- get_data(i)
				
		points(coins_range, l$frac_periph, col=color_periph(F), type="b", pch=i)
		points(coins_range, l$frac_periph, col="black", type="p", pch=i)
		
		points(coins_range, l$frac_tm, col=color_tm(F), type="b", pch=i)
		points(coins_range, l$frac_tm, col="black", type="p", pch=i)
		
		
	}
	legend("topright", legend=labels, pch=seq(1,length(lower)), title=legend_title, bty="n")
}

coins_comparison <- function(prot, coins, ref, range=NULL, set1=residue_data_periph, set2=residue_data_tm, strat_stat=strat_mean, title="", ylab="", ticklabels=T){

	get_data <- function(protrusion){
		frac_periph <- c()
		u_ci_periph <- c()
		l_ci_periph <- c()
		frac_tm <- c()
		u_ci_tm <- c()
		l_ci_tm <- c()

		r <- binned_protrusions_counts(protrusion, ref, range[1], range[2], set1, set2)
		r$periph[,"counts"] <- r$periph[,"counts"]>=1
		r$tm[,"counts"] <- r$tm[,"counts"]>=1

		fmp <- strat_stat(r$periph, "counts")[,2]
		fmt <- strat_stat(r$tm, "counts")[,2]
		t_p <- prop.test(sum(fmp), length(fmp))
		t_t <- prop.test(sum(fmt), length(fmt))
		
		l <- list(frac_periph=t_p$estimate, frac_tm=t_t$estimate, u_ci_periph=t_p$conf.int[2], l_ci_periph=t_p$conf.int[1], u_ci_tm=t_t$conf.int[2], l_ci_tm=t_t$conf.int[1])
		return(l)
	}

	protdata <- get_data(prot)
	coinsdata <- get_data(coins)
	estimates <- matrix(c(protdata$frac_periph, protdata$frac_tm, coinsdata$frac_periph, coinsdata$frac_tm), ncol=2, byrow=F)

	barcenters <- barplot(estimates, names=c("Isolated", "Co-ins."), col=c(color_periph(), color_tm()), ylim=c(0,1.0), beside=T, main=title, ylab=ylab, yaxt="n", cex.names=0.7)
	ticks = c(0.0,0.2,0.4,0.6,0.8,1.0)
	axis(2, at=ticks, labels=F)
	if (ticklabels){
		axis(2, at=ticks, labels=ticks)
	}
	print("coins_comparison")
	print(estimates)
	lower <- c(protdata$l_ci_periph, protdata$l_ci_tm)
	upper <- c(protdata$u_ci_periph, protdata$u_ci_tm)
	segments(barcenters[,1], lower, barcenters[,1], upper)
	arrows(barcenters[,1], lower, barcenters[,1], upper, code=3, angle=90, length=0.05)

	lower <- c(coinsdata$l_ci_periph, coinsdata$l_ci_tm)
	upper <- c(coinsdata$u_ci_periph, coinsdata$u_ci_tm)
	segments(barcenters[,2], lower, barcenters[,2], upper)
	arrows(barcenters[,2], lower, barcenters[,2], upper, code=3, angle=90, length=0.05)

}

simple_mean_counts <- function(protrusion, title="", set1=residue_data_periph, set2=residue_data_tm, strat_stat=strat_mean){
	c_tm <- counts(protrusion(set2), "structure")
	avg_tm <- strat_stat(c_tm, "counts")[,2]
	
	c_p <- counts(protrusion(set1), "structure")
	avg_p <- strat_stat(c_p, "counts")[,2]
	
	breaks <- seq(0,max(avg_tm, avg_p)+10, 10)
	
	hist(avg_p, breaks=breaks, col=color_periph(), main=title, xlab="size", ylab="counts")
	hist(avg_tm, breaks=breaks, col=color_tm(), add=T)

}

sizes <- function(){

	hist(structure_data_periph[,"surface_size_selected"], col=color_periph(), main="surface size", breaks=seq(0,5000, 10), xlim=c(0,1000), freq=F)
	hist(structure_data_tm[,"surface_size_selected"], col=color_tm(), add=T, breaks=seq(0,5000, 10), freq=F)

	hist(structure_data_periph[,"size"], col=color_periph(), main="size", freq=F)
	hist(structure_data_tm[,"size"], col=color_tm(), add=T, freq=F)

}

size_prot <- function(prot, markers=NULL, set1=residue_data_periph, set2=residue_data_tm, xlab="hull size"){
	
	prot_periph <- counts(prot(set1), "structure")
	prot_tm <- counts(prot(set2), "structure")
		
	ma <- max(prot_tm[,"counts"], prot_periph[,"counts"])
	step=5
	breaks <- seq(0, ma+step, step)

	hp <- hist(prot_periph[,"counts"], breaks=breaks, plot=F)
	ht <- hist(prot_tm[,"counts"], breaks=breaks, plot=F)
	yl <- max(hp$counts, ht$counts)
	plot(hp, col=color_periph(), main="", ylab="# proteins", xlab=xlab, ylim=c(0,yl))
	plot(ht, col=color_tm(), add=T)
	
	if(!is.null(markers)){
		for (m in markers){
			abline(v=m, lty=2)
		}
	}

}
size_prot_scatter <- function(prot, ref, set1=residue_data_periph, set2=residue_data_tm, avg=strat_mean){
	
	prot_periph <- counts(prot(set1), "structure")
	ref_periph <- counts(ref(set1), "structure")
	prot_tm <- counts(prot(set2), "structure")
	ref_tm <- counts(ref(set2), "structure")
		
		
	periph <- avg(prot_periph, "counts")[,2]
	periph_ref <- avg(ref_periph, "counts")[,2]

	tm <- avg(prot_tm, "counts")[,2]
	tm_ref <- avg(ref_tm, "counts")[,2]

	ref <- c(periph_ref, tm_ref)
	prot <- c(periph, tm)

	plot(ref, prot, type="n")
	points(periph_ref, periph, col=color_periph())
	points(tm_ref, tm, col=color_tm())
}

plot_see <- function(protrusions, reference, dssp=sse_data){
	sse_data <- load_sse(dssp=dssp)
	sse <- merge(residue_data_periph, sse_data, by.x=c("structure", "resid"), by.y=c("pdbid", "resid"))
	ref_sse <- reference(sse)
	bf <- prop.table(table(ref_sse[,"sse"]))
	prot_sse <- protrusions(sse)
	af <- prop.table(table(prot_sse[,"sse"]))
	ins_sse <- prot_sse[prot_sse[,"insertion"] > -1.0,]
	inf <- prop.table(table(ins_sse[,"sse"]))
	print(bf)
	print(af)
	print(inf)
}

plot_aa_odds_ratio <- function(reference, set1=residue_data_periph, set2=residue_data_tm, ylim=NULL, strat_stat=strat_mean){
	library(epitools)
		
	aa <- unique(set1[,"type"])
	log_or <- c()
	lower_ci <- c()
	upper_ci <- c()
	for(a in aa){
		set_periph <- reference(set1)
		
		periph_counts <- counts(set_periph[set_periph[,"type"]==a,], "structure")
		periph_comp <- counts(set_periph[set_periph[,"type"]!=a,], "structure")

		set_tm <- reference(set2)
		tm_counts <- counts(set_tm[set_tm[,"type"]==a,], "structure")
		tm_comp <- counts(set_tm[set_tm[,"type"]!=a,], "structure")
		
		hfprot_periph <- sum(strat_stat(periph_counts, "counts")[,2])
		comp_periph <- sum(strat_stat(periph_comp, "counts")[,2])
		hfprot_tm <- sum(strat_stat(tm_counts, "counts")[,2])
		comp_tm <- sum(strat_stat(tm_comp, "counts")[,2])
		
		tt <- c(comp_tm, hfprot_tm, comp_periph, hfprot_periph)
		r <- oddsratio(tt, method="wald")
		d <- log(r$measure["Exposed2", "estimate"])
		lci <- log(r$measure["Exposed2", "lower"])
		uci <- log(r$measure["Exposed2", "upper"])

		log_or <- c(log_or, d)
		lower_ci <- c(lower_ci, lci)
		upper_ci <- c(upper_ci, uci)
		
	}
	f <- data.frame(list(log_or=log_or, lower_ci=lower_ci, upper_ci=upper_ci, names=aa))
	f <- f[with(f, order(-f[,"log_or"])),]
	
	wmcol <- function(x){
		if (x %in% c("TRP", "PHE", "TYR", "LEU", "ILE", "CYS", "MET")){
			return("grey")
		}
		else{
			return("white")
		}
	}
	if (is.null(ylim)){
		ylim=c(min(lower_ci), max(upper_ci))
	}
	
	barcenters <- barplot(f[,"log_or"], names.arg=f[,"names"], las=2, ylim=ylim, col=sapply(f[,"names"], wmcol), ylab="ln(R)", cex.names=0.7)
	segments(barcenters, f[,"lower_ci"], barcenters, f[,"upper_ci"])
	arrows(barcenters, f[,"lower_ci"], barcenters, f[,"upper_ci"], code=3, angle=90, length=0.05)
}

neighbour_vs_odds_ratio <- function(ref, hf, start=0, step=6, end=39, title="", set1=residue_data_periph, set2=residue_data_tm, strat_stat=strat_mean, column="neighbours", ylim=c(-1.5, 1.5), ylab="ln(R)"){
	library(epitools)

	lower <- seq(start, end, step)
	upper <- seq(start+step-1, end+step-1, step)

	log_or <- c()
	assa <- c()
	lower_ci <- c()
	upper_ci <- c()
	labels <- c()
	for (i in 1:length(lower)){
		set_periph <- ref(set1[set1[,column] %in% seq(lower[i], upper[i]),])
		set_tm <- ref(set2[set2[,column] %in% seq(lower[i], upper[i]),])

		periph_counts <- counts(set_periph[set_periph[,"type"] %in% hf,], "structure")
		periph_comp <- counts(set_periph, "structure")
		
		tm_counts <- counts(set_tm[set_tm[,"type"] %in% hf,], "structure")
		tm_comp <- counts(set_tm, "structure")
		
		hfprot_periph <- sum(strat_stat(periph_counts, "counts")[,2])
		comp_periph <- sum(strat_stat(periph_comp, "counts")[,2])
		hfprot_tm <- sum(strat_stat(tm_counts, "counts")[,2])
		comp_tm <- sum(strat_stat(tm_comp, "counts")[,2])
		
		tt <- c(comp_tm - hfprot_tm, hfprot_tm, comp_periph - hfprot_periph, hfprot_periph)
		r <- oddsratio(tt, method="wald")
		d <- log(r$measure["Exposed2", "estimate"])
		lci <- log(r$measure["Exposed2", "lower"])
		uci <- log(r$measure["Exposed2", "upper"])
		

		log_or <- c(log_or, d)
		lower_ci <- c(lower_ci, lci)
		upper_ci <- c(upper_ci, uci)
		labels <- c(labels, paste(lower[i], "-", upper[i]))
		
	}
	barcenters <- barplot(log_or, names.arg=labels, las=1, ylim=ylim, main=title, ylab=ylab, xlab="", cex.names=0.6)
	segments(barcenters, lower_ci, barcenters, upper_ci)
	arrows(barcenters, lower_ci, barcenters, upper_ci, code=3, angle=90, length=0.05)
}

betacol <- "#4daf4a99"
helixcol <- "#984ea399"
loopcol <- "#ffff3399"
turncol <- "#ff7f0099"
bendcol <- "#a6562899"
ssecols <- c(loopcol, turncol, bendcol, helixcol, betacol)
sseorder <- c("Loop", "Turn", "Bend",  "Helix", "Beta")

sse_vs_odds_ratio <- function(ref, hf, title="", set1=residue_data_periph, set2=residue_data_tm, strat_stat=strat_mean, ylab="ln(R)", dssp=sse_data, ylim=c(-1.0,1.8), legend.labels=T){
	tm_dssp <- dssp_annotate(set2, dssp=dssp)
	tm_dssp[,"sse"] <- as.factor(tm_dssp[,"sse"])
	periph_dssp <- dssp_annotate(set1, dssp=dssp)
	periph_dssp[,"sse"] <- as.factor(periph_dssp[,"sse"])

	library(epitools)

	log_or <- c()
	assa <- c()
	lower_ci <- c()
	upper_ci <- c()
	labels <- c()
	sses <- sseorder
	for (l in sses){

		set_periph <- ref(periph_dssp[periph_dssp[,"sse"] == l,])
		set_tm <- ref(tm_dssp[tm_dssp[,"sse"] == l,])

		periph_counts <- counts(set_periph[set_periph[,"type"] %in% hf,], "structure")
		periph_comp <- counts(set_periph, "structure")

		tm_counts <- counts(set_tm[set_tm[,"type"] %in% hf,], "structure")
		tm_comp <- counts(set_tm, "structure")

		hfprot_periph <- sum(strat_stat(periph_counts, "counts")[,2])
		comp_periph <- sum(strat_stat(periph_comp, "counts")[,2])
		hfprot_tm <- sum(strat_stat(tm_counts, "counts")[,2])
		comp_tm <- sum(strat_stat(tm_comp, "counts")[,2])

		tt <- c(comp_tm - hfprot_tm, hfprot_tm, comp_periph - hfprot_periph, hfprot_periph)
		r <- oddsratio(tt, method="wald")
		d <- log(r$measure["Exposed2", "estimate"])
		lci <- log(r$measure["Exposed2", "lower"])
		uci <- log(r$measure["Exposed2", "upper"])

		log_or <- c(log_or, d)
		lower_ci <- c(lower_ci, lci)
		upper_ci <- c(upper_ci, uci)
		labels <- c(labels, l)
		
	}
	if (!legend.labels){
		labels <- F
	}
	barcenters <- barplot(log_or, col=ssecols, names.arg=labels, las=1, main=title, ylab=ylab, xlab="", las=2, ylim=ylim, yaxt="n")
	ticks=c(-1.0,0.0,0,1)
	axis(2, at=ticks, las=2)
	segments(barcenters, lower_ci, barcenters, upper_ci)
	arrows(barcenters, lower_ci, barcenters, upper_ci, code=3, angle=90, length=0.05)
}

coins_vs_sse <- function(prot, start=0, step=1, end=3, set1=residue_data_periph, strat_stat=strat_mean, ylab="# protrusions", legend.text=T, ylim=NULL, dssp=sse_data){
	periph_dssp <- dssp_annotate(set1, dssp=dssp)
	periph_dssp[,"sse"] <- as.factor(periph_dssp[,"sse"])
	
	lower <- seq(start, end, step)
	upper <- seq(start+step-1, end+step-1, step)
	
	sses <- unique(periph_dssp[,"sse"])
	sses <- sseorder
	tab <- NULL
	labels <- c()
	
	for (i in 1:length(lower)){
		set <- prot(periph_dssp)
		set <- set[set[,"facet_neighbours_ww_if"] %in% seq(lower[i], upper[i]),]
		
		proteindata <- data.frame(structure=unlist(unique(periph_dssp[,"structure"])))
		for (sse in sses){
			co <- c()
			for (pdb in proteindata[,"structure"]){
				resd <- set[set[,"structure"]==pdb,]
				co <- c(co, sum(resd[,"sse"]==sse))
			}
			proteindata[,sse] <- co
		}
		sse_total <- c()
		for (sse in sses){
			sse_total <- c(sse_total, sum(strat_stat(proteindata, sse)[,2]))
		}

		if (is.null(tab)){
			tab <- sse_total
		}
		else{
			tab <- rbind(tab, sse_total)
		}
		labels <- c(labels, lower[i])
	}
	row.names(tab) <- labels
	colnames(tab) <- sses
		
	barplot(t(tab), beside=T, las=1, col=ssecols, xlab="# co-insertable", ylab=ylab, legend.text=legend.text, ylim=ylim, args.legend=list(bty="n", x="topright", xpd=T))
}

coins_comparison_sse <- function(prot, coins, set1=residue_data_periph, strat_stat=strat_mean, ylab="Ns", legend.text=T, ylim=NULL, dssp=sse_data){
	
	periph_dssp <- dssp_annotate(set1, dssp=dssp)
	periph_dssp[,"sse"] <- as.factor(periph_dssp[,"sse"])
		
	sses <- sseorder
	
	get_data <- function(selector){
		set <- selector(periph_dssp)
		
		proteindata <- data.frame(structure=unlist(unique(periph_dssp[,"structure"])))
		for (sse in sses){
			co <- c()
			for (pdb in proteindata[,"structure"]){
				resd <- set[set[,"structure"]==pdb,]
				co <- c(co, sum(resd[,"sse"]==sse))
			}
			proteindata[,sse] <- co
		}
		sse_total <- c()
		for (sse in sses){
			sse_total <- c(sse_total, sum(strat_stat(proteindata, sse)[,2]))
		}

		tab <- sse_total
		return(tab)
	}
	tab <- rbind(get_data(prot), get_data(coins))
	rownames(tab) <- c( "Isolated", "Co-ins.")
	colnames(tab) <- sses
	print(tab)
	barplot(t(tab), beside=T, las=1, col=ssecols, xlab="", ylab=ylab, legend.text=legend.text, ylim=ylim, args.legend=list(bty="n", x="topright", xpd=T, inset=c(-0.6,-0.2)))
}

assa_vs_neigbour <- function(prot, title){
	plot(prot(residue_data_periph)[,c("sc_assa", "neighbours")])
}

surface_proprotions <- function(prot, ref, aa, set1=residue_data_periph, set2=residue_data_tm, title="", strat_stat=strat_mean, ylab=""){
	library(epitools)
	
	prot_periph <- prot(set1)
	ref_periph <- ref(set1)
	
	prot_tm <- prot(set2)
	ref_tm <- ref(set2)
	
	prop_periph <- c()
	u_ci_periph <- c()
	l_ci_periph <- c()
	prop_tm <- c()
	u_ci_tm <- c()
	l_ci_tm <- c()
	or <- c()
	for (a in aa){
		
		count_tp <- strat_stat(counts(prot_periph[prot_periph[,"type"]==a,], "structure"), "counts")
		ref_count_tp <- strat_stat(counts(ref_periph, "structure"), "counts")

		tp <- merge(count_tp, ref_count_tp, by=colnames(count_tp)[1])
		tp <- tp[tp[,3]>0,]
		ctp <- sum(tp[,2])
		rtp <- sum(tp[,3])
		

		count_tm <- strat_stat(counts(prot_tm[prot_tm[,"type"]==a,], "structure"), "counts")
		ref_count_tm <- strat_stat(counts(ref_tm, "structure"), "counts")

		tm <- merge(count_tm, ref_count_tm, by=colnames(count_tm)[1])
		tm <- tm[tm[,3]>0,]
		ctm <- sum(tm[,2])
		rtm <- sum(tm[,3])
		

		t_p <- prop.test(ctp, rtp)
		t_t <- prop.test(ctm, rtm)

		prop_periph <- c(prop_periph, t_p$estimate)
		u_ci_periph <- c(u_ci_periph, t_p$conf.int[2])
		l_ci_periph <- c(l_ci_periph, t_p$conf.int[1])
		prop_tm <- c(prop_tm, t_t$estimate)
		u_ci_tm <- c(u_ci_tm, t_t$conf.int[2])
		l_ci_tm <- c(l_ci_tm, t_t$conf.int[1])
		
		#
		# odds ratio for sorting barplot left to right
		#
		r <- oddsratio(c(rtm-ctm, ctm, rtp-ctp, ctp), method="wald")
		d <- r$measure["Exposed2", "estimate"]
		or <- c(or, d)
	}

	f <- data.frame(list(periph=prop_periph, tm=prop_tm, upper_periph=u_ci_periph, lower_periph=l_ci_periph, upper_tm=u_ci_tm, lower_tm=l_ci_tm, or=or))
	rownames(f) <- aa
	f <- f[with(f, order(- f[,"or"])),]

	m <- as.matrix(f[,c("periph", "tm")], ylab="fraction of protrusions")
	barcenters <- barplot(t(m), beside=T, las=2, legend.text=F, ylim=c(0, max(f[,"upper_periph"], f[,"upper_tm"])), main=title, col=c(color_periph(), color_tm()), ylab=ylab)
	segments(barcenters[1,], f[,"lower_periph"], barcenters[1,], f[,"upper_periph"])
	segments(barcenters[2,], f[,"lower_tm"], barcenters[2,], f[,"upper_tm"])

	arrows(barcenters[1,], f[,"lower_periph"], barcenters[1,], f[,"upper_periph"], code=3, angle=90, length=0.05)
	arrows(barcenters[2,], f[,"lower_tm"], barcenters[2,], f[,"upper_tm"], code=3, angle=90, length=0.05)
}

protrusion_aa_frequencies <- function(prot, ref, aa, set1=residue_data_periph, set2=residue_data_tm, title="", col=color_periph(F), col2=color_periph(F), ylab="prot", xlab="ref"){
	set1=strat_sel(set1)
	set2=strat_sel(set2)
	prot_periph <- prot(set1)
	ref_tm <- ref(set2)
	
	prop_periph <- c()
	ci_upper_periph <- c()
	ci_lower_periph <- c()
	prop_tm_ref <- c()
	ci_upper_ref <- c()
	ci_lower_ref <- c()
	
	for (a in aa){
		t_p <- prop.test(length(prot_periph[prot_periph[,"type"]==a,][,1]), length(prot_periph[,1]))
		
		prop_periph <- c(prop_periph, t_p$estimate)
		ci_upper_periph <- c(ci_upper_periph, t_p$conf.int[2])
		ci_lower_periph <- c(ci_lower_periph, t_p$conf.int[1])

		t_tr <- prop.test(length(ref_tm[ref_tm[,"type"]==a,][,1]), length(ref_tm[,1]))
		prop_tm_ref <- c(prop_tm_ref, t_tr$estimate)
		ci_upper_ref <- c(ci_upper_ref, t_tr$conf.int[2])
		ci_lower_ref <- c(ci_lower_ref, t_tr$conf.int[1])
	}
	limm=c(0,max(ci_upper_ref, ci_upper_periph))
	pchs = sapply(aa, aa_pch)
	plot(prop_tm_ref, prop_periph, col="black", asp=1, xlim=limm, ylim=limm, pch=pchs, xlab=xlab, ylab=ylab, cex=2)
	#text(prop_periph, ci_upper_ref + 0.001, aa)
	#abline(0, 1)
	
	segments(prop_tm_ref, ci_upper_periph, prop_tm_ref, ci_lower_periph, col=col)
	segments(ci_lower_ref, prop_periph, ci_upper_ref, prop_periph, col=col2)
	#segments(barcenters[2,], f[,"lower_tm"], barcenters[2,], f[,"upper_tm"])
	#plot surface proportions of prot with different aa in periph and tm
	#add confidence intervals
	legend("topleft", legend=aa, pch=pchs, bty="n")
}

largest_facet_location <- function(prot, column, col=color_periph(), cum=T, ylim=c(0,1), strat_stat=strat_median, xticks=T, yticks=T){

	selection <- residue_data_periph
	prots <- prot(selection)	

	locations <- c()
	inside <- c()
	facet_neighbours <- c()
	pdbs <- unique(prots[,"structure"])
	for (p in pdbs){
		sd <- prots[prots[,"structure"]==p,]
		if (!is.null(column)){
			sd <- sd[with(sd, order( c(-sd[,column], sd[,"neighbours"]))),]
		}
		else{
			sd <- sd[sample(1:nrow(sd), 1),]
		}
		top <- sd[1,]
		locations <- c(locations, top[,"insertion"])
	}
	d <- data.frame(list(structure=pdbs, insertions=locations))
	d <- strat_stat(d, "insertions")
	br <- seq(-20.25,1.25,0.25)
	tp <- hist(d[,2], plot=F, breaks=br, right=F)
	tp$counts <- tp$counts/sum(tp$counts)
	if (cum){
		tp$counts <- rev(cumsum(rev(tp$counts)))
	}
	if (xticks){
		xlab="insertion (nm)"
	}
	else{
		xlab=""
	}
	if(yticks){
		ylab="fraction periph."
	}
	else{
		ylab=""
	}
	plot(tp, xlim=c(-3,1), col=col, ylab=ylab, xlab=xlab, ylim=ylim, main="", axes=F)
	ticks = seq(-3,1,1)
	axis(1, at=ticks, labels=F)
	if (xticks){
		axis(1, at=ticks, labels=ticks, las=1, cex.axis=0.8)
	}
	ticks = seq(0.1, ylim[2], ylim[2]/5)
	axis(2, at=ticks, labels=F)
	if (yticks){
		ticks = seq(0.1, ylim[2], 2*ylim[2]/5)
		axis(2, at=ticks, labels=ticks, las=2, cex.axis=0.8)
	}
	abline(v=0, lty=2)
}

conditional_hydrophobics_or <- function(set1=structure_data_periph, set2=structure_data_tm, strat_stat=strat_mean, names=c("", ""), ylab="ln(R)", ylim=c(-.6,1.1)){
	library(epitools)
	d_p <- set1
	d_t <- set2

	one_p <- sum(strat_stat(d_p,"facet_pairs_at_least_one_ww_if")[,2])
	two_p <- sum(strat_stat(d_p,"facet_pairs_two_ww_if")[,2])
	one_pr <- sum(strat_stat(d_p,"facet_pairs_at_least_one_ww_if_random")[,2])
	two_pr <- sum(strat_stat(d_p,"facet_pairs_two_ww_if_random")[,2])

	one_t <- sum(strat_stat(d_t,"facet_pairs_at_least_one_ww_if")[,2])
	two_t <- sum(strat_stat(d_t,"facet_pairs_two_ww_if")[,2])
	one_tr <- sum(strat_stat(d_t,"facet_pairs_at_least_one_ww_if_random")[,2])
	two_tr <- sum(strat_stat(d_t,"facet_pairs_two_ww_if_random")[,2])

	orp <- oddsratio(c(one_pr-two_pr, two_pr, one_p-two_p, two_p), method="wald")
	ort <- oddsratio(c(one_tr-two_tr, two_tr, one_t-two_t, two_t), method="wald")

	est <- log(c(orp$measure["Exposed2", "estimate"], ort$measure["Exposed2", "estimate"]))
	upper <- log(c(orp$measure["Exposed2", "upper"], ort$measure["Exposed2", "upper"]))
	lower <- log(c(orp$measure["Exposed2", "lower"], ort$measure["Exposed2", "lower"]))

	if (is.null(ylim)){
		ylim=c(min(0,min(lower)),max(upper))
	}

	barcenters <- barplot(est, col=c(color_periph(), color_tm()), ylim=ylim, xlab="", ylab=ylab, names.arg=names)
	segments(barcenters, lower, barcenters, upper)
	arrows(barcenters, lower, barcenters, upper, code=3, angle=90, length=0.05)
}

conditional_hydrophobics_scatter <- function(set1=structure_data_periph, set2=structure_data_tm, strat_stat=strat_mean, names=c("", ""), ylab="ln(R)"){
	library(epitools)
	d_p <- set1
	d_t <- set2

	pairs_p <- strat_stat(d_p,"facet_pairs")[,2]
	coins_p <- strat_stat(d_p,"facet_pairs_two_ww_if")[,2]

	pairs_t <- strat_stat(d_t,"facet_pairs")[,2]
	coins_t <- strat_stat(d_t,"facet_pairs_two_ww_if")[,2]

	step=100
	start=0
	end=300


	estimates_p <- c()
	estimates_t <- c()
	for (s in seq(start, end-step, step)){
		s1 <- set1[set1[,"facet_pairs"]>=s,]
		s1 <- s1[s1[,"facet_pairs"]<s+step,]
		s1[,"has_coins"] <- s1[,"facet_pairs_two_ww_if"] > 0
		mean_has_coins_p <- strat_stat(s1,"has_coins")[,2]
		
		s2 <- set2[set2[,"facet_pairs"]>=s,]
		s2 <- s2[s2[,"facet_pairs"]<s+step,]
		s2[,"has_coins"] <- s2[,"facet_pairs_two_ww_if"] > 0
		mean_has_coins_t <- strat_stat(s2,"has_coins")[,2]
		
		estimates_p <- c(estimates_p, mean(mean_has_coins_p))
		estimates_t <- c(estimates_t, mean(mean_has_coins_t))
	}
	plot(1:length(estimates_p), estimates_p, col=color_periph(T), ylim=c(0,max(estimates_p)))
	points(1:length(estimates_t), estimates_t, col=color_tm(T))
}

comv_exp_cosines_hist <- function(experimental_periph_set=experimental_periph){
	annotated_set <- merge(experimental_periph_set, classification_exp, by.x="structure", by.y="pdb")
	annotated_set[,"manual"] <- annotated_set[,"structure"] %in% manual_inspection_periph[,"structure"]	
	annotated_set <- annotated_set[annotated_set[,"manual"]==F,]
	
annotated_set[,"angle"] <- acos(annotated_set[,"cosine"])*180/pi
	hist(annotated_set[,"angle"], breaks=c(0,90,180), freq=T)
}

#comv_exp_cosines_hist()
#comv_exp_cosines_hist(experimental_periph_ref)


comp_v_exp_cosines <- function(experimental_periph_set=experimental_periph, legend=T){
	annotated_set <- merge(experimental_periph_set, classification_exp, by.x="structure", by.y="pdb")
	annotated_set[,"manual"] <- annotated_set[,"structure"] %in% manual_inspection_periph[,"structure"]	
	annotated_set <- annotated_set[annotated_set[,"manual"]==F,]
	
	annotated_set[,"structure"] <- droplevels(annotated_set[,"structure"])
	annotated_set <- annotated_set[with(annotated_set, order(annotated_set[,"structure"])),]
	
	nfams <- nlevels(structure)
	
	plotit <- function(data, add=F, col="black", pch=0, xaxt="n"){
		angle <- acos(data[,"cosine"])*180/pi
		struct <- data[,"structure"]

		if (legend){
			stripchart(angle~struct, col=col, las=2, add=add, pch=pch, ylim=c(0,180), yaxt=xaxt, vertical=T)
		}
		else{
			stripchart(angle~struct, col=col, las=2, add=add, pch=pch, ylim=c(0,180), yaxt=xaxt, xaxt="n", vertical=T)
		}	
	}
	
	plotit(annotated_set[annotated_set[,"manual"]==F,])
	
	missing <- annotated_set[is.na(annotated_set[,"cosine"]),]
	if (length(missing[,1]) > 0){
		missing[,"cosine"] <- -1
		plotit(missing, T, "black", pch=4)		
	}
		
	verified <- annotated_set[annotated_set[,"frac_predicted_verified"]==1,]
	plotit(verified, T, "red")

	#manual <- annotated_set[annotated_set[,"manual"]==T,]
	#plotit(manual, T, "orange")

	axis(2, at=c(0,45,90,135,180))
	abline(h=90, lty=2)
}

comp_v_exp_cosines_ref <- function(){
	annotated_set_ref <- merge(experimental_periph_ref, classification_exp, by.x="structure", by.y="pdb")
	annotated_set_ref[,"manual"] <- annotated_set_ref[,"structure"] %in% manual_inspection_periph[,"structure"]
	annotated_set_ref <- annotated_set_ref[annotated_set_ref[,"manual"]==F,]
	
	annotated_set_ref[,"structure"] <- droplevels(annotated_set_ref[,"structure"])
	annotated_set_ref <- annotated_set_ref[with(annotated_set_ref, order(annotated_set_ref[,"structure"])),]
	annotated_set_ref[,"angle"] <- acos(annotated_set_ref[,"cosine"])*180/pi
		
	angle <- annotated_set_ref[,"angle"]
	struct <- annotated_set_ref[,"structure"]
	nstruct <- length(levels(struct))
	boxplot(angle~struct, yaxt="n", outline=T, range=0, at=seq(1,nstruct), las=2)
	axis(2, at=c(90))
}

comp_v_exp_cosines_on_ref <- function(experimental_periph_set=experimental_periph, legend=T){
	
	annotated_set_ref <- merge(experimental_periph_ref, classification_exp, by.x="structure", by.y="pdb")
	annotated_set_ref[,"manual"] <- annotated_set_ref[,"structure"] %in% manual_inspection_periph[,"structure"]
	annotated_set_ref <- annotated_set_ref[annotated_set_ref[,"manual"]==F,]
	
	annotated_set_ref[,"structure"] <- droplevels(annotated_set_ref[,"structure"])
	annotated_set_ref <- annotated_set_ref[with(annotated_set_ref, order(annotated_set_ref[,"structure"])),]
	annotated_set_ref[,"angle"] <- acos(annotated_set_ref[,"cosine"])*180/pi
	
	annotated_set <- merge(experimental_periph_set, classification_exp, by.x="structure", by.y="pdb")
	annotated_set[,"manual"] <- annotated_set[,"structure"] %in% manual_inspection_periph[,"structure"]	
	annotated_set <- annotated_set[annotated_set[,"manual"]==F,]
	
	annotated_set[,"structure"] <- droplevels(annotated_set[,"structure"])
	
	annotated_set[,"order"] <- rank(annotated_set$cosine, ties.method="random")
	annotated_set <- annotated_set[order(annotated_set$order, decreasing=T),]
	
	annotated_set_ref <- merge(annotated_set_ref, annotated_set[,c("structure", "order")])
	annotated_set_ref <- annotated_set_ref[order(annotated_set_ref$order, decreasing=T),]

	angle <- annotated_set_ref[,"angle"]
	struct <- factor(annotated_set_ref[,"structure"], unique(annotated_set$structure))
	boxplot(angle~struct, yaxt="n", outline=T, range=0, las=2, border="gray", ylab="angle", xlab="protein")
	axis(2, at=c(0,45,90,135,180))
	abline(h=90, lty=2)
	
	
	nfams <- nlevels(structure)
	
	plotit <- function(data, col="black", pch=0, xaxt="n"){
		angle <- acos(data[,"cosine"])*180/pi
		struct <- factor(data[,"structure"], unique(annotated_set_ref$structure))

		stripchart(angle~struct, col=col, las=2, pch=pch, ylim=c(0,180), vertical=T, add=T)
	}
	
	plotit(annotated_set[annotated_set[,"manual"]==F & annotated_set[,"frac_predicted_verified"]<1,], pch=1)
	
	missing <- annotated_set[is.na(annotated_set[,"cosine"]),]
	if (length(missing[,1]) > 0){
		missing[,"cosine"] <- -1
		plotit(missing, pch=4)		
	}
		
	verified <- annotated_set[annotated_set[,"frac_predicted_verified"]==1,]
	plotit(verified, pch=8)

	#manual <- annotated_set[annotated_set[,"manual"]==T,]
	#plotit(manual, T, "orange")
}

plot_oligomerisation <- function(){
	diff <- oligomerisation[,"opm"] - oligomerisation[,"pisa"]
	hf <- hist(diff, plot=F, breaks=seq(-7.5,3.5,1))
	hf$counts <- hf$counts/sum(hf$counts)
	plot(hf, xlab="Difference in chain count", ylab="fraction of proteins", main="", col=color_periph(), cex.axis=0.8)
}

protrusion_density_histogram <- function(protrusions, ref, set, col="black", breaks=0:20/20, xmax=0.5, strat_stat=strat_mean, ylab="density", xlab = "mean fraction hydrophobic", density_tot=T, ylim=NULL, main="", xticks=T, yticks=T){
  densities <- surface_density(set,  protrusions, ref)
  
  binsize = breaks[2]-breaks[1]
  
  thist <- hist(unlist(strat_stat(densities, "surface_density")[,2]), breaks=breaks, plot=F, freq=F)
  
  leftout <- sum(thist$density[thist$breaks>xmax], na.rm=T)*binsize
  density.text = ""
  if (density_tot && leftout > 0){
    density.text = paste("density shown", format(1-leftout, digits=2))
  }
  
  thist$counts <- thist$counts / sum(thist$counts)
  
  plot(thist, col=col, xlab=xlab, ylab=ylab, xlim=c(0,xmax), ylim=ylim, main=main, axes=F)
  ticks <- seq(0,ylim[2], 0.2)
  axis(2, at=ticks, label=F)
  if (yticks){
    axis(2, at=ticks, label=ticks, las=2)
  }
  ticks <- c(0,0.25,0.5,0.75,1.0)
  lticks <- c(0,"",0.5,"",1.0)
  axis(1, at=ticks, label=F)
  if (xticks){
    axis(1, at=ticks, label=lticks)
  }
  
}

colwidth=6.68
tee_pdf <- function(filename, plotter, width=colwidth, height=colwidth, ps=10){
	m <- par(no.readonly=T)
	plotter()
	par(m)
	#par(ps=ps)
	filepath <- paste(outpath, filename, sep="/")
	pdf(filepath, width=width, height=height)
	plotter()
	dev.off()
	par(m)
	print(filename)
	print(warnings())
}

superhydrophobics=c("PHE", "TRP", "LEU", "ILE")
WIMLEY_WHITE_INTERFACIAL = c("TRP", "PHE", "TYR", "LEU", "ILE", "CYS", "MET")
WIMLEY_WHITE_OCT = c("TRP", "PHE", "TYR", "LEU", "ILE", "CYS", "MET", "VAL")
hydrophobics = WIMLEY_WHITE_INTERFACIAL
polar = c("SER", "THR", "ASN", "GLN")
non_hf = c("SER", "THR", "ASN", "GLN", "HIS", "ARG", "LYS", "ASP", "GLU")

exposed <- protrusion()
conc <- protrusion(convhull=F, exposed=T)
hull <- protrusion(convhull=T, type=NULL)
prot <- protrusion(max_neighbours=21, type=NULL, pre=hull)

flat <- conc
#flat <- protrusion(convhull=T, min_neighbours=28, type=NULL)

hf_exposed <- protrusion(type=hydrophobics, pre=exposed)
polar_exposed <- protrusion(type=polar, pre=exposed)

hf_prot <- protrusion(type=hydrophobics, pre=prot)
polar_prot <- protrusion(type=polar, pre=prot)

hf_hull <- protrusion(type=hydrophobics, pre=hull)
polar_hull <- protrusion(type=polar, pre=hull)
non_hf_hull <- protrusion(type=non_hf, pre=hull)

hf_flat <- protrusion(type=hydrophobics, pre=flat)
hf_conc <- protrusion(type=hydrophobics, pre=conc)

hf_prot_co <- function(limit){return(protrusion(min_facet_neighbours_ww_if=limit, pre=hf_prot))}
hf_prot_sole <- protrusion(max_facet_neighbours_ww_if=0, pre=hf_prot)


protrusion_density_histogram_panel <- function(periphset, refset, strat_stat, hist_ylim, labels=c("A", "B", "C", "D")){
  
  lf<-layout(matrix(c(1,2,3,4), 2, 2, byrow = TRUE))
  xmax=1.0
  par(mar=c(1,4,5,0))
  protrusion_density_histogram(hf_prot, prot, periphset, col=color_periph(), strat_stat=strat_stat, xmax=xmax, ylim=hist_ylim, main="protrusion", ylab="fraction periph.", xlab="", xticks=F)
  mtext(labels[1],3, adj=0)
  par(mar=c(1,3,5,1))
  protrusion_density_histogram(hf_exposed, exposed, periphset, col=color_periph(), strat_stat=strat_stat, xmax=xmax, ylim=hist_ylim, main="exposed", ylab="", xlab="", xticks=F, yticks=F)
  mtext(labels[2],3, adj=0)
  par(mar=c(5,4,1,0))
  protrusion_density_histogram(hf_prot, prot, refset, col=color_tm(), strat_stat=strat_stat, xmax=xmax, ylim=hist_ylim, ylab="fraction ref.", xlab="")
  mtext(expression(paste(hat(italic(f))["hydrophobe | protrusion"])), 1, family="serif", line=3)
  mtext(labels[3],3, adj=0)
  par(mar=c(5,3,1,1))
  protrusion_density_histogram(hf_exposed, exposed, refset, col=color_tm(), strat_stat=strat_stat, xmax=xmax, ylim=hist_ylim, ylab="", yticks=F, xlab="")
  mtext(expression(paste(hat(italic(f))["hydrophobe | exposed"])), 1, family="serif", line=3)
  mtext(labels[4],3, adj=0)
}

aa_comparison_panel <- function(periphset, refset, mean){
	lf<-layout(matrix(c(1,2), 2, 1, byrow = TRUE))
	par(mar=c(3,4.3,1,0))
	surface_proprotions(prot, prot, set1=periphset, set2=refset, aa=hydrophobics, strat_stat=mean, ylab="")
	mtext(expression(hat(F)["aa | protrusion"]), 2, family="serif", line=3)
	mtext("A",3, adj=0)
	par(mar=c(2.5,4.3,1,0))
	plot_aa_odds_ratio(prot, periphset, refset, strat_stat=mean)
	mtext("B",3, adj=0)
}

fig_coins_panel <- function(periphset, refset, mean, labels=c("A", "B", "C")){
	lf<-layout(matrix(c(1,2,1,2,3,3), 3, 2, byrow = TRUE))
	par(mar=c(2,5,2,0))
	coins_comparison(hf_prot_sole, hf_prot_co(1), prot, range=c(0,25), set1=periphset, set2=refset, strat_stat=mean, title="0-25")
	mtext(expression(hat(E)[paste("hydrophobe ", intersect(), " protrusion")]), 2, family="serif", line=3)
	mtext(labels[1],3, adj=0)
	par(mar=c(2,2,2,2))
	coins_comparison(hf_prot_sole, hf_prot_co(1), prot, range=c(25,50), set1=periphset, set2=refset, strat_stat=mean, title="25-50", ticklabels=F)
	mtext(labels[2],3, adj=0)
	par(mar=c(4.2,5,2,2))
	size_prot(prot, c(0,50), set1=periphset, set2=refset, xlab="# protrusions")
	mtext(labels[3],3, adj=0)
}

fig_neighbour_vs_odss_ratio <- function(periphset=residue_data_periph, refset=residue_data_tm, mean=strat_mean, ylim=c(-1.5,2)){
	par(mar=c(4,4.1,1,0))
	neighbour_vs_odds_ratio(hull, hydrophobics, 0, 7, 40, set1=periphset, set2=refset, strat_stat=mean, ylim=ylim)
	mtext("d", 1, family="serif", line=2)
	mtext("(local density)", 1, line=3)
}

fig_sse_panel <- function(periphset=residue_data_periph, refset=residue_data_tm, mean=strat_mean, dssp=sse_data){
	lf<-layout(matrix(c(1,2), 2, 1, byrow = TRUE))
	par(mar=c(3,4.1,1,5.1))
	coins_comparison_sse(hf_prot_sole, hf_prot_co(1), set1=periphset, strat_stat=mean, dssp=dssp, ylab="", legend.text=T)
	mtext(expression(N[s]), 2, family="serif", line=3)
	mtext("A",3, adj=0)
	par(mar=c(1.5,4.1,1,2.1))
	sse_vs_odds_ratio(prot, hydrophobics, set1=periphset, set2=refset, strat_stat=mean, dssp=dssp, ylim=c(-1.2,1.8), legend.labels=F)
	mtext("B",3, adj=0)
}

fig_compare_opm_panel <- function(median=strat_median, reference=prot){
	lf<-layout(matrix(c(1,2,3,4), 2, 2, byrow = TRUE))
	par(mar=c(1,4,5,0))
	largest_facet_location(hf_prot, "facet_neighbours_ww_if", cum=F, ylim=c(0,0.5), strat_stat=median, xticks=F)
	mtext("A",3, adj=0)
	par(mar=c(1,3,5,1))
	largest_facet_location(reference, NULL, col="white", cum=F, ylim=c(0,0.5), strat_stat=median, xticks=F, yticks=F)
	mtext("B",3, adj=0)
	par(mar=c(5,4,1,0))
	largest_facet_location(hf_prot, "facet_neighbours_ww_if", strat_stat=median)
	mtext("C",3, adj=0)
	par(mar=c(5,3,1,1))	
	largest_facet_location(reference, NULL, col="white", strat_stat=median, yticks=F)
	mtext("D",3, adj=0)
}

conditional_hydrophobics_panel <- function(set1=structure_data_periph, set2=structure_data_tm, strat_stat=strat_mean, names=c("Peripheral", "Reference")){
	par(mar=c(3,4,3,0))
	conditional_hydrophobics_or(set1=set1, set2=set2, strat_stat=strat_stat, names=names)
}

comp_v_exp <- function(){
	par(cex.axis=.8)
	comp_v_exp_cosines_on_ref()
}

compare_surface_plots <- function(mean=strat_mean, periphset=residue_data_periph, refset=residue_data_tm, suffix="", hist_ylim_prop_dens=c(0,0.6), labels4=c("A", "B", "C", "D"), labels3=c("A", "B", "C")){
	tee_pdf(paste("FigTmPeriphAaComparison", suffix, ".pdf", sep=""), function(){aa_comparison_panel(periphset=periphset, refset=refset, mean=mean)})
	tee_pdf(paste("FigProtrusionDensityHistograms", suffix, ".pdf", sep=""), function(){protrusion_density_histogram_panel(periphset=periphset, refset=refset, strat_stat=mean, hist_ylim=hist_ylim_prop_dens, labels=labels4)})	
	tee_pdf(paste("FigCoins", suffix, ".pdf", sep=""), function(){fig_coins_panel(mean=mean, periphset=periphset, refset=refset, labels=labels3)})
}

compare_opm_plots <- function(median=strat_median){
	tee_pdf("FigLargestFacetLocation.pdf", function(){fig_compare_opm_panel(median=median)})
}

characterize_protrusions_plots <- function(periphset=residue_data_periph, refset=residue_data_tm, periphstruct=structure_data_periph, refstruct=structure_data_tm, mean=strat_mean, suffix="", dssp=sse_data, setnames=c("Periphal", "Non-binding")){
	tee_pdf(paste("FigNeighboursVsOddsRatio", suffix, ".pdf", sep=""), function(){fig_neighbour_vs_odss_ratio(periphset=periphset, refset=refset, mean=mean, ylim=c(-1.5,2))}, height=0.7*colwidth)
	tee_pdf(paste("FigFacetPairConditionals", suffix, ".pdf", sep=""), function(){conditional_hydrophobics_panel(set1=periphstruct, set2=refstruct, strat_stat=mean, names=setnames)}, height=0.7*colwidth)	
	tee_pdf(paste("FigSSE", suffix, ".pdf", sep=""), function(){fig_sse_panel(periphset=periphset, refset=refset, mean=mean, dssp=dssp)})
}
compare_with_experiment_plots <- function(){
	tee_pdf("FigComparePredictionWithVerifiedCosine.pdf", function(){comp_v_exp()}, width=colwidth, height=colwidth)
}

misc_suppmat_plots <- function(){
	tee_pdf("FigCompareOligomerisation.pdf", plot_oligomerisation)
}

save_data_set <- function(residue_data, filename, comment){
	dset <- unique(residue_data[,"structure"])
	file <- paste(outpath, filename, sep="/")
	write(paste("#", comment), file=file)
	write.table(dset, file=file, append=T, col.names=F, row.names=F, quote=F)
}

save_data_sets <- function(){
	save_data_set(residue_data_periph, 
		"data_set_peripheral.csv",
		"OPM identifiers for proteins in the set of peripheral membrane proteins. Each code identifies a quaternary model in OPM (http://opm.phar.umich.edu), with chain coordinates determined by the corresponding PDB code."
		)
	save_data_set(residue_data_tm,
		"data_set_reference.csv",
		"OPM identifiers from which the protein fragments in the reference set were derived. Each code identifies a quaternary model in OPM (http://opm.phar.umich.edu), with chain coordinates determined by the corresponding PDB code."
		)	
	save_data_set(residue_data_alternate_periph,
		"data_set_alternative_peripheral.csv",
		"PDB codes for the alternative set of peripheral membrane proteins, presented in supplementary material."
		)	
	save_data_set(residue_data_alternate_reference,
		"data_set_alternative_reference.csv",
		"PDB codes for the alternative reference set, presented in supplementary material."
		)	
}

compare_flat_and_protruding_plots <- function(){
	protrusion_aa_frequencies(flat, flat, hydrophobics, xlab="flat reference", ylab="flat peripheral", col2=color_tm(F))
	protrusion_aa_frequencies(prot, prot, hydrophobics, xlab="protruding reference", ylab="protruding peripheral", col2=color_tm(F))
	protrusion_aa_frequencies(prot, flat, hydrophobics, set2=residue_data_periph, xlab="flat peripheral", ylab="protruding peripheral")
	protrusion_aa_frequencies(prot, flat, hydrophobics, set1=residue_data_tm, xlab="flat reference", ylab="protruding reference", col=color_tm(F), col2=color_tm(F))
	
	protrusion_aa_frequencies(prot, flat, hydrophobics, xlab="flat reference", ylab="protruding peripheral", col2=color_tm(F))
	protrusion_aa_frequencies(flat, prot, hydrophobics, xlab="protruding reference", ylab="flat peripheral", col2=color_tm(F))
}
aa_breakdown_coins <- function(periphset=residue_data_periph, refset=residue_data_tm, mean=strat_mean){
	for (aa in hydrophobics){
		aa_prot_co <- function(limit){return(protrusion(min_facet_neighbours_ww_if=limit, pre=protrusion(type=c(aa), pre=prot)))}
		coins_dependence(aa_prot_co, hull, set1=periphset, set2=refset, strat_stat=mean, title=aa)
	}
	aa_prot_co <- function(limit){return(protrusion(min_facet_neighbours_ww_if=limit, pre=protrusion(type=superhydrophobics, pre=prot)))}
	coins_dependence(aa_prot_co, hull, set1=periphset, set2=refset, strat_stat=mean, title="superhydrophobics")
}

poster_plots <- function(periphset=residue_data_periph, refset=residue_data_tm, periphstruct=structure_data_periph, refstruct=structure_data_tm, mean=strat_mean, suffix=""){
	coins_dependence(hf_prot_co, hull, set1=periphset, set2=refset, strat_stat=mean)
	conditional_hydrophobics_or(set1=periphstruct, set2=refstruct, strat_stat=mean, names=c("peripheral", "reference"), ylab="log odds ratio")

	par(mfrow=c(1,2))
	neighbour_vs_odds_ratio(hull, hydrophobics, 0, 7, 27, set1=periphset, set2=refset, strat_stat=mean, ylim=c(-0.5, 1.0), ylab="log odds ratio")
	neighbour_vs_sse(hf_hull, 0,7,27, set1=periphset, ylab="# insertables", legend.text=T, ylim=c(0,1400))
	
	par(mfrow=c(2,2))
	protrusion_density_histogram(hf_exposed, exposed, periphset, col=color_periph(), strat_stat=mean, xlab="", ylab="peripheral", density_tot=F)
	protrusion_density_histogram(hf_prot, prot, periphset, col=color_periph(), strat_stat=mean, xlab="", density_tot=F, ylab="")
	protrusion_density_histogram(hf_exposed, exposed, refset, col=color_tm(), strat_stat=mean, xlab="exposed hydrophobes", ylab="reference", density_tot=F)
	protrusion_density_histogram(hf_prot, prot, refset, col=color_tm(), strat_stat=mean, xlab="protruding hydrophobes", density_tot=F, ylab="")
}

paper_plots <- function(){
	compare_surface_plots()
	compare_opm_plots()
	compare_with_experiment_plots()
	characterize_protrusions_plots()
}
suppmat_plots <- function(){
	misc_suppmat_plots()
	compare_surface_plots(mean=strat_avg_alt, periphset=residue_data_alternate_periph, refset=residue_data_alternate_reference, suffix="Alternative", labels4=c("E", "F", "G", "H"), labels3=c("D", "E", "F"))
	characterize_protrusions_plots(periphset=residue_data_alternate_periph, refset=residue_data_alternate_reference, periphstruct=structure_data_alternate_periph, refstruct=structure_data_alternate_reference, mean=strat_avg_alt, suffix="Alternative", dssp=sse_data_alternate_set, setnames=c("Periph.-P", "Ref. Prot."))	
}
print(R.Version())
suppmat_plots()
paper_plots()
save_data_sets()
