junk/rage.R

# #######################################################################
#                R-based Analaysis of GEnomics data
#                              (rage)
#              written by Daniel Vera (dvera@fsu.edu)
# #######################################################################

cuffdiff.2.matrix <- function (cuffdiff.file , template.matrix.file , sample1name , sample2name , b73=FALSE){
	library(tools)
	matsuffix<-get.suffix(basename(template.matrix.file),"_")
	winsize=as.numeric(gsub("mat","",file_ext(template.matrix.file)))

	mat<-read.mat(template.matrix.file)
	cuf<-read.tsv(cuffdiff.file,header=T)
	matgenes<-unlist(lapply(strsplit(row.names(mat),";") , "[" , 3 ))
	if(b73){ matgenes<-remove.suffix(matgenes,"_T")}
	namematch<-match(matgenes,cuf[,3])
	 
	m1<-matrix(as.numeric(cuf[namematch,8]),nrow=nrow(mat),ncol=ncol(mat))
	row.names(m1)<-row.names(mat)
	
	m2<-matrix(as.numeric(cuf[namematch,9]),nrow=nrow(mat),ncol=ncol(mat))
	row.names(m2)<-row.names(mat)

	cuf$invq<-1/cuf[,13]
	gi2<-which(cuf[,9]-cuf[,8]>0)
	cuf$invq[gi2]<-cuf$invq[gi2]*-1
	invq<-matrix(as.numeric(cuf$invq[namematch]),nrow=nrow(mat),ncol=ncol(mat))
	row.names(invq)<-row.names(mat)

	dif<-matrix(as.numeric(cuf[namematch,8]-cuf[namematch,9]),nrow=nrow(mat),ncol=ncol(mat))
	row.names(dif)<-row.names(mat)

	invqname<-paste0(sample1name,"-",sample2name,"_inverseQval_",get.suffix(basename(template.matrix.file),"_"))
	m1name<-paste0(sample1name,"_FPKM_",matsuffix)
	m2name<-paste0(sample2name,"_FPKM_",matsuffix)
	difname<-paste0(sample1name,"Minus",sample2name,"_FPKM_",matsuffix)

	write.mat(m1,file=m1name)
	write.mat(m2,file=m2name)
	write.mat(dif,file=difname)
	write.mat(invq,file=invqname)
	
}
mat.tilegrid <- function (mat1 , mat2 , scoremats , mat1name = "x" , mat2name = "y" , LOG10=TRUE , abx = 0 , aby = 0 , legendnames = basename(removeext(scoremats)) , plotcolors=rainbow(length(scoremats)) , region = c(-1000,0) , tiles = 3 , ylims = c(-1,1) , breaks=NULL ){

	p=c(0,(1:(tiles-1))*1/tiles,1)
	s<-read.mat(mat1)
	r<-read.mat(mat2)
	sq<-quantile(s[,1],probs=p,na.rm=T)
	rq<-quantile(r[,1],probs=p,na.rm=T)
	cs<-cut(s[,1],sq,labels=F,include.lowest=T)
	cr<-cut(r[,1],rq,labels=F,include.lowest=T)
	
	sm2<-lapply(scoremats,read.mat)
	numscoremats<-length(scoremats)
	
	if(LOG10){
		ls=log10(s[,1]+1)
		lr=log10(r[,1]+1)
		xlabel=paste("log10 (",mat1name,")")
		ylabel=paste("log10 (",mat2name,")")
		lsq<-quantile(ls,probs=p,na.rm=T)
		lrq<-quantile(ls,probs=p,na.rm=T)
	} else{
		ls=s[,1]
		lr=r[,1]
		xlabel=mat1name
		ylabel=mat2name
		lsq<-sq
		lrq<-rq
	}

	plot(ls,lr,pch=16,xlab=xlabel,ylab=ylabel)
	abline(v=lsq[2:(tiles)],h=lrq[2:(tiles)],col="grey50")

	#X11()

	scatterdens(ls,lr,breaks=50)

	smoothScatter(ls,lr,xlab=xlabel,ylab=ylabel)
	abline(v=lsq[2:(tiles)],h=lrq[2:(tiles)],col="grey50")

	#X11()

	matcols<-ncol(sm2[[1]])
	xa<- ((0:4*(matcols/4)))
	ya<-c(ylims[1],(ylims[2]+ylims[1])/2,ylims[2])
	eg<-expand.grid(1:tiles,1:tiles)
	ml<-cbind(rbind(t(matrix(1:(tiles^2) , nrow=tiles)),(tiles^2+1):(tiles^2+tiles)),(tiles^2+tiles+1):((tiles+1)^2))
	ml<-ml[(tiles+1):1,]
	bottoms<-ml[tiles+1,]
	lefts<-ml[,1]
	layout(ml)
	par(oma=c(5,5,5,5),mar=c(0.2,0.2,0.2,0.2))

	for(i in 1:(tiles^2)){
		plot(0,type="n",xlim=c(0,matcols),ylim=ylims,yaxt='n',xaxt='n')
		if(i %in% lefts ){
			axis(2,at=ya,labels=ya)
			mtext(which(i==rev(lefts)),side=2,line=2,cex=2)
		}
		if(i %in% bottoms){
			axis(1,at=xa,labels=xa)
			mtext(i,side=1,line=4,cex=2)
		}
		abline(v=abx,h=aby,col="grey70")
		for(j in 1:numscoremats){
			lines(1:matcols,colMeans(sm2[[j]][which(cs==eg[i,1] & cr==eg[i,2]),],na.rm=T),col=plotcolors[j])
		}
		text(xa[1],ya[3],paste0("n=",length(which(cs==eg[i,1] & cr==eg[i,2]))),pos=4)
	}

	for(i in 1:tiles){
		plot(0,type="n",xlim=c(0,matcols),ylim=ylims,yaxt='n',xaxt='n')
		rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "grey90")
		abline(v=abx,h=aby,col="grey70")
		if(i == 1){ axis(2,at=ya,labels=ya) }
		for(j in 1:numscoremats){
			lines(1:matcols,colMeans(sm2[[j]][which(cr==i),],na.rm=T),col=plotcolors[j])
		}
		text(xa[1],ya[3],paste0("n=",length(which(cr==i))),pos=4)
	}
	for(i in 1:tiles){
		plot(0,type="n",xlim=c(0,matcols),ylim=ylims,yaxt='n',xaxt='n')
		rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "grey90")
		abline(v=abx,h=aby,col="grey70")
		if(i == 1){ axis(1,at=xa,labels=xa) }
		for(j in 1:numscoremats){
			lines(1:matcols,colMeans(sm2[[j]][which(cs==i),],na.rm=T),col=plotcolors[j])
		}
		text(xa[1],ya[3],paste0("n=",length(which(cs==i))),pos=4)
	}
	plot(0,type="n",xlim=c(0,matcols),ylim=ylims,yaxt='n',xaxt='n')
	rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "grey90")
	abline(v=abx,h=aby,col="grey70")
	for(j in 1:numscoremats){
			lines(1:matcols,colMeans(sm2[[j]],na.rm=T),col=plotcolors[j])
	}
	text(xa[1],ya[3],paste0("n=",nrow(sm2[[1]])),pos=4)
	legend("topright",legend=legendnames , col=plotcolors , lwd=3 , bty="n")
	

}

coords.2.matrix <- function ( coordfile , genomefile , outdir , windowsize = 10000 , cores="max" ){
	
	library(parallel)
	library(gtools)
	if(cores=="max"){cores=detectCores()-1}
	
	cat("finding chromosomes\n")
	chroms <-  mixedsort (readLines ( pipe ( paste ( "cut -f 1" , coordfile , "| sort | uniq"))))
	chroms <- chroms[which(chroms != "chrM")]
	chroms <- chroms[which(chroms != "chrY")]
	numchroms <- length(chroms)
	chromsizes <- read.tsv ( genomefile )
	if(sum(chroms %in% chromsizes[,1]) != length(chroms)){stop("one or more chromosomes not found in genome file")}
	chromsizes<-chromsizes[match(chroms,chromsizes[,1]),]

	dir.create(outdir)
	outnames<-paste0(outdir,"/",chroms,"_win",windowsize,".mat")
		

	mclapply(1:numchroms,function(r) {
		cat("getting",chroms[r],"data\n")
		numchromwins<-floor(chromsizes[r,2]/windowsize)
		brks <- (0:numchromwins)*windowsize
		coords <- read.delim ( pipe ( paste0 ( "awk '($1==\"",chroms[r],"\" && $3==\"",chroms[r],"\")' ",coordfile," | cut -f 2,4" ) ) , stringsAsFactors=F , header = F )
		coords <- coords[which(coords[,1] <= brks[length(brks)] & coords[,2] <= brks[length(brks)] ),]
		a=coords[,1]
		b=coords[,2]
		
	
		cat("binning x-axis data\n")
		xbinind<-cut(a,brks,labels=F)
		
		cat("binning y-axis data\n")
		h<-mclapply(1:numchromwins,function(x){
			hist(b[which(xbinind==x)],breaks=brks,plot=F)
		},mc.cores=cores)
		
		cat("creating interaction matrix\n")
		binmat<-data.matrix(as.data.frame(lapply(h,"[[",2)))

		cat("saving binmat\n")
		write.tsv(binmat,file=outnames[r])
	} , mc.cores=cores , mc.preschedule=F )
}

matrix.2.tracks <- function ( matdir , genomefile , outname , windowsize = 10000 , maxdist = windowsize * 40 , cores="max" ){


	library(parallel)
	library(gtools)
	if(cores=="max"){cores=detectCores()-1}
	
	cat("finding chromosomes\n")
	matfiles<-mixedsort(files(paste0(matdir,"/*.mat")))
	chroms <-  unique(  ( remove.suffix( basename(matfiles),"_" ) ) )
	numchroms <- length(chroms)
	chromsizes <- read.tsv ( genomefile )
	if(sum(chroms %in% chromsizes[,1]) != length(chroms) ) {stop("one or more chromosomes not found in genome file")}
	chromsizes<-chromsizes[match(chroms,chromsizes[,1]),]
	numdistances=floor(maxdist/windowsize)
	d<-(1:numdistances)*windowsize
	distancenames<-formatC(d,width=nchar(maxdist),format="d",flag="0")

	outnames<-lapply(1:numchroms, function(o) paste0(matdir,"/",outname,"_",chroms[o],"_",distancenames,".bg"))
	foutnames<-paste0(matdir,"/",outname,"_",distancenames,".bg")
	
	for(b in 1:numchroms){
		

		
		mat<-read.tsv(matfiles[b])
		matcols<-ncol(mat)
		if(ceiling(matcols/2) != floor(matcols/2)){mat<-mat[1:(matcols-1),1:(matcols-1)]}
		matcols<-ncol(mat)
		l1<-lapply(1:matcols,function(x) 1:x)
		l2<-lapply(1:matcols,function(x) matcols+1-(x:1))
		rotmat<-matrix(NA,nrow=matcols,ncol=matcols)
		starts<-(matcols/2)-floor((0:(matcols-1)/2))
		
		cat("rotating",chroms[b],"matrix\n")
		fillers<-mclapply(1:matcols , function(i){
			v<-vector(length=i)
			for(j in 1:i){
				v[j]<-mat[l1[[i]][j],l2[[i]][j]]
			}
			return(v)
		} , mc.cores=cores , mc.preschedule=F)

		
		for(i in 1:matcols){
			rotmat[i,starts[i]:(starts[i]+i-1)]<-fillers[[i]]
		}
		
		rotmat[is.na(rotmat)]<-0

		oddstarts <- (0:(matcols-1))*windowsize + 1
		oddstops <- oddstarts + windowsize
		evenstarts <- oddstarts - windowsize/2
		evenstops <- evenstarts + windowsize
		md<-which(d>maxdist)[1]-1


		for(i in ((1:(numdistances/2))*2)-1){
			df<-data.frame(chroms[b],evenstarts,evenstops,rotmat[(matcols+1-i),])
			write.tsv(df,file=outnames[[b]][i])
		}

		for(i in ((1:(numdistances/2))*2)){
			df<-data.frame(chroms[b],oddstarts,oddstops,rotmat[(matcols+1-i),])
			write.tsv(df,file=outnames[[b]][i])
		}
	}

	for(b in 1:numdistances){
		infiles<-paste(unlist(lapply(outnames,"[",b)),collapse=" ")
		system(paste("cat",infiles,"| awk '($2 > 0)' >",foutnames[b]))
		system(paste("bedGraphToBigWig",foutnames[b],genomefile,gsub(".bg",".bw",foutnames[b])))
	}




}

replicationdomains<-function(bgfile , lspan = 0 , mergewithin = 100000 , removeY = T ){
	
	if(lspan != 0){ r<-bg.loess(r,lspan=lspan) }
	

	r<-read.tsv(bgfile)
	
	if(removeY==TRUE){
		ttr<-ttr[which(ttr$V1 != "chrY"),]
		r<-r[which(r$V1 != "chrY"),]
	}

	r$Vprod=c(1,r$V4[1:(rl-1)]*r$V4[2:rl])
	r$diff<-c(1,diff(r$V4))
	r$slope=c(1,diff(r$V4)/diff(r$V2))
	r$slopeprod=c(1,r$slope[1:(rl-1)]*r$slope[2:rl])
	spc<-which(r$slopeprod<0)
	pc<-which(r$prod<0)
	ttrb1<-unlist(lapply(1:length(pc), function(x) pc[x]-which(r[pc[x]:1,8]<0)[1]+1 ))
	ttrb2<-unlist(lapply(1:length(pc), function(x) pc[x]+which(r[pc[x]:pc[length(pc)],8]<0)[1]-1 ))
	
	chroms<-sort(unique(r$V1))
	numchroms<-length(chroms)
	chromlines<-lapply(1:numchroms,function(x) which(r[,1]==chroms[x]))

	
	t<-data.frame(pc,ttrb1,ttrb2)
	t[which(r[t[,2],2] > r[t[,1],2]),2]<-NA
	t[which(r[t[,3],2] < r[t[,1],2]),3]<-NA
	t$strand <- NA
	t[which(r[pc,4]<0),4]<-"+"
	t[which(r[pc,4]>0),4]<-"-"


	ttr<-data.frame("V1"=r[t[,1],1],"V2"=r[t[,2],2],"V3"=r[t[,3],3],"V4"=1:nrow(t),"V5"=1,"V6"=t[,4],"V7"=r[t[,1],2],"V8"=r[t[,1],3])
	ttr<-ttr[which(complete.cases(ttr)),]


	rd<-data.frame("V1"=r[t[1:(nrow(t)-1),1],1],"V2"=r[t[1:(nrow(t)-1),1],2],"V3"=r[t[2:(nrow(t)),1],2], "V4"=1:(nrow(t)-1) , "V5"=1 , "V6"=t[2:(nrow(t)),4] , "V7"=r[t[1:(nrow(t)-1),3],2],"V8"=r[t[2:(nrow(t)),2],3])
	otherchrom=r[t[2:(nrow(t)),1],1]
	rd<-rd[which(rd$V1==otherchrom),]
	rm(otherchrom)
	rd<-rd[which(complete.cases(rd)),]
}
bed.ends<-function( bedfile ){
	if(length(bedfile) > 1){stop("this function can only take 1 file. use lapply.")}
	outname<-paste0(basename(removeext(bedfile)),"_ends.bed")
	system(paste("awk '{print $1,$2,$2+1;print $1,$3,$3+1}' OFS='\t'",bedfile,"| sort --parallel=4 -T . -S 10G -k1,1 -k2,2n >",outname))
	return(outname)
}
fastq.clip.paired <- function( fastq1 , fastq2=NULL , adapter = "GATCGGAA" , quality.cutoff=20 , minlength = 10 , cores="max"){
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	if(length(fastq1) < cores){cores=length(fastq1)}
	outnames<-paste0(basename(removeext(fastq1)),"_aclip.fastq")
	a<-mclapply(1:length(fastq1) , function(x) system(paste("cutadapt -O 1 -a",adapter,fastq1[x],">",outnames[x])) , mc.cores=cores , mc.preschedule=F)
	return(outnames)
}
hub.modify<- function(tracknames , hubloc , operation = "remove"){
	t<-grep("track ",a$V1)
	t2<-c(t[2:length(t)]-1,nrow(a))
	b<-strsplit(a$V1," ")
	bl<-unlist(lapply(b,length))
	d<-unlist(lapply(b,"[",1))
	e<-lapply(1:length(b), function(x) b[[x]][2:bl[x]] )
	e<-unlist(lapply(e,paste0,collapse=" "))
	f<-data.frame(V1=d,V2=e,stringsAsFactors=F)
	g<-lapply(1:length(t), function(x) data.frame(x=f[t[x]:t2[x],1] , y = f[t[x]:t2[x],2], stringsAsFactors=F))
	du<-unique(d)
	m<-lapply(1:length(g), function(x) match(g[[x]][,1],du))
	mat<-matrix(nrow=length(g),ncol=length(du))
	mat<-data.frame(mat,stringsAsFactors=F)
	colnames(mat) <- du
	for(i in 1:length(g)){
		mat[i,m[[i]]] = g[[i]][,2]
	}

	if(operation=="remove"){
		selected <- which(mat$track %in% tracknames | mat$parent %in% tracknames)
		if(length(selected) == 0 ){
			stop("no tracks found")
		} else{
			mat <- mat[-selected,]
			m <- m[-selected]
		}
	}

	if(operation=="duplicate"){
		selected <- which(mat$track %in% tracknames | mat$parent %in% tracknames)
		if(length(selected) == 0 ){
			stop("no tracks found")
		} else{
			mat <- mat[nrow(mat)+(1:length(selected)),] <- mat[selected,]
			m <- m[-selected]
		}
	}

	u<-unlist(
		lapply(
			1:nrow(mat), function(x) paste(colnames(mat)[m[[x]]],mat[x,m[[x]]])
		)
	)



	trackfile<-data.frame("V1"=u,stringsAsFactors=FALSE)
	write.tsv(trackfile,file="trackDb.txt")	

	# dont allow 'track' in track names
}
bgtocontigs<-function(bgfile,cores="max"){
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	cat("loading bedgraph\n")
	bg<-read.tsv(bgfile)
	cat("splitting pos/neg\n")
	p<-bg
	n<-bg
	rm(bg)
	cat("converting scores\n")
	p$V4[which(p$V4<0)]<-0
	n$V4[which(n$V4>0)]<-0
	cat("finding gaps\n")
	d<-diff(p$V2)
	stops<-which(d !=1)
	starts=c(1,stops+1)
	stops=c(stops,nrow(p))
	dir.create("pos")
	dir.create("neg")
	dir.create("int")
	cat("saving data\n")
	a<-mclapply(1:length(starts),function(x){
		write.tsv(p[starts[x]:stops[x],4],file=paste("pos/",paste(p[starts[x],1:2],collapse="-"),".pos",sep=""))
		write.tsv(n[starts[x]:stops[x],4],file=paste("neg/",paste(n[starts[x],1:2],collapse="-"),".neg",sep=""))
		write.tsv(n[starts[x]:stops[x],1:3],file=paste("int/",paste(n[starts[x],1:2],collapse="-"),".int",sep=""))
	},mc.cores=cores)
	write.tsv(p,file=paste(basename(removeext(bgfile)),"_pos.bg",sep=""))
	write.tsv(n,file=paste(basename(removeext(bgfile)),"_neg.bg",sep=""))
}
bed.addcolor		<- function( bed, color, strand="." ){
	ext<-get.file.extensions(bed)
	curcol<-paste(col2rgb(color),collapse=",")
	curbed<-read.tsv(bed)
	outname<-paste0(removeext(bed),"_",color,".",ext)
	if(ncol(curbed)<3){ stop ("less than 3 columns in file")}
	if(ncol(curbed)==3){ curbed$V4 <- 1:nrow(curbed)}
	if(ncol(curbed)==4){ curbed$V5 <- 1}
	if(ncol(curbed)==5){ curbed$V6 <- strand}
	if(ncol(curbed)==6){ curbed$V7  <- curbed$V2}
	if(ncol(curbed)==7){ curbed$V8  <- curbed$V3}
	if(ncol(curbed)==8){ curbed$V9  <- curcol}
	write.tsv(curbed,file=outname)
}
bed.addstrand		<- function( bed, strand="+" ){
	ext<-get.file.extensions(bed)
	curbed<-read.tsv(bed)
	if(strand=="+"){ strandname=="plus" }
	if(strand=="-"){ strandname=="minus"}
	outname<-paste0(removeext(bed),"_",strandname,".",ext)
	if(ncol(curbed)<3){ stop ("less than 3 columns in file")}
	if(ncol(curbed)==3){ curbed$V4 <- 1:nrow(curbed)}
	if(ncol(curbed)==4){ curbed$V5 <- 1}
	if(ncol(curbed)==5){ curbed$V6 <- strand}
	write.tsv(curbed,file=outname)
}
stdout.tsv		<- function( cmd, header=FALSE, ... ){
	read.delim(pipe(cmd),header=header,stringsAsFactors=FALSE, ... )
}
matgrid<-function(mat){
	png(file="test.png",height=5000,width=1000)
	m<-matrix(1:3,nrow=3)
	mat[which(mat>5)]<-5
	mat[which(mat<1)]<-1
	rb<-colorRampPalette(c("black","red"))
	layout(m,heights=c(1,20,5),widths=1)
	par(oma=c(10,0,10,0))
	par(mar=c(10,15,5,15))
	image(matrix(1:100),col=rb(100),breaks=0:100,axes=F,bty="o")
	axis(side=1,at=c(0,1),labels=c(0,50),cex.axis=8,lwd=10,padj=2,line=0,tcl=-3)
	mtext("sample1_0-200_scg3",side=3,cex=8,outer=T)
	par(mar=c(5,15,5,15),xpd=TRUE)
	image(t(mat),breaks=(1:50)/10,col=rb(49),axes=FALSE)
	lines(rep(-0.05,2),c(0,0.5),lwd=70,col="blue",lend=1)
	axis(side=1,at=c(0,0.5,1),labels=F,lwd=10,padj=1,line=1,tcl=-3)
	par(mar=c(20,15,0,15))
	plot(1:200,colMeans(mat,na.rm=T),type="l",lwd=10,xaxs="i",axes=FALSE)
	axis(side=1,at=c(1,100,200),labels=c(-1000,0,1000),cex.axis=8,lwd=10,padj=2,line=0,tcl=-3)
	axis(side=2,cex.axis=8,lwd=10,tcl=-3,padj=-1)
	mtext("Distance from TSS (bp)",side=1,cex=5,outer=T,line=2)
	dev.off()
}
mat.plotmats		<- function( matlist , tracknames , colorlist="auto", plottypes = "l" , matnamelist="auto", ymaxs = "auto" , ymins = "auto" , cores = "max", centername="TSS" ){
	library(tools)
	library(parallel)
	if(cores=="max"){cores<-detectCores()-1}
	
	numsets<-length(matlist)
	nummats<-unlist(lapply(matlist,length))
	
	cat("loading matrices\n")
	mats<-lapply(1:numsets, function(s) lapply(1:nummats[s], function(m) read.mat( matlist[[s]][m] ) ) )
	
	if(matnamelist=="auto"){ matnamelist<-lapply(1:numsets, function(s) basename(removeext(matlist[[s]] ) ) ) }
	if(colorlist=="auto"){ colorlist<-lapply(1:numsets, function(s) rainbow(nummats[s]) ) }
	if(length(plottypes)==1){plottypes=rep(plottypes,numsets)}
	
	windowsize=as.numeric( gsub("mat","",file_ext(matlist[[1]][1]) ) )
	matcols<-ncol(mats[[1]][[1]])
	matrows<-nrow(mats[[1]][[1]])
	x<-((1:matcols)-(matcols/2))*windowsize
	xw<-c(min(x),((max(x)-min(x))*0.25+max(x)))
	
	
	cat("calculating y maxs\n")
	matmax<-lapply(1:numsets, function(s) lapply(1:nummats[s], function(m) apply(mats[[s]][[m]],1,max,na.rm=TRUE)  )  )
	yautomax<-lapply(1:numsets, function(s) unlist(mclapply(1:matrows, function(r) max(unlist(lapply(matmax[[s]],"[",r)), na.rm=TRUE) ,mc.cores=cores ) ) )
	
	cat("calculating y mins\n")
	matmin<-lapply(1:numsets, function(s) lapply(1:nummats[s], function(m) apply(mats[[s]][[m]],1,min,na.rm=TRUE)  )  )
	yautomin<-lapply(1:numsets, function(s) unlist(mclapply(1:matrows, function(r) min(unlist(lapply(matmin[[s]],"[",r)), na.rm=TRUE) ,mc.cores=cores ) ) )
	
	pdf(file="plotmatstest.pdf")
	par(mfrow=c(numsets,1),oma=c(4,0,4,0),mar=c(0,4,0,4))
	
	cat("drawing plots\n")
	for(r in 1:matrows){
		for(s in 1:numsets){
			plot(
				0,
				type="n",
				xaxt=if(s==numsets){'s'} else{'n'},
				xlim=xw,
				ylim=c(yautomin[[s]][r],yautomax[[s]][r]),
				ylab=tracknames[s],
				xlab=paste("Distance from",centername,"(bp)")
				#main=row.names(mats[[s]][[m]])[r]
			)
			grid(lty="solid",ny=NA)
			abline(v=max(x),lwd=4,col="black")
			for(m in 1:nummats[s]){
				lines(x,mats[[s]][[m]][r,],col=colorlist[[s]][m],type=plottypes[s])
			}
			legend("topright",legend=matnamelist[[s]],col=colorlist[[s]],lwd=3, cex=0.5, bty="n")
		}
	}
	dev.off()
}
isegtobed<-function(posfile,negfile=NULL,intervalfiles,prefix="z3b1sens",cutoffs=c(0,1,1.5,2,3),genome="b73v2", fix=FALSE){
	library(parallel)
	numcutoffs<-length(cutoffs)
	cutoffcols<-unlist(lapply(1:numcutoffs, function(x) 2+(x-1)*3 ))
	inames<-basename(removeext(intervalfiles))
	bothnames<-paste(prefix,"_BC",cutoffs,".bed",sep="")
	
	cat("loading files\n")
	intervals<-mclapply(intervalfiles,read.tsv,header=T,mc.cores=detectCores())
	pos<-read.tsv(posfile,header=T)
	posrows<-lapply(1:numcutoffs, function(x) which( is.na(pos[,cutoffcols[x]])==FALSE ) )
	posbgs<-lapply(1:numcutoffs, function(x) as.data.frame(matrix(NA,nrow=length(posrows[[x]]),ncol=6)))
	possubs<-lapply(1:numcutoffs, function(x) pos[posrows[[x]],])
	posmatches<-lapply(1:numcutoffs, function(x) match(possubs[[x]][,1],inames ) )
	posnames<-paste(prefix,"_pos_BC",cutoffs,".bed",sep="")
	allnames=posnames
	
	if(is.null(negfile)==FALSE){
		neg<-read.tsv(negfile,header=T)
		negrows<-lapply(1:numcutoffs, function(x) which( is.na(neg[,cutoffcols[x]])==FALSE ) )
		negbgs<-lapply(1:numcutoffs, function(x) as.data.frame(matrix(NA,nrow=length(negrows[[x]]),ncol=6)))
		negsubs<-lapply(1:numcutoffs, function(x) neg[negrows[[x]],])
		negmatches<-lapply(1:numcutoffs, function(x) match(negsubs[[x]][,1],inames ) )
		negnames<-paste(prefix,"_neg_BC",cutoffs,".bed",sep="")
		allnames<-c(posnames,negnames,bothnames)
	}
	
	
	for(i in 1:numcutoffs){
		cat("finding genomic locations of segments for BC",cutoffs[i],"\n")
		posbgs[[i]][,1]<-unlist(lapply(1:nrow(posbgs[[i]]), function(x) intervals[[posmatches[[i]][x]]][possubs[[i]][x,cutoffcols[i]],1] ))
		posbgs[[i]][,2]<-unlist(lapply(1:nrow(posbgs[[i]]), function(x) intervals[[posmatches[[i]][x]]][possubs[[i]][x,cutoffcols[i]],2] ))
		posbgs[[i]][,3]<-unlist(lapply(1:nrow(posbgs[[i]]), function(x) intervals[[posmatches[[i]][x]]][possubs[[i]][x,cutoffcols[i]+1]-1,3] ))
		#posbgs[[i]][,4]<-unlist(lapply(1:nrow(posbgs[[i]]), function(x) possubs[[i]][x,cutoffcols[i]+2] ))
		posbgs[[i]][,4]<-1:nrow(posbgs[[i]])
		posbgs[[i]][,5]<-1
		posbgs[[i]][,6]="+"
		
		
		badrows<-which(is.na(posbgs[[i]][,2]))
		if(length(badrows)>0){posbgs[[i]]<-posbgs[[i]][-badrows,]}
		
		write.tsv(posbgs[[i]],file=posnames[i])
		
		
		if(is.null(negfile)==FALSE){
			negbgs[[i]][,1]<-unlist(lapply(1:nrow(negbgs[[i]]), function(x) intervals[[negmatches[[i]][x]]][negsubs[[i]][x,cutoffcols[i]],1] ))
			negbgs[[i]][,2]<-unlist(lapply(1:nrow(negbgs[[i]]), function(x) intervals[[negmatches[[i]][x]]][negsubs[[i]][x,cutoffcols[i]],2] ))
			negbgs[[i]][,3]<-unlist(lapply(1:nrow(negbgs[[i]]), function(x) intervals[[negmatches[[i]][x]]][negsubs[[i]][x,cutoffcols[i]+1]-1,3] ))
			negbgs[[i]][,4]<-1:nrow(negbgs[[i]])
			negbgs[[i]][,5]<-1
			negbgs[[i]][,6]="-"
			badrows<-which(is.na(negbgs[[i]][,2]))
			if(length(badrows)>0){negbgs[[i]]<-negbgs[[i]][-badrows,]}
			write.tsv(negbgs[[i]],file=negnames[i])
			
			#both<-rbind(posbgs[[i]],negbgs[[i]],stringsAsFactors=FALSE)
			system(paste("cat",posnames[i],negnames[i],">",bothnames[i]))
			bed.sort(bothnames[i])
			#write.tsv(both,file=bothnames[i])
			
		}
	}
	
	allnames<-unlist(mclapply(allnames,bed.sort,mc.cores=detectCores()))
	allnames<-unlist(mclapply(allnames,bedToBigBed,genome=genome,mc.cores=detectCores()))
	return(allnames)
}
# ####################################
#      SYSTEM SETTINGS               #
# ####################################
hostname<-Sys.info()["nodename"]
user<-Sys.info()["user"]
if(hostname %in% c("genome","chrom1","chrom2","chrom3","chrom4","chrom5")){
    datapath<-"/data/"
}
if(grepl("spear",hostname)==TRUE){datapath<-"/lustre/maize/home/dlv04c/data/"}
if(grepl("submit",hostname)==TRUE){datapath<-"/lustre/maize/home/dlv04c/data/"}
if(grepl("medicine",hostname)==TRUE){datapath<-"/lustre/maize/home/dlv04c/data/"}
#use whole X11 terminal window
if(.Platform$GUI=="X11"){
	.adjustWidth <- function(...){
		options(width=Sys.getenv("COLUMNS"))
		TRUE
	}
}
if(.Platform$GUI=="X11"){
	.adjustWidthCallBack <- addTaskCallback(.adjustWidth)
}
options(scipen=100000000)
# ####################################
#      BASIC CONVENIENCE FUNCTIONS   #
# ####################################
`%ni%`			<- Negate(`%in%`)
rot			<- function( x){(1:x %% x) +1}
rotvec			<- function( vec){ vec[rot(length(vec))] }
rotrow			<- function( vec){ vec[rot(nrow(vec)),] }
rotcol			<- function( vec){ vec[,rot(ncol(vec))] }
rotlist			<- function( l ){lapply(c(2:length(l),1),function(x) l[[x]] ) }
lsl			<- function( ){system(command="ls -l")}
lsltr			<- function( ){system(command="ls -ltr")}
lslsr			<- function( ){system(command="ls -lSr")}
printname		<- function( df){
	print(deparse(substitute(df)))
}
remove.prefix		<- function( names,prefix){
	for(i in 1:length(names)){
		names[i]<-unlist(strsplit(names[i],prefix))[2]
	}
	names
}
get.prefix		<- function( names,separator){
	for(i in 1:length(names)){
		names[i]<-unlist(lapply(strsplit(names[i],separator),"[",1))
	}
}
get.suffix		<- function( names,separator){
	for(i in 1:length(names)){
		stringvec<-unlist(strsplit(names[i],separator))
		names[i]<-stringvec[length(stringvec)]
	}
	names
}
remove.suffix		<- function( names,suffix){
	for(i in 1:length(names)){
		names[i]<-unlist(strsplit(names[i],suffix))[1]
	}
	names
}
removeext		<- function( filenames ){
	filenames<-as.character(filenames)
	for(i in 1:length(filenames)){
		namevector<-unlist(strsplit(filenames[i],"\\."))
		filenames[i]<-paste(namevector[1:(length(namevector)-1)],collapse=".")
	}
	filenames
}
write.tsv		<- function( tsv, colnames=FALSE, rownames=FALSE, ... ){
	write.table(tsv,sep="\t",quote=FALSE,col.names=colnames,row.names=rownames, ... )
}
read.tsv			<- function( tsv, ... ){
	read.table(tsv,stringsAsFactors=FALSE,sep="\t", ... )
}
sc			<- function( cmd){
	system(command=paste(cmd))
}
findfiles		<- function( string, path="." ){
	return(readLines(pipe(paste("find ",path," -name '",string,"'",sep=""))))
}
files			<- function( x,...){
	parts<-(unlist(strsplit(x,"/")))
	#cat("parts",parts,"\n")
	if(length(parts)>1){
		pth<-paste(parts[1:length(parts)-1],collapse="/")
	}
	else{
		pth="."
	}
	#cat("pth",pth,"\n")
	nm<-parts[length(parts)]
	#cat("name",nm,"\n")
	if(length(parts)>1){
		list.files(pattern=glob2rx(nm),path=pth,full.names=TRUE, ... )
	}
	else{
		list.files(pattern=glob2rx(nm), ... )
	}
}
files2			<- function( cmd ){
	readLines(pipe(paste("ls -d",cmd)))
}
write.mat		<- function( mat, ... ){
	write.table(mat,sep="\t",quote=FALSE,col.names=FALSE, ... )
}
read.mat			<- function( mat, ... ){
	data.matrix(read.table(mat,stringsAsFactors=FALSE,sep="\t",row.names=1, ... ))
}
read.fmat		<- function( mat, ... ){
	as.matrix(read.table(mat,stringsAsFactors=FALSE,sep="\t",row.names=1, ... ))
}
downloadfile		<- function( url){
	if(Sys.info()["sysname"]=="Linux"){
		sc(paste("wget -q ",url,sep=""))
	}
	if(Sys.info()["sysname"]=="Darwin"){
		sc(paste("curl -silent -O ",url,sep=""))
	}
}
renamefiles		<- function( filelist, pattern="", replacement=""){
	if(pattern==""){stop("YOU MUST SPECIFY 'pattern'\n")}
	filenames<-basename(filelist)
	outnames<-gsub(pattern,replacement,filenames)
	nametab<-data.frame("before"=filenames,"after"=outnames,stringsAsFactors=FALSE)
	print(nametab)
	for(i in 1:length(filelist)){
		cat("renaming ",filenames[i]," to ",outnames[i],"\n",sep="")
		system(paste("mv",filelist[i],outnames[i]))
	}
}
filelines		<- function( filename ){
	as.numeric(readLines(pipe(paste("wc -l",filename,"| awk '{print $1}'"))))
}
getfullpaths		<- function( paths ){
	unlist(lapply(1:length(paths), function(x) system(paste("readlink -f",paths[x]),intern=TRUE) ))
}
uniquefilename		<- function( filename ){
	library(tools)
	if(grepl("\\.",basename(filename))==TRUE){
		ext<-paste(".",file_ext(filename),sep="")
	} else{ ext="" }
	suffixes<-c("",paste("_",1:100,sep=""))
	filenames<-paste(removeext(filename),suffixes,ext,sep="")
	filename<-filenames[which(file.exists(filenames)==FALSE)[1]]
	return(filename)
}
totalmem			<- function( ){
	return(as.numeric(readLines(pipe("free -b | awk '{print $2}' | head -n 2 | tail -n 1"))))
}
shead			<- function( filename, n=10 ){
	system(paste("head -n",n,filename))
}
stail			<- function( filename, n=10 ){
	system(paste("tail -n",n,filename))
}
# ####################################
#   FORMAT CONVERSION                #
# ####################################
sra.2.fastq		<- function( sras, paired=FALSE){
	library(parallel)
	arguments<-""
	if(paired==TRUE){arguments<-"--split-3"}
	doit<-mclapply(1:length(sras), function(x) system(paste("fastq-dump",arguments,sras[x])), mc.cores=detectCores() )
}
sam.2.bam		<- function( samfile, q=10 ){
	bamname<-paste(removeext(samfile),".bam",sep="")
	cat(bamname,": converting sam to bam\n")
	system(paste("samtools view -q",q,"-Sb ",samfile,">",bamname))
	return(bamname)
}
bam.2.bedpe		<- function( bamfile ){
	bamname<-basename(removeext(bamfile))
	outname<-paste(bamname,".bed",sep="")
	cat(bamname,": converting bam to bed\n")
	system(paste("bedtools bamtobed -bedpe -i",bamfile," | cut -f 1,2,6 | sort -V -T . -k1,1 -k2,2n -k3,3n > ",outname))
	return(outname)
}
bam.2.bed		<- function( bamfile ){
	bamname<-basename(removeext(bamfile))
	outname<-paste(bamname,".bed",sep="")
	cat(bamname,": converting bam to bed\n")
	system(paste("bedtools bamtobed -i",bamfile,">",outname))
	return(outname)
}
dz.2.wig			<- function( dzfile, cores="max", splitchromosomes=FALSE ){
	dzname<-basename(removeext(dzfile))
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	
	cat("finding chromosomes\n")
	chroms<-readLines(pipe(paste("cut -f 1",dzfile,"| uniq")))
	
	cat("splitting chromosomes\n")
	chromfiles<-unlist(mclapply(1:length(chroms), function(x){
		outname<-paste(dzname,"_",chroms[x],".wig",sep="")
		system(paste("grep -e $'",chroms[x],"\\t' ",dzfile," | cut -f 2,3 > ",outname,sep=""))
		system(paste("sed -i '1s/^/variableStep chrom=",chroms[x],"\\n/' ",outname,sep=""))
		return(outname)
	},mc.cores=cores,mc.preschedule=F))
	
	if(splitchromosomes==FALSE){
		outname<-paste(dzname,".wig",sep="")
		cat("concatenating chromosome scores\n")
		system(paste("cat",paste(chromfiles,collapse=" "),">",outname))
		system(paste("rm",paste(chromfiles,collapse=" ")))
	}
	else{ outname=chromfiles }
	return(outname)
}
scorelist.2.mat		<- function( scorefiles, genebed, namecol=1, delimiter="", namenumber=1, scorecol=4, nametype="symbol",matcol=200){
	
	bed<-read.tsv(genebed)
	if(nametype=="symbol"){bedids<-bed$V13}
	if(nametype=="ucsc"){bedids<-bed$V4}
	bedids<-data.frame("V1"=bedids,stringsAsFactors=FALSE)
	matrownames<-paste(bed$V4,bed$V1,bed$V13,sep="_")
	numscores<-length(scorefiles)

	scores<-lapply(scorefiles,read.tsv)
	scores<-lapply(1:numscores, function(s){
		if(delimiter!=""){
			scores[[s]][,namecol]<-unlist(lapply(strsplit(scores[[s]][,namecol],delimiter, fixed=TRUE),"[",namenumber))
		}
		scores[[s]]<-scores[[s]][-which(duplicated(scores[[s]][,namecol])),]
		scoretable<-data.frame("V1"=scores[[s]][,namecol],"V2"=scores[[s]][,scorecol],stringsAsFactors=FALSE)
		scoretable
	})
	
	scoremats<-lapply(1:numscores, function(s){
		scoremat<-merge(bedids,scores[[s]],by="V1",all.x=TRUE,all.y=FALSE,sort=FALSE)
		scoremat<-scoremat[match(scoremat$V1,bedids$V1),]
		mat<-matrix(NA_real_,nrow=nrow(bed),ncol=matcol)
		mat[,]<-scoremat$V2
		row.names(mat)<-matrownames
		mat
	})
	
	unlist(lapply(1:numscores, function(s){
		write.mat(scoremats,file=paste(basename(removeext(scorefiles[s])), "_", basename(removeext(genebed)),".mat10", sep=""))
	}))
}
gff.2.bg			<- function( gff, extendbp=60, cores="max", genome="hg19"){
	library(gtools)
	cat("sorting gff\n")
	system(paste("sort -V -k1,1 -k4,4n",gff,"-o",gff))
	gffname<-basename(removeext(gff))
	outname<-paste(gffname,".bg",sep="")
	chroms<-readLines(pipe(paste("cut -f 1",getgenomefile(genome))))
	chroms<-chroms[mixedorder(chroms)]
	gff<-read.delim(pipe(paste("cut -f 1,4,5,6",gff)),stringsAsFactors=FALSE,header=FALSE)
	gff<-gff[which(gff$V1 %in% chroms),]
	if( identical(gff$V2,gff$V3)==TRUE ){
		cat(gffname,": start and stop coordinates identical, extending ends by",extendbp,"bp\n")
		gff$V3<-gff$V3+extendbp
	}
	cat(gffname,": saving to",outname,"\n")
	write.tsv(gff,file=outname)
	return(outname)
}
gff.2.bed			<- function( gff, extendbp=60, cores="max", genome="hg19", strand=TRUE , bedname=basename(removeext(gff)) ){
	library(gtools)
	cat("converting",gff,"\n")
	gffname<-basename(removeext(gff))
	outname<-paste(gffname,".bed",sep="")
	system(paste0("grep -v '#' ",gff," | awk 'BEGIN{i=0};{i+=1;print $1,$4,$5,\"",gffname,"_\"i,1,$7}' OFS='\t' > ",outname))




	# chroms<-readLines(pipe(paste("cut -f 1",getgenomefile(genome))))
	# chroms<-chroms[mixedorder(chroms)]
	# gff<-read.tsv(gff)
	# gff<-gff[order(gff$V1,gff$V4),]
	# gff<-gff[which(gff$V1 %in% chroms),]
	# if( identical(gff$V2,gff$V3)==TRUE ){
	# 	cat(gffname,": start and stop coordinates identical, extending ends by",extendbp,"bp\n")
	# 	gff$V3<-gff$V3+extendbp
	# }
	# gff<-data.frame(V1=gff$V1,V2=gff$V4,V3=gff$V5,V4=paste0(bedname,"_",1:nrow(gff)),V5=1,if(strand==T){V6=gff$V7},stringsAsFactors=F)
	# write.tsv(gff,file=outname)
	return(outname)
}
# ####################################
#   FASTQ FUNCTIONS                  #
# ####################################
fastq.fastqc		<- function( fastq ){
	system(paste("fastqc",fastq))
}
fastq.clip.fastx		<- function( fastq , adaptor="GATCGGAA", minlength=10  ){
	outname<-paste(basename(removeext(fastq)),"_clipped.fastq",sep="")
	system(paste("fastx_clipper -Q33 -v -n -k -l",minlength,"-a",adaptor,"-i",fastq,"-o",outname))
	return(outname)
}
fastq.clip		<- function( fastqs, paired=FALSE, filter=FALSE){
	r1names<-fastqs
	for(i in 1:length(r1names)){
	cat("trimming/filtering ",remove.suffix(r1names[i],"_R1.fastq"),"\n")
		r2name<-paste(remove.suffix(r1names[i],"_R1.fastq"),"_R2.fastq",sep="",collapse="")
		system(paste("/lustre/maize/home/dlv04c/software/trim_galore_zip/trim_galore --paired --length 10 --fastqc -q 25 ",r1names[i]," ",r2name,sep="",collapse=""))
		
	}
}
fastq.filter.fastx	<- function( fastq, qscore=20, percent=90, paired=FALSE){
	outname<-paste(basename(removeext(fastq)),"_filtered-q",qscore,"p",percent,".fastq",sep="")
	system(paste("fastq_quality_filter -Q33 -v -q",qscore,"-p",percent,"-i",fastq,"-o",outname))
	return(outname)
}
fastq.bt2		<- function( read1files, read2files=NULL,cores="max",genome="hg19",mode="end-to-end",input="-q", q=10, makebam = TRUE , makebed = TRUE , all=FALSE , dovetail=TRUE , discordant=FALSE , mixed=FALSE , unaligned=FALSE ){
	
	library(parallel)
	if(cores=="max"){cores<-detectCores()-1}
	
	indexfile<-getbt2index(genome)

	read1names<-basename(removeext(read1files))
	
	if(is.null(read2files)){
		paired=FALSE
		outnames<-paste(basename(removeext(read1files)),".sam",sep="")
	} else{
		paired=TRUE
		outnames<-paste(basename(removeext(read1files)),".sam",sep="")
	}
	
	for(i in 1:length(read1files)){
		cat(read1names[i],": aligning to genome\n")
		if(paired==TRUE){
			print(paste(
				"bowtie2",
				"-p",cores,
				if(dovetail){"--dovetail"},
				if(mixed==FALSE){"--no-mixed"},
				if(discordant==FALSE){"--no-discordant"},
				if(unaligned==FALSE){"--no-unal"},
				if(all){"-a"},
				input,
				"-x",indexfile,
				"-1",read1files[i],
				"-2",read2files[i],
				"-S",outnames[i]
			))
			system(paste(
				"bowtie2",
				"-p",cores,
				if(dovetail){"--dovetail"},
				if(mixed==FALSE){"--no-mixed"},
				if(discordant==FALSE){"--no-discordant"},
				if(unaligned==FALSE){"--no-unal"},
				if(all){"-a"},
				input,
				"-x",indexfile,
				"-1",read1files[i],
				"-2",read2files[i],
				"-S",outnames[i]
			))
		}
		else{
			system(paste(
				"bowtie2",
				"-p",cores,
				if(all){"-a"},
				input,
				"-x",indexfile,
				"-U",read1files[i],
				"-S",outnames[i]
			))
		}
	}
	
	if(cores > length(read1files)) { cores <- length(read1files) }
	if(makebam) { outnames<-unlist(mclapply(outnames,sam.2.bam,mc.cores=cores,q=q)) }
	if(makebed) { outnames<-unlist(mclapply(outnames,if(paired){bam.2.bedpe} else{bam.2.bed},mc.cores=cores))}

	return(outnames)
}


fastq.bt		<- function( read1files, read2files=NULL , cores="max" , genome="hg19" , input="-q", q=10, makebam = TRUE , makebed = TRUE , chunkmbs=64){
	
	library(parallel)
	if(cores=="max"){cores<-detectCores()-1}
	
	indexfile<-getbtindex(genome)

	read1names<-basename(removeext(read1files))
	
	if(is.null(read2files)){
		paired=FALSE
		outnames<-paste(basename(removeext(read1files)),".sam",sep="")
	} else{
		paired=TRUE
		outnames<-paste(basename(removeext(read1files)),".sam",sep="")
	}
	
	for(i in 1:length(read1files)){
		cat(read1names[i],": aligning to genome\n")
		if(paired==TRUE){
			system(paste(
				"bowtie",
				"-S",
				"--chunkmbs",chunkmbs,
				"-p",cores,
				input,
				indexfile,
				"-1",read1files[i],
				"-2",read2files[i],
				outnames[i]
			))
		}
		else{
			system(paste(
				"bowtie",
				"-S",
				"--chunkmbs",chunkmbs,
				"-p",cores,
				input,
				indexfile,
				read1files[i],
				outnames[i]
			))
		}
	}

	if(cores > length(read1files)) { cores <- length(read1files) }
	if(makebam) { outnames<-unlist(mclapply(outnames,sam.2.bam,mc.cores=cores,q=q)) }
	if(makebed) { outnames<-unlist(mclapply(outnames,if(paired){bam.2.bedpe} else{bam.2.bed},mc.cores=cores))}

	return(outnames)
}



fastq.bwasw		<- function( read1files, read2files=NULL , cores="max" , genome="hg19" , mode="end-to-end", input="-q", q=10, makebam = TRUE , makebed = TRUE ){
	
	library(parallel)
	if(cores=="max"){cores<-detectCores()-1}
	
	indexfile<-getbwaindex(genome)

	read1names<-basename(removeext(read1files))
	
	if(is.null(read2files)){
		paired=FALSE
		outnames<-paste(basename(removeext(read1files)),".sam",sep="")
	} else{
		paired=TRUE
		outnames<-paste(basename(removeext(read1files)),".sam",sep="")
	}
	
	for(i in 1:length(read1files)){
		cat(read1names[i],": aligning to genome\n")
		if(paired==TRUE){
			system(paste(
				"bwa bwasw",
				"-t",cores,
				indexfile,
				read1files[i],
				read2files[i],
				">",
				outnames[i]
			))
		}
		else{
			system(paste(
				"bwa bwasw",
				"-t",cores,
				indexfile,
				read1files[i],
				">",
				outnames[i]
			))
		}
	}
	
	if(cores > length(read1files)) { cores <- length(read1files) }
	if(makebam) { outnames<-unlist(mclapply(outnames,sam.2.bam,mc.cores=cores,q=q)) }
	if(makebed) { outnames<-unlist(mclapply(outnames,if(paired){bam.2.bedpe} else{bam.2.bed},mc.cores=cores))}

	return(outnames)
}




topcuff			<- function( read1file, read2file="", genome="hg19", librarytype="fr-firststrand", speed="--b2-very-fast", outdir=basename(removeext(read1file)) , cores="max"){
	library(parallel)
	if(cores=="max"){cores<-detectCores()-1}
	read1name<-basename(removeext(read1file))
	indexfile<-getbt2index(genome)
	tindexfile<-gettindex(genome)
	gtf<-getgtf(genome)
	cat(read1name,": aligning to genome\n")
	cat(paste("tophat -T -o ",outdir," --library-type ",librarytype," -p ",cores," --transcriptome-index=",tindexfile," ",indexfile," ",read1file," ",read2file,sep=""))
	cat("\n\n")
	system(paste("tophat -T -o ",outdir," --library-type ",librarytype," -p ",cores," --transcriptome-index=",tindexfile," ",indexfile," ",read1file," ",read2file,sep=""))
	bamfile<-(paste(outdir,"/accepted_hits.bam",sep=""))
	cat(paste("cufflinks","-G",gtf,"--library-type",librarytype,"-p",cores,"-o",outdir,bamfile))
	cat("\n\n")
	system(paste("cufflinks","-G",gtf,"--library-type",librarytype,"-p",cores,"-o",outdir,bamfile))
	return(paste(outdir,"/genes.fpkm_tracking",sep=""))
}
bam.sort			<- function( bamfile, memory=0.5*totalmem(), sortby="position" ){
	bamname<-basename(removeext(bamfile))
	#cat(bamname,": using",memory/1000000000,"Gb memory to sort bam by",sortby,"\n")
	if(sortby=="name"){extraargs="-n";suffix="_nsort"}
	if(sortby=="position"){extraargs="";suffix="_psort"}
	outname<-paste(bamname,suffix,sep="")
	#cat(paste("samtools sort",extraargs,bamfile,outname))
	
	system(paste("samtools sort",extraargs,bamfile,outname))
	return(outname)
}
bam.index		<- function( bamfile ){
	bamname<-basename(removeext(bamfile))
	cat(bamname,": indexing\n")
	system(paste("samtools index",bamfile))
}
# ####################################
#      BED FUNCTIONS                 #
# ####################################
cfbg.make		<- function( bedfile ){
	bedname<-basename(removeext(bedfile))
	outname<-paste(bedname,".cfbg",sep="")
	cat(bedname,": calculating fragment centers\n")
	system(paste("awk '{a=int(($2+$3)/2+0.5); $4=$3-$2; $2=a; $3=a+1;print}' OFS='\t' ",bedfile," | sort -V -T . -k1,1 -k2,2n > ",outname,sep=""))
	return(outname)
}
bed.wigaverages		<- function( beds, bigwigs, bednames=basename(removeext(beds)), wignames=basename(removeext(bigwigs)), targetregions="" ){
	numbeds<-length(beds)
	numwigs<-length(bigwigs)
	shufbeds<-unlist(lapply(beds,bed.shuffle,include=targetregions))
	shufbednames<-paste(bednames,"_shuffled",sep="")
	beds<-c(beds,shufbeds)
	bednames<-c(bednames,shufbednames)
	numbeds<-length(beds)
	bedlist<-lapply(beds,read.tsv)
	beds<-unlist(lapply(1:numbeds,function(x) {
		bed<-bedlist[[x]][,1:3]
		bed[,4]<-1:nrow(bed)
		outname<-paste(bednames[x],".tmp",sep="")
		write.tsv(bed,file=outname)
		outname
	}))

	scoresbybed<-lapply(1:numbeds,function(x){
		lapply(1:numwigs,function(y){
			outname<-paste(bednames[x],"_avg_",wignames[y],".tmp",sep="")
			system(paste("bigWigAverageOverBed",bigwigs[y],beds[x],outname))
			as.numeric(readLines(pipe(paste("cut -f 6",outname))))
		})
	})
	
	scoresbywig<-lapply(1:numwigs,function(x){
		lapply(lapply(scoresbybed,"[",x),unlist)
	})
	pdf(file="bedwigaverages.pdf")
	par(mar=c(10,4,4,2))
 	
	for(i in 1:numbeds){
		boxplot(scoresbybed[[i]],names=wignames,main=bednames[i],las=2,cex.names=0.5,outline=F)
	}
	for(i in 1:numwigs){
		boxplot(scoresbywig[[i]],names=bednames,main=wignames[i],las=2,cex.names=0.5,outline=F)
	}
	dev.off()
}
bed.shuffle		<- function( bedfile, include=NULL, exclude=NULL, samechrom=FALSE, chromfirst=FALSE, genome="hg19" , outname=NULL ){
	library(tools)
	bedname<-basename(removeext(bedfile))
	ext<-file_ext(bedfile)
	cat(bedname,": shuffling features\n")
	extraargs<-""
	if(samechrom==TRUE){extraargs<-"-chrom"}
	if(chromfirst==TRUE){extraargs<-paste(extraargs,"-chromFirst")}
	if(is.null(include) == FALSE){extraargs<-paste(extraargs,"-incl",include)}
	if(is.null(exclude) == FALSE){extraargs<-paste(extraargs,"-excl",exclude)}
	if(is.null(outname)){outname<-paste(bedname,"_shuffled.",ext,sep="")}
	print(paste("bedtools shuffle",extraargs,"-i",bedfile,"-g",getgenomefile(genome),">",outname))
	
	system(paste("bedtools shuffle",extraargs,"-i",bedfile,"-g",getgenomefile(genome),">",outname))
	return(outname)
}
bed.words		<- function( bedfiles, shufflewithin=NULL, genomefa="/data/hg19/igenome/Sequence/WholeGenomeFasta/genome.fa", numbases=50, sizerange=c(1,500), numfrags="all", reference="center", cores="max", strand=FALSE, lspan=0.07, slop=0 ){
	library(parallel)
	if(reference %in% c("center","end") == FALSE){stop("not a valid reference point, use 'center' or 'end'")}
	if(cores=="max"){cores=detectCores()-1}
	numbeds<-length(bedfiles)
	bednames<-basename(removeext(bedfiles))
		
	cat("determining dinucleotide words\n")
	nwords<-expand.grid(c("A","C","G","T"),c("A","C","G","T"))
	nnums<-expand.grid(1:4,1:4)
	dwords<-unique(paste(nwords[,1],nwords[,2],sep=""))
	numwords<-length(dwords)
	beds<-bedfiles
	beds<-unlist(mclapply(1:numbeds, function(i){
		
		cat(bednames[i],": processing\n")
		
		if(is.infinite(sizerange[2]) == FALSE & sizerange[1]>1){
			beds[i]<-bed.parselengths(beds[i],brks=sizerange)
		}

		if(numfrags != "all"){
			cat(bednames[i],": grabbing",numfrags,"random fragments\n")
			newname<-paste(bednames[i],"_",numfrags,"random.bed",sep="")
			system(paste("randomLines",beds[i],numfrags,newname))
			beds[i]<-newname
		}

		if(reference=="center"){
			cat(bednames[i],": calculating fragment centers\n")
			newname<-paste(bednames[i],"_centersforwords.bed",sep="")
			system(paste("awk '{a=int(($2+$3)/2);b=int(",numbases,"/2); $2=a-b-1-",slop,"; $3=a+b+1+",slop,";print}' OFS='\t' ",beds[i],"| sort -T . -k1,1 -k2,2n >",newname))
			beds[i]<-newname
		}
		
		return(beds[i])
	},mc.cores=detectCores()))
	
	
	
	basemats<-mclapply(1:numbeds, function(i){
		cat(bednames[i],": extracting sequences from bed\n")
		# FIX TO GET DIFFERENT GENOMES
		seqbedname<-paste(bednames[i],".seqbed",sep="")
		system(paste("bedtools getfasta -tab -fi",paste(genomefa,sep=""),"-bed",beds[i],"-fo",seqbedname))
		
		cat(bednames[i],": reading in sequences\n")
		basemat<-toupper(readLines(pipe(paste("cut -f 2",seqbedname))))
		basemat<-basemat[grep("N",basemat,invert=T)]
		numseqs<-length(basemat)
		
		cat(bednames[i],": creating matrix from sequences\n")
		basemat<-t(simplify2array(lapply(1:numseqs,function(x) substring(basemat[x],1:numbases,1:numbases))))
		if(strand==TRUE){
			cat("adjusting for strand\n")
			curbed<-read.tsv(beds[i])
			negrows<-which(curbed[,6] == "-")
			cat(length(negrows),"minus-stranded features found\n")
			basemat[negrows,1:numbases]<-basemat[negrows,numbases:1]
		}
		basemat
	},mc.cores=cores)
	
	factormats<-lapply(basemats,as.data.frame)
	nucfreqs<-mclapply(1:numbeds, function(i){
		freqs<-as.data.frame(lapply(1:ncol(factormats[[i]]),function(x) as.vector(table(factormats[[i]][,x]))))
		freqsums<-matrix(rep(apply(freqs,2,sum),4),nrow=4)
		freqs<-freqs/freqsums
		row.names(freqs)<-c("A","C","G","T")
		colnames(freqs)<-1:ncol(freqs)
		freqs
	},mc.cores=cores)
	
	

	
	expectedwords<-lapply(1:numbeds, function(b){
		ew<-as.data.frame(mclapply(1:(numbases-1),function(n){
			unlist(lapply(1:16,function(x){
				nucfreqs[[b]][nnums[x,1],n]*nucfreqs[[b]][nnums[x,2],n+1]
			}))	
		},mc.cores=cores))
		ew<-rbind(ew,colSums(ew[c(1,4,13,16),]))
		ew<-rbind(ew,colSums(ew[c(6,7,10,11),]))
		row.names(ew)<-c(dwords,"TT/AA/TA/AT","CC/GG/CG/GC")
		colnames(ew)<-1:(numbases-1)
		ew
	})
	
	
	
	
	fragnums<-unlist(lapply(basemats,nrow))
	
	freqmats<-list()
	for(j in 1:numbeds){

		cat(bednames[j],": finding dinucleotide frequencies\n")
		freqmat<-simplify2array(mclapply(1:(numbases-1), function(x) {
			curwords<-paste(basemats[[j]][,x], basemats[[j]][,x+1], sep="")
			unlist(lapply(1:numwords, function(y) length(which(curwords==dwords[y]))))
		},mc.cores=detectCores()))
		numseqs<-nrow(basemats[[j]])
		#freqmat<-(freqmat/nrow(basemats[[j]]))*100
		freqmat<-rbind(freqmat,colSums(freqmat[c(1,4,13,16),]))
		freqmat<-rbind(freqmat,colSums(freqmat[c(6,7,10,11),]))
		row.names(freqmat)<-c(dwords,"TT/AA/TA/AT","CC/GG/CG/GC")
		freqmats[[j]]<-freqmat
		#cat("saving dinucleotide frequency matrix\n")
		#write.mat(freqmat,file=paste(bednames[j],".dnfmat",sep=""))
		
	}
	
	
	
	
	
	
	
	
	shufbeds<-unlist(mclapply(beds,bed.shuffle,include=shufflewithin))
	
	shufbednames<-paste(bednames,"_shuffled",sep="")
	shufbasemats<-mclapply(1:numbeds, function(i){
		cat(shufbednames[i],": extracting sequences from bed\n")
		# FIX TO GET DIFFERENT GENOMES
		seqbedname<-paste(shufbednames[i],".seqbed",sep="")
		system(paste("bedtools getfasta -tab -fi",paste(datapath,"hg19/igenome/Sequence/WholeGenomeFasta/genome.fa",sep=""),"-bed",shufbeds[i],"-fo",seqbedname))
		
		cat(shufbednames[i],": reading in sequences\n")
		basemat<-toupper(readLines(pipe(paste("cut -f 2",seqbedname))))
		numseqs<-length(basemat)
		

		cat(shufbednames[i],": creating matrix from sequences\n")
		basemat<-t(simplify2array(lapply(1:numseqs,function(x) substring(basemat[x],1:numbases,1:numbases))))
		if(strand==TRUE){
			cat("adjusting for strand\n")
			curbed<-read.tsv(shufbeds[i])
			negrows<-which(curbed[,6] == "-")
			cat(length(negrows),"minus-stranded features found\n")
			basemat[negrows,1:numbases]<-basemat[negrows,numbases:1]
		}
		basemat
	},mc.cores=cores)
	
	
	
	# ##################################
	#	SWITCH TO USING TABLE() ? MUST ADD ALL WORDS AT END TO MAKE SURE TABLE WORKS PROPERLY
	# #################################
	
	shuffreqmats<-list()
	for(j in 1:numbeds){

		cat(shufbednames[j],": finding dinucleotide frequencies\n")
		freqmat<-simplify2array(mclapply(1:(numbases-1), function(x) {
			curwords<-paste(shufbasemats[[j]][,x], shufbasemats[[j]][,x+1], sep="")
			unlist(lapply(1:numwords, function(y) length(which(curwords==dwords[y]))))
		},mc.cores=detectCores()))
		numseqs<-nrow(shufbasemats[[j]])
		#freqmat<-(freqmat/nrow(basemats[[j]]))*100
		freqmat<-rbind(freqmat,colSums(freqmat[c(1,4,13,16),]))
		freqmat<-rbind(freqmat,colSums(freqmat[c(6,7,10,11),]))
		row.names(freqmat)<-c(dwords,"TT/AA/TA/AT","CC/GG/CG/GC")
		shuffreqmats[[j]]<-freqmat
		#cat("saving dinucleotide frequency matrix\n")
		#write.mat(freqmat,file=paste(bednames[j],".dnfmat",sep=""))
		
	}
	logfreqmats<-freqmats
	logfreqmats<-lapply(1:numbeds,function(x) log2(freqmats[[x]]/shuffreqmats[[x]]))
	
	
	
	freqmats<-lapply(1:numbeds,function(x) (freqmats[[x]]/fragnums[x]))
	adjfreqmats<-lapply(1:numbeds,function(x) freqmats[[x]]/expectedwords[[x]])
	pdf(file="worddens.pdf")
	rb<-rainbow(numbeds)



	cat("plotting absolute nucleotide frequencies\n")
	for(j in 1:4){
		all<-unlist(lapply(1:numbeds,function(f) nucfreqs[[f]][j,]))
		plot(0,type="n",xlim=c(0,numbases),ylim=c(min(all),max(all)*1.2),main=row.names(nucfreqs[[1]]),xaxt='n',ylab="Frequency",xlab="nucleotide #")
		abline(v=seq(from=0,to=numbases,by=10),lwd=1,col="grey80")
		if(reference=="center"){
			abline(v=numbases/2,lwd=1,col="black")
		}
		for(k in 1:numbeds){
			x<-1:(numbases)
			y<-unlist(nucfreqs[[k]][j,])
			if(lspan > 0){
				y<-loess(y~x,span=lspan)$fitted
			}
			lines(x,y,col=rb[k])
		}
		legend("topleft",legend=paste(bednames,"(",fragnums," fragments)"),col=rb,lwd=2)
		axis(side=1,at=seq(from=0,to=numbases,by=10))
	}


	for(j in 1:numbeds){
		plot(0,type="n",xlim=c(0,numbases),ylim=c(min(nucfreqs[[j]]),max(nucfreqs[[j]])),main=bednames[j],xaxt='n',ylab="Frequency",xlab="nucleotide #")
		abline(v=seq(from=0,to=numbases,by=10),lwd=1,col="grey80")
		if(reference=="center"){
			abline(v=numbases/2,lwd=1,col="black")
		}
		lines(1:numbases,nucfreqs[[1]][1,],type="l",col="red")
		lines(1:numbases,nucfreqs[[1]][2,],type="l",col="green")
		lines(1:numbases,nucfreqs[[1]][3,],type="l",col="blue")
		lines(1:numbases,nucfreqs[[1]][4,],type="l",col="purple")
		legend("topright",legend=row.names(nucfreqs[[1]]),col=c("red","green","blue","purple"),lwd=3)
	}

	cat("plotting absolute dinucleotide frequencies\n")
	for(j in 1:18){
		all<-unlist(lapply(1:numbeds,function(f) freqmats[[f]][j,]))
		plot(0,type="n",xlim=c(0,numbases),ylim=c(min(all),max(all)*1.2),main=row.names(freqmat)[j],xaxt='n',ylab="Frequency (%)",xlab="nucleotide #")
		abline(v=seq(from=0,to=numbases,by=10),lwd=1,col="grey80")
		if(reference=="center"){
			abline(v=numbases/2,lwd=1,col="black")
		}
		for(k in 1:numbeds){
			x<-1:(numbases-1)
			y<-freqmats[[k]][j,]
			if(lspan > 0){
				y<-loess(y~x,span=lspan)$fitted
			}
			#lines(x,y,type="s",col=rgb(0,0,0,75,maxColorValue=255)
			lines(x,y,col=rb[k])
		}
		legend("topleft",legend=paste(bednames,"(",fragnums," fragments)"),col=rb,lwd=2)
		axis(side=1,at=seq(from=0,to=numbases,by=10))
	}
	
	cat("plotting composite dinucleotide frequencies\n")
	
	for(j in 1:numbeds){
		all<-freqmats[[j]][c(17,18),]
		plot(0,type="n",xlim=c(0,numbases),ylim=c(min(all,na.rm=TRUE),max(all,na.rm=TRUE)*1.2),main=bednames[j],xaxt='n',ylab="Frequency (%)",xlab="nucleotide #")
		abline(v=seq(from=0,to=numbases,by=10),lwd=1,col="grey80")
		if(reference=="center"){
			abline(v=numbases/2,lwd=1,col="black")
		}
		x<-1:(numbases-1)
		y1<-freqmats[[j]][17,]
		y2<-freqmats[[j]][18,]
		if(lspan > 0){
			y1<-loess(y1~x,span=lspan)$fitted
			y2<-loess(y2~x,span=lspan)$fitted
		}
		lines(x,y1,col="blue")
		lines(x,y2,col="red")
		legend("topleft",legend=c("AA/TT/AT/TA","CC/GG/CG/GC"),col=c("blue","red"),lwd=2)
		axis(side=1,at=seq(from=0,to=numbases,by=10))
	}
	
	




	cat("plotting adjusted dinucleotide frequencies\n")
	for(j in 1:18){
		all<-unlist(lapply(1:numbeds,function(f) adjfreqmats[[f]][j,]))
		plot(0,type="n",xlim=c(0,numbases),ylim=c(min(all),max(all)*1.2),main=row.names(adjfreqmats[[1]])[j],xaxt='n',ylab="log2 (observed / expected from nucleotide frequency)",xlab="nucleotide #")
		abline(v=seq(from=0,to=numbases,by=10),lwd=1,col="grey80")
		if(reference=="center"){
			abline(v=numbases/2,lwd=1,col="grey50")
		}
		for(k in 1:numbeds){
			x<-1:(numbases-1)
			y<-unlist(adjfreqmats[[k]][j,])
			if(lspan > 0){
				y<-loess(y~x,span=lspan)$fitted
			}
			#lines(x,y,type="s",col=rgb(0,0,0,75,maxColorValue=255)
			lines(x,y,col=rb[k])
		}
		legend("topleft",legend=paste(bednames,"(",fragnums," fragments)"),col=rb,lwd=2)
		axis(side=1,at=seq(from=0,to=numbases,by=10))
	}
	
	cat("plotting adjusted composite frequencies\n")
	
	for(j in 1:numbeds){
		all<-adjfreqmats[[j]][c(17,18),]
		plot(0,type="n",xlim=c(0,numbases),ylim=c(min(all,na.rm=TRUE),max(all,na.rm=TRUE)*1.2),main=bednames[j],xaxt='n',ylab="log2 (observed / expected from nucleotide frequency)",xlab="nucleotide #")
		abline(v=seq(from=0,to=numbases,by=10),lwd=1,col="grey80")
		if(reference=="center"){
			abline(v=numbases/2,lwd=1,col="grey50")
		}
		x<-1:(numbases-1)
		y1<-unlist(adjfreqmats[[j]][17,])
		y2<-unlist(adjfreqmats[[j]][18,])
		if(lspan > 0){
			y1<-loess(y1~x,span=lspan)$fitted
			y2<-loess(y2~x,span=lspan)$fitted
		}
		lines(x,y1,col="blue")
		lines(x,y2,col="red")
		legend("topleft",legend=c("AA/TT/AT/TA","CC/GG/CG/GC"),col=c("blue","red"),lwd=2)
		axis(side=1,at=seq(from=0,to=numbases,by=10))
	}
	cat("plotting normalized dinucleotide frequencies\n")
	
	for(j in 1:18){
		all<-unlist(lapply(1:numbeds,function(f) logfreqmats[[f]][j,]))
		plot(0,type="n",xlim=c(0,numbases),ylim=c(min(all,na.rm=TRUE),max(all,na.rm=TRUE)*1.2),main=row.names(freqmat)[j],xaxt='n',ylab="log2 ( observed / expected )",xlab="nucleotide #")
		abline(v=seq(from=0,to=numbases,by=10),lwd=1,col="grey80")
		if(reference=="center"){
			abline(v=numbases/2,lwd=1,col="black")
		}
		for(k in 1:numbeds){
			x<-1:(numbases-1)
			y<-unlist(logfreqmats[[k]][j,])
			if(lspan > 0){
				y<-loess(y~x,span=lspan)$fitted
			}
			#lines(x,y,type="s",col=rgb(0,0,0,75,maxColorValue=255)
			lines(x,y,col=rb[k])
		}
		legend("topleft",legend=paste(bednames,"(",fragnums," fragments)"),col=rb,lwd=2)
		axis(side=1,at=seq(from=0,to=numbases,by=10))
	}
	
	
	cat("plotting composite normalized dinucleotide frequencies\n")
	
	for(j in 1:numbeds){
		all<-logfreqmats[[j]][c(17,18),]
		plot(0,type="n",xlim=c(0,numbases),ylim=c(min(all,na.rm=TRUE),max(all,na.rm=TRUE)*1.2),main=bednames[j],xaxt='n',ylab="log2 ( observed / expected )",xlab="nucleotide #")
		abline(v=seq(from=0,to=numbases,by=10),lwd=1,col="grey80")
		if(reference=="center"){
			abline(v=numbases/2,lwd=1,col="black")
		}
		x<-1:(numbases-1)
		y1<-logfreqmats[[j]][17,]
		y2<-logfreqmats[[j]][18,]
		if(lspan > 0){
			y1<-loess(y1~x,span=lspan)$fitted
			y2<-loess(y2~x,span=lspan)$fitted
		}
		#lines(x,y,typ
		#lines(x,y,type="s",col=rgb(0,0,0,75,maxColorValue=255)
		lines(x,y1,col="blue")
		lines(x,y2,col="red")
		legend("topleft",legend=c("AA/TT/AT/TA","CC/GG/CG/GC"),col=c("blue","red"),lwd=2)
		axis(side=1,at=seq(from=0,to=numbases,by=10))
	}
	
	
	dev.off()
}
bed.sample		<- function( bed , count ){
	totallines<-filelines(bed)
	if(count > totallines ){stop("sample quantity is greater than available lines")}
	outname<-paste(basename(removeext(bed)),"_random",count,".",get.file.extensions(bed),sep="")
	system(paste("randomLines",beds[i],numfrags,newname))
}
bed.spacing		<- function( beds, sizerange=c(1,500), distrange=c(1,1000), numfrags="all", legendnames=basename(removeext(beds)),plotcolors=rainbow(length(beds)),reference="center", cores="max", readsperblock=1000 ){
	library(parallel)
	if(reference %in% c("center","end") == FALSE){stop("not a valid reference point, use 'center' or 'end'")}
	if(cores=="max"){cores=detectCores()-1}
	numbeds<-length(beds)
	bednames<-basename(removeext(beds))
	
	
	beds<-unlist(mclapply(1:numbeds, function(i){
		
		if(is.infinite(sizerange[2]) == FALSE & sizerange[1]>1){
			beds[i]<-bed.parselengths(beds[i],brks=sizerange)
		}

		if(numfrags != "all"){
			cat(bednames[i],": grabbing",numfrags,"random fragments\n")
			newname<-paste(bednames[i],"_",numfrags,"random.bed",sep="")
			system(paste("randomLines",beds[i],numfrags,newname))
			beds[i]<-newname
		}

		if(reference=="center"){
			beds[i]<-bed.centers(beds[i])
		}
		beds[i]
	},mc.cores=detectCores()))
	
	
	bedcoords<-mclapply(1:numbeds, function(i){
		cat(bednames[i],"loading read coordinates\n")
		as.numeric(readLines(pipe(paste("cut -f 2",beds[i]))))
	},mc.cores=detectCores())
	
	dbedcoords<-mclapply(1:numbeds, function(i){
		cat(bednames[i],"loading duplicate-start read coordinates\n")
		as.numeric(readLines(pipe(paste("cut -f 1,2",beds[i],"| uniq -D | cut -f 2"))))
	},mc.cores=detectCores())
	
	distances<-lapply(1:numbeds, function(i){
		coords<-bedcoords[[i]]
		cat(bednames[i],"measuring distances for all reads\n")
		numblocks<-floor(length(coords)/1000)
		dh<-as.data.frame(mclapply(1:numblocks,function(x){
			d<-dist(coords[((((x-1)*1000):(x*1000))+1)])
			d<-na.omit(as.vector(d[d<=distrange[2] & d>=distrange[1] ]))
			h<-hist(d,breaks=distrange[1]:(distrange[2]+1),plot=FALSE)$counts/1000
			return(h)
		},mc.cores=cores))
		rowMeans(dh)
		
	})
	dmax<-quantile(unlist(distances),probs=0.98)
	
	ddistances<-lapply(1:numbeds, function(i){
		coords<-dbedcoords[[i]]
		cat(bednames[i],"measuring distances for all duplicate-start reads\n")
		numblocks<-floor(length(coords)/readsperblock)
		dh<-as.data.frame(mclapply(1:numblocks,function(x){
			d<-dist(coords[((((x-1)*readsperblock):(x*readsperblock))+1)])
			d<-na.omit(as.vector(d[d<=distrange[2] & d>=distrange[1] ]))
			h<-hist(d,breaks=distrange[1]:(distrange[2]+1),plot=FALSE)$counts/readsperblock
			return(h)
		},mc.cores=cores))
		rowMeans(dh)
		
	})
	ddmax<-quantile(unlist(ddistances),probs=0.98)

	cat("plotting all-read distogram\n")
	plot(0,type="n",xlim=c(distrange[1],distrange[2]),ylim=c(0,dmax),xlab="Distance (bp)",ylab="count", main="phasogram for all reads")
	abline(v=c((0:15)*10),lwd=1,col="grey80")
	for(i in 1:numbeds){
		lines(distrange[1]:distrange[2],distances[[i]],col=plotcolors[i],lwd=2)
	}
	legend("topright",legend=legendnames, col=plotcolors, lwd=3)
	
	X11()
	
	cat("plotting duplicate-start read distogram\n")
	plot(0,type="n",xlim=c(distrange[1],distrange[2]),ylim=c(0,ddmax),xlab="Distance (bp)",ylab="count", main="phasogram for duplicate-start reads")
	abline(v=c((0:15)*10),lwd=1,col="grey80")
	for(i in 1:numbeds){
		lines(distrange[1]:distrange[2],ddistances[[i]],col=plotcolors[i],lwd=2)
	}
	legend("topright",legend=legendnames, col=plotcolors, lwd=3)
}
bed.hist			<- function( beds , xlims=c(0,500), plotcolors=rainbow(length(beds)), legendnames=basename(removeext(beds)) ) {
	library(parallel)
	numbeds<-length(beds)
	
	cat("reading in beds\n")
	bedlist<-mclapply( 1:numbeds, function(x){
		as.numeric(readLines(pipe(paste("awk '{print $3-$2}'",beds[x]))))
	}, mc.cores=detectCores() )
	cat("calculating score distributions\n")
	densitylist<-mclapply( bedlist, density, from=xlims[1], to=xlims[2], mc.cores=detectCores() )
	numfrags<-unlist(lapply(bedlist, length)) 
	ymax<-max(unlist(lapply(1:numbeds, function(x) max(densitylist[[x]]$y ))))
	cat("plotting score distributions\n")
	plot(0,type="n",ylim=c(0,ymax), xlim=xlims, xlab="Fragment Size", ylab="Density")
	for(i in 1:numbeds){
		lines(densitylist[[i]],col=plotcolors[i],lwd=2)
	}
	legend("topright",legend=paste(legendnames,", n=",numfrags), col=plotcolors, lwd=3)
}
bed.structures <- function( bedfile , promoter5 = c(-1000,0) , promoter3 = c(0,1000) , genebody = c(200,-200) , bedname = basename(removeext(bedfile) ) ) {
	bed<-read.tsv(bedfile)
	bed<-bed[order(bed$V1,bed$V2),]
	if(ncol(bed) < 6){stop("no strand information found")}
	pos<-which(bed$V6=="+")
	neg<-which(bed$V6=="-")
	posbed<-bed[pos,]
	negbed<-bed[neg,]
	utr5<-rbind(
		data.frame(V1=posbed$V1,V2=posbed$V2,V3=posbed$V7,stringsAsFactors=F),
		data.frame(V1=negbed$V1,V2=negbed$V8,V3=negbed$V3,stringsAsFactors=F)
	)
	utr5<-utr5[which(utr5$V3-utr5$V2 > 0),]
	utr3<-rbind(
		data.frame(V1=posbed$V1,V2=posbed$V8,V3=posbed$V3,stringsAsFactors=F),
		data.frame(V1=negbed$V1,V2=negbed$V2,V3=negbed$V7,stringsAsFactors=F)
	)
	utr3<-utr3[which(utr3$V3-utr3$V2 > 0),]
	prom5<-rbind(
		data.frame(V1=posbed$V1,V2=posbed$V2+promoter5[1],V3=posbed$V2+promoter5[2],stringsAsFactors=F),
		data.frame(V1=negbed$V1,V2=negbed$V3-promoter5[2],V3=negbed$V3-promoter5[1],stringsAsFactors=F)
	)
	prom3<-rbind(
		data.frame(V1=posbed$V1,V2=posbed$V3+promoter3[1],V3=posbed$V3+promoter3[2],stringsAsFactors=F),
		data.frame(V1=negbed$V1,V2=negbed$V2-promoter3[2],V3=negbed$V2-promoter3[1],stringsAsFactors=F)
	)
	orf<-data.frame(V1=bed$V1,V2=bed$V7,V3=bed$V8,stringsAsFactors=F)
	gene<-data.frame(V1=bed$V1,V2=bed$V2+genebody[1],V3=bed$V3+genebody[2],stringsAsFactors=F)
	gene<-gene[which(gene$V3-gene$V2 > 0),]

	write.tsv(utr5,file=paste0(bedname,"_utr5.bed"))
	write.tsv(utr3,file=paste0(bedname,"_utr3.bed"))
	write.tsv(prom5,file=paste0(bedname,"_prom5.bed"))
	write.tsv(prom3,file=paste0(bedname,"_prom3.bed"))
	write.tsv(gene,file=paste0(bedname,"_genebody.bed"))
	write.tsv(orf,file=paste0(bedname,"_orf.bed"))


}
bed.dist			<- function( featurefiles,annotationfiles,suffix,numshuffles=100,bpylim=5,b73=FALSE,genebed=NULL, scoremats=NULL, targetregions=NULL, cores="max", featnames=basename(removeext(featurefiles)), annonames=basename(removeext(annotationfiles)),usefeaturecenter=FALSE,useannocenter=FALSE,plotcolors=rainbow(length(featurefiles))){
	
	#gather info
	numannos<-length(annotationfiles)
	numfeats<-length(featurefiles)
	feats<-featurefiles
	annos<-annotationfiles
	if(length(featnames) != numfeats){stop("# of feature names must match # of features")}
	if(length(annonames) != numannos){stop("# of annotation names must match # of annotations")}
	
	tmpdir<-paste0("tmp_",paste(sample(letters,6),collapse=""))
	dir.create(tmpdir)
	
	if(is.null(suffix)){stop("must specify suffix")}
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	
	if(is.null(targetregions) == FALSE){
		cat("merging target regions\n")
		targets<-bed.merge(targetregions)
		cat("counting total annotations\n")
		numtotalannos<-unlist(mclapply(annos,filelines,mc.cores=cores))
		numtotalfeats<-unlist(mclapply(feats,filelines,mc.cores=cores))
		cat("pruning annotations\n")
		annos<-unlist(mclapply(annos,bed.intersect, b=targets, input="file", output="file", mc.cores=cores))
		feats<-unlist(mclapply(feats,bed.intersect, b=targets, input="file", output="file", mc.cores=cores))
	}
	
	cat("shuffling features\n")
	sfeats<-mclapply(1:numshuffles,function(z) unlist(lapply(feats,function(w) bed.shuffle(w,outname=paste0(tmpdir,"/",basename(removeext(w)),"_",z,"_shuffled.bed"),include=if(is.null(targetregions)==FALSE){targetregions} else{NULL}))) , mc.cores=cores , mc.preschedule=F)
	sfeatnames<-paste(featnames,"_shuffled",sep="")
	
	cat("counting features\n")
	numeachanno<-unlist(mclapply(annos,filelines,mc.cores=cores))
	numeachfeat<-unlist(mclapply(feats,filelines,mc.cores=cores))
	
	
	cat("calculating centers\n")
	if(usefeaturecenter==TRUE){
		feats<-unlist(mclapply(feats,bed.centers,mc.cores=cores))
		sfeats<-unlist(mclapply(sfeats,bed.centers,mc.cores=cores))
	}
	if(useannocenter==TRUE){
		annos<-unlist(mclapply(snnos,bed.centers,mc.cores=cores))
	}
	
	cat("finding nearby genes\n")
	if(is.null(genebed)==FALSE){
		feats<-unlist(mclapply(feats,bed.closest,bed2=genebed,mc.cores=cores))
		sfeats<-unlist(mclapply(sfeats,bed.closest,bed2=genebed,mc.cores=cores))
	}
	
	
	
	cat("finding overlaps\n")
	fxa<-as.data.frame(lapply(1:numannos, function(a){
		unlist(lapply(1:numfeats,function(f){
			as.numeric(system(paste("bedtools intersect -u -a",feats[f],"-b",annos[a],"| wc -l"),intern=TRUE))
			}))
		}))
	
	sfxa<-as.data.frame(mclapply(1:numannos, function(a){
		unlist(lapply(1:numfeats,function(f){
			mean(as.numeric(unlist(lapply(1:numshuffles, function(z){
				system(paste("bedtools intersect -u -a",sfeats[[z]][f],"-b",annos[a],"| wc -l"),intern=TRUE)
			}))))
		}))
	},mc.cores=cores,mc.preschedule=F))

	#bp<-matrix(numeachfeat,nrow=2,ncol=numannos)
	#bp<-sweep(bp,1,fxa,"-")

	#absolute #
	absolute<-t(
		matrix(
			c(
				unlist(as.data.frame(t(fxa))),
				rep(numeachfeat,each=numannos)-unlist(as.data.frame(t(fxa)))
			),
			nrow=numfeats*numannos , ncol=2)
	)
	fxa.pct<-100*fxa/numeachfeat
	sfxa.pct<-100*sfxa/numeachfeat
	fxa.ratio<-fxa.pct/sfxa.pct

s<-rep(0,numfeats*numannos)
s[(1:numfeats)*numannos+1]<-1
s<-s[1:(numfeats*numannos)]
barplot(unlist(as.data.frame(t(fxa.ratio))), col=rainbow(numannos),las=3,xaxt='n',space=s)
axis(1,(1:numfeats)*ceiling(numannos+1)-ceiling(numannos/2),labels=featnames,las=3)
legend("topleft",legend=annonames,fill=rainbow(numannos))


barplot(unlist(as.data.frame(t(fxa.pct))), col=rainbow(numannos),las=3,xaxt='n',space=s)




	#names=rep(annonames,numfeats) 
	barplot(absolute , names=rep(annonames,numfeats) , las=3)
	barplot(unlist(as.data.frame(t(fxa.pct))), col=rainbow(numannos),las=3,xaxt='n')
	axis(1,(1:numfeats)*numannos-numannos/2,labels=featnames)
	barplot(unlist(as.data.frame(t(sfxa.pct))), names=rep(annonames,numfeats) , las=3)
	barplot(unlist(as.data.frame(t(fxa.ratio))), names=rep(annonames,numfeats) , las=3)
	

	row.names(fxa.pct)=row.names(sfxa.pct)=row.names(fnxa.pct)=row.names(sfnxa.pct)=annonames
	colnames(fxa.pct)=featnames
	colnames(sfxa.pct)=sfeatnames
	colnames(fnxa.pct)=featnames
	colnames(sfnxa.pct)=sfeatnames
	a<-cbind(fxa.pct,sfxa.pct)
	b<-fxa.pct/sfxa.pct
	b[is.infinite(b)]<-NA
	pdf(file=paste("beddist_",suffix,".pdf",sep=""))
	par(mar=c(10,4,4,2))
	barplot(a,beside=T,las=2,ylab="% features overlapping with annotation",ylim=c(0,100),col=rainbow(numannos))
	legend("topright",legend=annonames,col=rainbow(numannos),lwd=3)
	
	bp<-barplot(b,beside=T,las=2,ylab="enrichment over shuffled",col=rainbow(numannos))
	bp<-barplot(b,beside=T,las=2,ylab="enrichment over shuffled",col=rainbow(numannos),ylim=c(0,bpylim))
	legend("topright",legend=annonames,col=rainbow(numannos),lwd=3)
	abline(h=1,col="grey30",lwd=1)
	text(x=bp,y=b,labels=as.numeric(b),xpd=TRUE,srt=90,adj=0)
	cat("check 1\n")
	
	
	if(is.null(scoremats) == FALSE & is.null(genebed)==FALSE){
		cat("pass1\n")
		matlist<-mclapply(scoremats,read.mat,mc.cores=cores)
		#if(do.call(identical,lapply(matlist,nrow))==FALSE){stop("matrices have different numbers of rows\n")}
		if(b73==TRUE){
			matgenes<-row.names(matlist[[1]])
		} else{matgenes<-unlist(lapply(strsplit(row.names(matlist[[1]]),"_"),"[",1))}
		
		fxa.genes<-lapply(1:numfeats, function(f) {
			lapply(1:numannos, function(a) {
				unique(readLines(pipe(paste("cut -f 4",fxa[a,f]))))
			})
		})
		sfxa.genes<-lapply(1:numfeats, function(f) {
			lapply(1:numannos, function(a) {
				unique(readLines(pipe(paste("cut -f 4",sfxa[a,f]))))
			})
		})
		fnxa.genes<-lapply(1:numfeats, function(f) {
			lapply(1:numannos, function(a) {
				unique(readLines(pipe(paste("cut -f 4",fnxa[a,f]))))
			})
		})
		
		sfnxa.genes<-lapply(1:numfeats, function(f) {
			lapply(1:numannos, function(a) {
				unique(readLines(pipe(paste("cut -f 4",sfnxa[a,f]))))
			})
		})
		
		
		all.genes<-unique(readLines(pipe(paste("cut -f 4",genebed))))
		all.featgenes<-lapply(1:numfeats,function(x) unique(readLines(pipe(paste("cut -f 4",feats[x])))))
		all.sfeatgenes<-lapply(1:numfeats,function(x) unique(readLines(pipe(paste("cut -f 4",sfeats[x])))))
		cat("checking if scoremats has values\n")
		for(s in 1:length(scoremats)){
			cat("finding gene scores for scoremats",s,"\n")
			fxa.genescores<-lapply(1:numfeats, function(f) {
				lapply(1:numannos, function(a) {
					if(fxa.counts[a,f]>0){
						rowMeans(as.matrix(matlist[[s]][which(matgenes %in% fxa.genes[[f]][[a]]),]),na.rm=TRUE)
					}
					else{
						NA
					}
				})
			})
			sfxa.genescores<-lapply(1:numfeats, function(f) {
				lapply(1:numannos, function(a) {
					if(sfxa.counts[a,f]>0){
						rowMeans(as.matrix(matlist[[s]][which(matgenes %in% sfxa.genes[[f]][[a]]),]),na.rm=TRUE)
					}
					else{
						NA
					}
				})
			})
			fnxa.genescores<-lapply(1:numfeats, function(f) {
				lapply(1:numannos, function(a) {
					if(fnxa.counts[a,f]>0){
						rowMeans(as.matrix(matlist[[s]][which(matgenes %in% fnxa.genes[[f]][[a]]),]),na.rm=TRUE)
					}
					else{
						NA
					}
				})
			})
			
			
			all.genescores<-rowMeans(matlist[[s]][which(matgenes %in% all.genes),],na.rm=TRUE)
			
			all.featgenescores<-lapply(1:numfeats,function(x) rowMeans(matlist[[s]][which(matgenes %in% all.featgenes[[x]]),],na.rm=TRUE))
			all.sfeatgenescores<-lapply(1:numfeats,function(x) rowMeans(as.matrix(matlist[[s]][which(matgenes %in% all.sfeatgenes[[x]]),]),na.rm=TRUE))
			
			
			#all<-list(1:numfeats, function(f) lapply(c(fxa.genescores,fnxa.genescores,sfxa.genescores), "[" ,  f ) )
			#all<-c(list(all.genescores),all.featgenescores,unlist(fxa.genescores,recursive=F),unlist(fnxa.genescores,recursive=F))
			
			all<-unlist(lapply(1:numfeats,function(f) unlist(lapply(1:numannos,function(a) list(fxa.genescores[[f]][[a]],fnxa.genescores[[f]][[a]],sfxa.genescores[[f]][[a]])),recursive=F)),recursive=F)
			#all<-unlist(lapply(1:numfeats,function(f) unlist(lapply(c(fxa.genescores,fnxa.genescores,sfxa.genescores), lapply , "[" ,  f ),recursive=F)),recursive=F)
			all<-c(list(all.genescores),all.featgenescores,all.sfeatgenescores,all)
			allnames<-basename(removeext(unlist(t(cbind(unlist(fxa),unlist(fnxa),unlist(sfxa))))))
			ats=c(1,1+(1:(numfeats*2)),unlist(lapply(1:(numfeats*numannos),function(x) x*3+1:3+x))+2*numfeats-2)
			par(mar=c(10,4,4,2))
			b<-boxplot(all,plot=F)
			boxplot(all,at=ats,col=c("white",rainbow(numfeats),rainbow(numfeats),rep(rainbow(numannos*numfeats),each=3)),names=c("all genes",featnames,featnames,allnames),las=3,cex.axis=0.7,outline=F,main=basename(removeext(scoremats[s])))
			#axis,1,at=ats,labels=
			points(ats,unlist(lapply(all,mean,na.rm=TRUE)))
			ats2<-b$stats[5,]
			text(ats,ats2,labels=paste(" ",lapply(all,length)),xpd=T,cex=0.7,srt=90,adj=0)
			
		}
		
		
	}
	dev.off()
}
bed.recenter		<- function( bed,regionsize,center=FALSE,strand=TRUE,start=2,stop=3,input="object",mergebed=FALSE,buffer=0){
	if(input=="object"){
		bedname<-deparse(substitute(bed))
		windowbed<-bed
	}
	if(input=="file"){
		bedname<-basename(removeext(bed))
		windowbed<-read.tsv(bed)
	}
	if(strand==TRUE){
		negrows<-which(windowbed[,6]=="-")
		windowbed[negrows,2]<-windowbed[negrows,stop]
		windowbed[-negrows,2]<-windowbed[-negrows,start]
	}
	else{windowbed[,2]=windowbed[,start]}
	if(center==TRUE){
		windowbed[,2]=round((windowbed[,start]+windowbed[,stop])/2)
	}

	windowbed[,2]<-windowbed[,2]-regionsize/2
	windowbed[,3]<-windowbed[,2]+regionsize
	windowbed<-windowbed[which(windowbed[,2]> buffer),]
	winbedname<-paste(bedname,".winbed",sep="")
	write.tsv(windowbed,file=winbedname)
	return(winbedname)
}
bed.split		<- function( bed,regionsize,windowsize, output="file", input="object"){

	numwindows<-(regionsize/windowsize)
	
	if(input=="object"){
		bedname<-deparse(substitute(bed))
		curbed<-bed
	}
	if(input=="file"){
		bedname<-basename(removeext(bed))
		curbed<-read.tsv(bed)
	}
	
	
	bedrows<-nrow(curbed)
	bedcols<-ncol(curbed)
	
	#check parameters
	if(ceiling(numwindows)!=floor(numwindows)){stop("regionsize is not a multiple of windowsize")}
	if(ceiling(regionsize)!=floor(regionsize)){stop("regionsize must be an even number")}
	
	#make covbed
	flanks<- (  0:(numwindows-1)  ) * windowsize
	winstarts<-as.numeric( unlist( lapply( curbed[,2] , function(x) x + flanks ) ) )
	covbed<-data.frame("V1"=rep(curbed[,1],each=numwindows),"V2"=winstarts,"V3"=winstarts+windowsize,"V4"=rep(1:bedrows,each=numwindows))
	covbedname<-paste(bedname,".covbed",sep="")
	
	#return covbed
	write.tsv(covbed,file=covbedname)
	system(paste("sort -T . -S 10000000000b -k1,1 -k2,2n",covbedname,"-o",covbedname))
	return(covbedname)
}
bed.split.meta		<- function( bed, metasize, windowsize, flank, input="file", output="file", start=2, stop=3 ){
	library(parallel)
	if(input=="object"){
		bedname<-deparse(substitute(bed))
		curbed<-bed
	}
	if(input=="file"){
		bedname<-basename(removeext(bed))
		curbed<-read.tsv(bed)
	}

	bedrows<-nrow(curbed)
	bedcols<-ncol(curbed)

	numwindows<-metasize/windowsize
	numflankwindows<-flank/windowsize
	leftwinstarts<-0:(numflankwindows-1) * windowsize - flank
	leftwinends<-1:numflankwindows * windowsize - flank
	rightwinstarts<-0:(numflankwindows-1) * windowsize
	rightwinends<-1:numflankwindows * windowsize
	genesizes<-curbed[,stop]-curbed[,start]
	sizecos<-genesizes/metasize
	genewinstarts<-mclapply(1:bedrows, function(x) curbed[x,start] + 0:(numwindows-1) * windowsize * sizecos[x], mc.cores=detectCores())
	genewinends<-mclapply(1:bedrows, function(x) curbed[x,start] + 1:numwindows * windowsize * sizecos[x], mc.cores=detectCores())
	genewinstarts<-mclapply(genewinstarts,round,mc.cores=detectCores())
	genewinends<-mclapply(genewinends,round,mc.cores=detectCores())
	
	covbed<-data.frame(
		"V1"=rep(curbed[,1],each=numwindows+numflankwindows*2),
		"V2"=unlist(lapply(1:bedrows,function(x){ c(leftwinstarts+curbed[x,start],genewinstarts[[x]],rightwinstarts+curbed[x,stop]) } )),
		"V3"=unlist(lapply(1:bedrows,function(x){ c(leftwinends+curbed[x,start], genewinends[[x]],rightwinends+curbed[x,stop]) } )),
		"V4"=rep(1:bedrows,each=numwindows+numflankwindows*2)
	)
	covbedname<-paste(bedname,".covbed",sep="")
	#return covbed
	write.tsv(covbed,file=covbedname)
	system(paste("sort -T . -S 10000000000b -k1,1 -k2,2n",covbedname,"-o",covbedname))
	return(covbedname)
}
bed.sort			<- function( bedfile ){
	library(tools)
	ext<-file_ext(bedfile)
	system(paste("sort -T . -k1,1 -k2,2n",bedfile,"-o",bedfile))
	return(bedfile)
}
bed.parselengths		<- function( fragfile,brks=c(0,70,140,200),cores=1 ){
	
	#outputnames<-gsub("[[:punct:]]","",removeext(fragfiles))
	library(parallel)
	library(tools)
	if(cores=="max"){cores=detectCores()-1}
	if(length(fragfile) > 1){stop("bed.parselengths can only take 1 file")}
	numbreaks<-length(brks)
	brknames<-formatC(brks,width=nchar(brks[numbreaks]),format="d",flag="0")
	brknames[which(is.infinite(brks))]<-"Inf"
	print(brknames)
	ext<-file_ext(fragfile)
	fragname<-basename(removeext(fragfile))
	numfrags<-filelines(fragfile)
	scalar<-1000000/numfrags
	if(numbreaks==1){stop("need two or more breakpoints")}
	cat(fragname,": ",numfrags/1000000," million fragments\n",sep="")
	#parse fragments by length
	outnames<-unlist(mclapply(1:(numbreaks-1),function(i){
		cat(fragname,": ","processing fragments of length ",brks[i],"-",brks[i+1],"\n",sep="")
		subfragname<-paste0(fragname,"_",brknames[i],"-",brknames[i+1],".bed",sep="")
		if(brks[i+1]==Inf){
			print(paste("awk '$3-$2 >=",brks[i],"'",fragfile,">",subfragname))
			system(paste("awk '$3-$2 >=",brks[i],"'",fragfile,">",subfragname))
		} else{
			system(paste("awk '$3-$2 >=",brks[i],"&& $3-$2 <",brks[i+1],"'",fragfile,">",subfragname))
		}
		numsubfrags<-filelines(subfragname)
		cat(fragname,": ",numsubfrags,"/",numfrags," found (",100*numsubfrags/numfrags,"%)\n",sep="")
		return(subfragname)
	},mc.cores=cores))
	return(outnames)
}
bed.centers		<- function( bedfile ){
	if(length(bedfile) > 1){stop("bed.centers can only take 1 file")}
	#MAKE FILE OF FRAGMENT CENTERS
	outname<-paste(basename(removeext(bedfile)),"_centers.bed",sep="")
	cat("calculating fragment centers\n")
	system(paste("awk '{a=int(($2+$3)/2+0.5); $2=a; $3=a+1;print}' OFS='\t' ",bedfile," | sort -T . -k1,1 -k2,2n > ",outname,sep=""))
	return(outname)
}
bed.windowcov		<- function( bedfile, windowbed="", scalar="auto", windowsize=25, stepsize=windowsize, genome="hg19"){
	
	#check if only 1 file
	if(length(bedfile) > 1){stop("bed.windowcov can only take 1 file")}
	
	#get base name
	bedname<-basename(removeext(bedfile))
	outname<-paste(basename(removeext(bedfile)),"_win",windowsize,if(stepsize<windowsize){paste("_step",stepsize,sep="")},".bg",sep="")
	overlapsize<-(windowsize-stepsize)/2
	lsub=floor(overlapsize)
		
	#calculate scaling factor
	if(scalar=="auto"){
		cat(bedname,": calculating scalar\n")
		scalar=1000000/filelines(bedfile)
	}
	
	#make windows to calculate coverage over
	if(windowbed==""){
		cat(bedname,": making window file\n")
		wbedfile<-bedfile
		if(stepsize < windowsize){
			expand=round((windowsize-stepsize)/2)
			wbedfile<-bed.slop(wbedfile,expand,expand,genome=genome)
			wbedfile<-bed.merge(wbedfile,flank=expand)
		}
		windowbed=bed.makewindows(wbedfile,windowsize=windowsize,stepsize=stepsize)
	}
	#calculate coverage over windows
	cat(bedname,": calculating coverage\n")
	#system(paste("bedtools coverage -counts -a",bedfile,"-b",windowbed,"| awk '{print $1,$2,$3,$4*",scalar,"}' OFS='\\t' | sort -T . -k1,1 -k2,2n >",outname))
	
	system(paste(
		"bedtools coverage -counts -a",
		bedfile,
		"-b",
		windowbed,
		"| awk '{print $1,$2,$3,$4*",
		scalar,
		"}' OFS='\\t' |",
		if(stepsize<windowsize){paste("awk '{print $1,$2+",lsub,",$2+",lsub,"+",stepsize,",$4}' OFS='\t' |")},
		"sort -T . -k1,1 -k2,2n >",
		outname
	))
	
	
	cat(bedname,": making bigWig\n")
	outname<-bedGraphToBigWig(outname,genome=genome)
	return(outname)
}
bed.utrs			<- function( bedfile ){
}
bed.closest		<- function( bed1, bed2, strand=TRUE ){
	
	
	#ADD:
	# check if files exist
	# check if bed2 has a name column
	library(tools)
	ext<-file_ext(bed1)
	bedname<-basename(removeext(bed1))
	refname<-basename(removeext(bed2))
	curbed<-read.tsv(bed1)
	
	
	curbed$V4<-readLines(pipe(paste("cut -f 1,2,3",bed1,"| bedtools closest -t first -a /dev/stdin -b",bed2,"| cut -f 7")))
	if(strand==TRUE){
		curbed$V5<-1
		curbed$V6<-readLines(pipe(paste("cut -f 1,2,3",bed1,"| bedtools closest -t first -a /dev/stdin -b",bed2,"| cut -f 9")))
	}
	
	
	outname<-paste(bedname,"_closest-",refname,".",ext,sep="")
	write.tsv(curbed,file=outname)
	return(outname)
}
# ####################################
#      BEDTOOLS WRAPPER FUNCTIONS    #
# ####################################
bed.intersect		<- function( a, b, type="-u", fraction="", output="file", sorted=FALSE, input="file"){
	#TAKES (A) BED FILENAME AND KEEPS ONLY FEATURES THAT TOUCH (B), RETURNS RESULT OBJECT
	#if(input=="object"){
	#	bed1name<-deparse(substitute(a))
	#	bed2name<-deparse(substitute(b))
	#	write.tsv(paste(a,"*.bed",sep=""),file=bed1name)
	#	write.tsv(paste(b,"*.bed",sep=""),file=bed2name)
	#}
	
	
	extraargs<-""
	if(type != ""){extraargs<-type}
	if(fraction != ""){extraargs<-paste(extraargs,"-f",fraction)}
	if(sorted == TRUE){extraargs<-paste(extraargs,"-sorted")}
	
	
	if(input=="file"){
		bed1name<-basename(removeext(a))
		bed2name<-basename(removeext(b))
	}
	if(output=="object"){
		read.delim(pipe(paste("bedtools intersect ",extraargs," -a ",a," -b ",b,sep="")),header=FALSE,stringsAsFactors=FALSE)
	}
	if(output=="file"){
		filename<-paste(bed1name,"_x_",bed2name,".bed",sep="")
		system(paste("bedtools intersect ",extraargs," -a ",a," -b ",b," > ",filename,sep=""))
		return(filename)
	}
}
bed.merge		<- function( bedfile, flank=0, operation="none"){
    library(tools)
    ext<-file_ext(bedfile)
    if(flank==0){suffix=""}
    else{suffix=flank}
    if(operation=="none"){moreargs=""}
    else{
	suffix=c(suffix,operation)
	moreargs=c("-scores",operation)
    }
    
    outname<-paste(basename(removeext(bedfile)),"_merged",suffix,".",ext,sep="")
    
    system(paste("bedtools merge -d",flank,moreargs,"-i",bedfile,">",outname))
    return(outname)
}
bed.makewindows		<- function( bedfile, windowsize=25, stepsize=windowsize, mergebed=TRUE, mergeflank=500, outname="default", genome=FALSE){
	cat(bedfile,": making windows\n")
	if(outname=="default"){outname<-paste(basename(removeext(bedfile)),"_win",windowsize,".bed",sep="")}
	if(mergebed==TRUE & genome==FALSE){
		bedfile<-bed.merge(bedfile,flank=mergeflank)
	}
	if(windowsize>stepsize){
		extraargs<-"| awk 'x !~ $3; {x=$3}'"
	}
	else{
		extraargs<-""
	}
	if(genome==TRUE){
		bedfile<-getgenomefile(bedfile)
		inputarg="-g"
	} else{
		inputarg="-b"
	}
	print(paste("bedtools makewindows -w",windowsize,"-s",stepsize,inputarg,bedfile,extraargs,">",outname))
	
	system(paste("bedtools makewindows -w",windowsize,"-s",stepsize,inputarg,bedfile,extraargs,">",outname))
	return(outname)
}
bed.genomecov		<- function( bedfile, covmode="-bg" , genome="hg19", scalar="auto", makebigwig=TRUE, bam=FALSE ){
	if(length(bedfile) > 1){stop("bed.genomecov can only take 1 file")}
	if(scalar=="auto" & bam == FALSE){cat(bedfile,": counting fragments\n");scalar<-1000000/filelines(bedfile)}
	if(scalar=="auto" & bam == FALSE){cat(bedfile,": counting fragments\n");scalar<-1000000/bam.count(bedfile)}
	if(covmode=="-d"){pipes<-"| awk '{print $1,$2,$2+1,$3}' OFS='\t'"}
	if(covmode=="-dz"){pipes<-"| awk '{print $1,$2+1,$2+2,$3}' OFS='\t'"}
	if(covmode=="-bg"){pipes<-""}
	outname<-paste(basename(removeext(bedfile)),".bg",sep="")
	cat(bedfile,": calculating genome coverage\n")
	if(bam==TRUE){inputarg="-ibam"} else{inputarg="-i"}
	system(paste("bedtools genomecov",covmode,"-g",getgenomefile(genome),"-scale",scalar,inputarg,bedfile,pipes,">",outname))
	cat(bedfile,": genome coverage complete\n")
	if(makebigwig==TRUE){outname<-bedGraphToBigWig(outname,genome=genome)}
	return(outname)
}
bed.coverage		<- function( bedfile, covbedfile, numwindows, input="file", output="file"){
	if(length(bedfile) > 1){stop("bed.coverage can only take 1 file")}
	covlist<-read.delim(pipe(paste("bedtools coverage -counts -a ",bedfile," -b ",covbedfile," | sort -V -T . -k4,4n -k2,2n | cut -f 5",sep="")),header=FALSE,stringsAsFactors=FALSE)
	covlist<-t(matrix(covlist$V1,nrow=numwindows))
	covlist
}
bed.slop			<- function( bedfile, expandleft, expandright, genome="hg19" ){
	library(tools)
	bedname<-basename(removeext(bedfile))
	ext<-file_ext(bedfile)
	outname<-paste(basename(removeext(bedfile)),"_sl",expandleft,"_sr",expandright,".",ext,sep="")
	system(paste("bedtools slop","-i",bedfile,"-g",getgenomefile(genome),"-l",expandleft,"-r",expandright,">",outname))
	return(outname)
}
# ####################################
#      BEDGRAPH FUNCTIONS            #
# ####################################
bg.uniondiff		<- function( bglist1, bglist2, pattern="", replacement="", cores="max" ){
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	if(pattern=="" | replacement==""){stop("YOU MUST SPECIFY 'pattern' AND 'replacement' FOR OUTPUT FILE NAMES")}
	bglist1names<-basename(bglist1)
	bglist2names<-basename(bglist2)
	outbgnames<-gsub(pattern,replacement,bglist1names)
	nametab<-data.frame("BG1"=bglist1names,"BG2"=bglist2names,"OUTPUT"=outbgnames,stringsAsFactors=FALSE)
	print(nametab)
	if(length(which(bglist1names %in% outbgnames)==TRUE)>0){stop("AT LEAST ONE OUTPUT FILE WILL REPLACE AN INPUT FILE")}
	cat("making unionbg\n")
	system(paste("bedtools unionbedg -i",paste(c(bglist1,bglist2),collapse=" "),"> tmpunionbg.tmp"))
	#cat(paste("bedtools unionbedg -i",paste(c(bglist1,bglist2),collapse=" "),"> tmpunionbg.tmp\n"))
	cat("calculating differences\n")
	mclapply(1:length(bglist1), function(x){
		#cat(paste("awk '{print $1,$2,$3,$",3+x,"-$",3+x+length(bglist1),"' OFS='\\t' tmpunionbg.tmp | cut -f 1,2,3,4 > ",outbgnames[x],"\n",sep=""))
		system(paste("awk '{print $1,$2,$3,$",3+x,"-$",3+x+length(bglist1),"}' OFS='\\t' tmpunionbg.tmp | cut -f 1,2,3,4 > ",outbgnames[x],sep=""))
	},mc.cores=cores)
}
bg.diff			<- function( bglist1, bglist2, operation="log2", pattern="", replacement="", cores="max" ){
	if(length(bglist1) != length(bglist2)){stop("number of files in bg lists don't match")}
	numbgs<-length(bglist1)
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	if(pattern=="" | replacement==""){stop("YOU MUST SPECIFY 'pattern' AND 'replacement' FOR OUTPUT FILE NAMES")}
	outbgnames<-gsub(pattern,replacement,basename((bglist1)))
	nametab<-data.frame("BG1"=bglist1,"BG2"=bglist2,"OUTPUT"=outbgnames,stringsAsFactors=FALSE)
	print(nametab)
	if(length(which(bglist1 %in% outbgnames)==TRUE)>0){stop("AT LEAST ONE OUTPUT FILE WILL REPLACE AN INPUT FILE")}
	cat("loading files\n")
	bg1<-mclapply(bglist1,read.tsv,mc.cores=cores)
	bg2<-mclapply(bglist2,read.tsv,mc.cores=cores)
	bg1lines<-unlist(lapply(bg1,nrow))
	bg2lines<-unlist(lapply(bg2,nrow))
	if(bg1lines != bg2lines){stop("files do not match in rows")}
	cat("calculating differences\n")
	bg3<-mclapply(1:numbgs,function(x){
		if(operation=="difference"){
			bg1[[x]][,4]<-bg1[[x]][,4]-bg2[[x]][,4]
		}
		if(operation=="sum"){
			bg1[[x]][,4]<-bg1[[x]][,4]+bg2[[x]][,4]
		}
		if(operation=="log2"){
			bg1[[x]][,4]<-log2(bg1[[x]][,4]/bg2[[x]][,4])
		}
		if(operation=="mean" | operation=="average"){
			bg1[[x]][,4]<-(bg1[[x]][,4]+bg2[[x]][,4])/2
		}
		write.tsv(bg1[[x]],file=outbgnames[x])
	},mc.cores=cores)
	return(outbgnames)
}
bg.ops <- function(bglist , outputname , operation = "mean" , cores="max"){
	numbgs<-length(bglist)
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	bgs<-mclapply(bglist,read.tsv,mc.cores=cores)
	scores<-as.data.frame(lapply(bgs,"[",4))
	if(operation=="mean"){
		outscores<-rowMeans(scores,na.rm=T)
	}
	outbg<-data.frame(bgs[[1]])
	outbg$V4<-outscores
	write.tsv(outbg,file=outputname)

}
bg.qnorm			<- function( bgfiles, cores="max", comparable=TRUE ){
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	numfiles<-length(bgfiles)
	bgnames<-basename(removeext(bgfiles))
	outbgnames<-paste(bgnames,"_qnorm.bg",sep="")
	cat("loading files\n")
	bglist<-mclapply(bgfiles,read.tsv,mc.cores=cores)
	cat("determining if files are comparable\n")
	#if(unique(unlist(lapply(bglist,nrow))) > 1){comparable=FALSE}else{comparable=TRUE}
	if(comparable==FALSE){
		cat("making unionbg\n")
		allscores<-read.delim(pipe(paste("bedtools unionbedg -filler noscore -i ",paste(bgfiles,collapse=" ")," | grep -v 'noscore'",collapse=" ")),stringsAsFactors=FALSE,header=FALSE)
		
		cat("normalizing data\n")
		for(i in 2:numfiles){
			allscores[,3+i][order(allscores[,3+i])]<-allscores[,4][order(allscores[,4])]
		}
		
		cat("saving normalized data\n")
		for(i in 1:numfiles){
			write.tsv(cbind(allscores[,1:3],allscores[,3+i]), file=outbgnames[i])
		}
	}
	if(comparable==TRUE){
		cat("files are comparable\n")
		rankscores<-bglist[[1]][order(bglist[[1]][,4],na.last=F),4]
		bglist<-mclapply(1:numfiles, function(x){
			bglist[[x]][order(bglist[[x]][,4],na.last=F),4]<-rankscores
			write.tsv(bglist[[x]],file=outbgnames[x])
		},mc.cores=cores)
	}
}
bg.hist			<- function( bgfiles,X=TRUE,cores="max",xlims=NULL , legendnames=basename(removeext(bgfiles)),plotcolors=rainbow(length(bgfiles))){
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	bgnames<-basename(removeext(bgfiles))
	numbgs<-length(bgfiles)
	densities<-mclapply(1:numbgs,function(x) density(as.numeric(readLines(pipe(paste("cut -f 4",bgfiles[x])))),na.rm=T),mc.cores=cores)
	ymax<-max(unlist(lapply(1:numbgs,function(x) densities[[x]]$y)))
	x<-unlist(lapply(1:numbgs,function(x) densities[[x]]$x))
	if(is.null(xlims)){xlims<-quantile(x,probs=c(0.05,0.95))}
	if(X==FALSE){pdf(file=paste(prefix,"_","densities.pdf",sep=""))}
	plot(0,type="n",xlim=xlims,ylim=c(0,ymax),ylab="Density",xlab="score")
	for(i in 1:numbgs){
		lines(densities[[i]],col=plotcolors[i],lwd=3)
	}
	legend("topleft",legend=legendnames, col=plotcolors, lwd=2)
	if(X==FALSE){dev.off()}
}
bg.dist			<- function( bgfile, bedfiles, cutoff = 1, piefeatures=c("intergenic","promoter5","5utr","CDS","3utr","promoter3"),cores="max" ){
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	
	numbeds<-length(bedfiles)
	bednames<-basename(removeext(bedfiles))
	
	cat("finding scores in annotations\n")
	annoscores<-mclapply(1:numbeds, function(a){
		as.numeric(readLines(pipe(paste("bedtools intersect -u -a",bgfile,"-b",bedfiles[a]," | cut -f 4"))))
	},mc.cores=cores)
	annoscores[[numbeds+1]]<-as.numeric(readLines(pipe(paste("cut -f 4",bgfile))))
	print(lapply(annoscores,length))
	
	bednames<-c(bednames,"all")
	numbeds<-numbeds+1
	
	#tabulate HS and HR bases
	cat("tabulating score summaries\n")
	scoredistmat<-matrix(ncol=numbeds,nrow=3)
	colnames(scoredistmat)<-bednames
	row.names(scoredistmat)<-c("neither","HS","HR")
	
	#counttotals<-unlist(mclapply(annoscores, length, mc.cores=cores) )
	#scoredistmat[2,]<-unlist(mclapply(1:numbeds,function(x) length(which(annoscores[[x]] >= cutoff)),mc.cores=cores ))
	#scoredistmat[3,]<-unlist(mclapply(1:numbeds,function(x) length(which(annoscores[[x]] <= -1*cutoff )),mc.cores=cores ))
	#scoredistmat[1,]<-counttotals-scoredistmat[2,]-scoredistmat[3,]
	#scoredistmat<sweep(scoredistmat,1, counttotals, "/")
	#scoredistmat<-scoredistmat[3:1,]
	#print(scoredistmat)
	
	
	pdf(file=paste(basename(removeext(bgfile)),"_cutoff",cutoff,"_annoscoredist.pdf",sep=""))
	
	cat("barplot\n")
	#barplot(scoredistmat,las=2,legend=rownames(scoredistmat),ylab="% genomic space")
	
	
	cat("density plot\n")
	rbc<-rainbow(numbeds)
	plot(0,type="n",ylim=c(0,1),xlim=c(-3,3),xlab="Light - Heavy Score",ylab="density")
	for(p in 1:numbeds){
		lines(density(annoscores[[p]],from=-3,to=3),col=rbc[p])
	}
	legend("topleft",legend=bednames, col=rbc, lwd=3)
	
	#cat("boxplot\n")
	#boxplot(annoscores,names=annos,las=2)
	
	cat("pie\n")
	pieannos<-which(bednames %in% piefeatures)
	pie(counttotals[pieannos],labels=paste(bednames[pieannos],  round(100*(counttotals[pieannos]))  /  sum(counttotals[pieannos])    ,"%"),clockwise=TRUE,main=paste("representation of each annotation out of the", sum(counttotals[pieannos])/1000000,"Mb"),col=rainbow(length(piefeatures)))
	
	dev.off()
	cat("finding max length of scores\n")
	maxlength<-max(unlist(mclapply(annoscores,length,mc.cores=detectCores())),na.rm=TRUE)
	cat("adjusting list lengths\n")
	annoscores<-mclapply(1:length(annoscores),function(x){ length(annoscores[[x]])=maxlength; annoscores[[x]]},mc.cores=detectCores())
	cat("making table of annoscores\n")
	annoscores<-do.call(cbind,annoscores)
	colnames(annoscores)<-bednames
	cat("saving table\n")
	write.table(annoscores,file="annoscores.txt",row.names=FALSE,col.names=TRUE,quote=FALSE,sep="\t")
}
bg.window		<- function( bgfile, windowbed="", operation="sum" , mergebed=TRUE, genome="hg19", printzero=TRUE , windowsize=25, stepsize=windowsize, filler=0 ){
	bgname<-basename(removeext(bgfile))
	if(windowbed==""){
		cat(bgname,": windowing bedGraph regions\n")
		windowbed<-bed.makewindows(bgfile,windowsize=windowsize, stepsize=stepsize )
		outname<-paste(bgname,"_win",windowsize,".bg",sep="")
		prewin=FALSE
	}
	else{
		prewin=TRUE
		outname<-paste(bgname,"_win_",basename(removeext(windowbed)),".bg",sep="")
	}
	cat(bgname,": calculating",operation,"over window\n")
	
	windowedbg<-bg.map(bgfile,windowbed,operation=operation,filler=filler, outname=outname , printzero=printzero )
	if(stepsize < windowsize){
		overlapsize<-(windowsize-stepsize)/2
		lsub=floor(overlapsize)
		outname<-paste(removeext(outname),"_win",windowsize,"_step",stepsize,".bg",sep="")
		system(paste("awk '{print $1,$2+",lsub,",$2+",lsub,"+",stepsize,",$4}' OFS='\t' ",windowedbg," > ",outname,sep=""))
		windowedbg<-outname
	}
	cat(bgname,": making bigWig\n")
	outname<-bedGraphToBigWig(outname,genome=genome)
	return(windowedbg)
}
bg.loess			<- function( bgfile, lspan=0.05, cores="max" ){
	library(parallel)
	library(gtools)
	if(cores=="max"){cores=detectCores()-1}
	bgname<-basename(removeext(bgfile))
	cat(bgname,": loading file\n")
	curbg<-read.tsv(bgfile)
	curbg$V4[is.infinite(curbg$V4)]<-NA
	chroms<-unique(curbg$V1)
	chroms<-mixedsort(chroms)
	cat(bgname,": reference chromosome for span will be",chroms[1],"\n")
	numchroms<-length(chroms)
	pointsperchrom<-unlist(lapply(1:numchroms, function(x) nrow(curbg[which(curbg$V1==chroms[x]),])))
	outname<-paste(bgname,"_loess",gsub("\\.","-",lspan),".bg",sep="")
	cat(bgname,": smoothing chromosome data\n")
	lscores<-mclapply(1:numchroms, function(i){
		curchrom<-curbg[which(curbg$V1==chroms[i]),]
		if(i==1){clspan=lspan} else{clspan=lspan*(pointsperchrom[i]/pointsperchrom[1])}
		goodpoints<-which(complete.cases(curchrom[,4]))
		y<-curchrom[goodpoints,4]
		x<-curchrom[goodpoints,2]
		curchrom[goodpoints,4]<-loess(y~x,span=clspan)$fitted
		curchrom
	},mc.cores=cores,mc.preschedule=FALSE)
	curbg<-do.call(rbind,lscores)
	curbg<-curbg[order(curbg$V1,curbg$V2),]
	cat(bgname,": saving file\n")
	write.tsv(curbg,file=outname)
}
bg.plotchroms		<- function( bgfiles, outpdfname, overlay=TRUE, linewidth=2, ylabel="log2 ( early / late )" , ylims=c("auto","auto") , plotcolors=rainbow(length(bgfiles)) , legendnames = basename(removeext(bgfiles)) , cores="max", pdfdims=c(25,5)){
	library(parallel)
	library(gtools)
	if(cores=="max"){cores=detectCores()-1}
	numbgs<-length(bgfiles)
	bglist<-mclapply(bgfiles,read.tsv,mc.cores=cores)
	chroms<-mixedsort(unique(bglist[[1]][,1]))
	numchroms<-length(chroms)
	pdf(file=outpdfname, width=pdfdims[1], height=pdfdims[2])
	for(i in 1:numchroms){
		curchroms<-lapply(1:numbgs, function(z) bglist[[z]][which(bglist[[z]][,1]==chroms[i]),])
		chromsize<-max(curchroms[[1]][,3])
		allscores<-unlist(lapply(1:numbgs, function(z) curchroms[[z]][,4] ) )
		autoylims<-c(min(allscores,na.rm=TRUE),max(allscores,na.rm=TRUE))
		
		if(overlay==TRUE){
			plot(
				0,
				type="n",
				ylim=if(ylims[1]=="auto"){autoylims} else{ylims},
				xlim=c(0,chromsize),
				ylab=ylabel,
				xlab="chromosome coordinate (bp)",
				main=chroms[i],
				
			)
			abline(h=0,lwd=1,col="black")
			for(j in 1:numbgs){
				lines(curchroms[[j]][,2],curchroms[[j]][,4],lwd=linewidth, col=plotcolors[j])
			}
			legend("topright",legend=legendnames,col=plotcolors,lwd=4)
		}
		if(overlay==FALSE){
			par(mfrow=c(numbgs,1),oma=c(4,2,4,0),mar=c(0,4,0.75,10))
			for(j in 1:numbgs){
				plot(
					curchroms[[j]][,2],
					curchroms[[j]][,4],
					type="h",
					ylim=if(ylims[1]=="auto"){autoylims} else{ylims},
					xlim=c(0,chromsize),
					ylab="",
					xlab=if(j==numbgs){"chromosome coordinate (bp)"} else{""},
					xaxt=if(j==numbgs){'s'} else{'n'},
					col=plotcolors[j]
					
				)
				axis(side=1,labels=F)
				grid(lty="solid",ny=NA)
				abline(h=0,lwd=1,col="black")
				mtext(legendnames[j],side=4,las=1)
			}
			mtext(ylabel,side=2,outer=T,cex=1.5)
			mtext(chroms[i],side=3,outer=T,cex=1.5)
		}
	}
	dev.off()
}
bg.selectscores		<- function( bgfile, range=c(1,Inf) ){
	library(tools)
	bgname<-basename(removeext(bgfile))
	ext<-file_ext(bgfile)
	if(range[1]==-Inf){
		outname<-paste(bgname,"_lt",range[2],".",ext,sep="")
		system(paste("awk '$4 <=",range[2],"'",bgfile,">",outname))
	}
	if(range[2]==Inf){
		outname<-paste(bgname,"_gt",range[1],".",ext,sep="")
		system(paste("awk '$4 >=",range[1],"'",bgfile,">",outname))
	}
	if(range[1] != -Inf & range[2] != Inf){
		outname<-paste(bgname,"_gt",range[1],"_lt",range[2],".",ext,sep="")
		system(paste("awk '$4 >=",range[1],"'",bgfile,">",outname))
	}
	return(outname)
}
bg.threshold		<- function( bgfile, segmentnames=c("HS","HR"), segmergedist=75,cutoff=1, positive=TRUE, negative=FALSE){
	
	bgname<-basename(removeext(bgfile))
	
	cat("finding HS and HR regions\n")
	if(positive==TRUE){
		hsname<-paste(bgname,"_Min",cutoff,".bg",sep="")
		hs<-bg.selectscores(bgfile,range=c(cutoff,Inf))
		hs<-bed.merge(hs,flank=segmergedist)
		hs<-bg.map(bgfile,hs,operation="max",outname=hsname)
	}
	if(negative==TRUE){
		hrname<-paste(bgname,"_Max",cutoff,".bg",sep="")
		hr<-bg.selectscores(bgfile,range=c(-Inf,-1*cutoff))
		hr<-bed.merge(hr,flank=segmergedist)
		hr<-bg.map(bgfile,hr,operation="min",outname=paste(bgname,"_HR",cutoff,".bg",sep=""))
	}
	outnames<-c(if(positive==TRUE){hsname},if(negative==TRUE){hrname})
	return(outnames)
}
bg.averages		<- function( bedfile, covbedfile, numwindows, filler, input="file", output="file"){
	bedfile<-bed.sort(bedfile)
	#covbedfile<-bed.sort2(covbedfile)
	covlist<-read.delim(pipe(paste("bedtools map -c 4 -o mean -null \"",filler,"\" -a ",covbedfile," -b ",bedfile," | sort -T . -k4,4n -k2,2n | cut -f 5",sep="")),header=FALSE,stringsAsFactors=FALSE)
	covlist<-t(matrix(covlist$V1,nrow=numwindows))
	covlist
}
bg.map			<- function( bgfile, bedfile, operation="sum", filler=0, printzero = TRUE , outname="default" ){
    if(outname=="default"){outname<-paste(basename(removeext(bgfile)),"_map_",basename(removeext(bedfile)),"_",operation,".bg",sep="")}
    system(paste("bedtools map -c 4 -o",operation,"-null",filler,"-a",bedfile,"-b",bgfile,if(printzero==F){"| awk '{if ($4 != 0) print'}"},">",outname))
    return(outname)
}
bg.cors			<- function( bgfiles, outpdfname, bgnames=basename(removeext(bgfiles)), cores="max"  ){
	library(parallel)
	library(gplots)
	if(cores=="max"){cores=detectCores()-1}
	bgnames<-basename(removeext(bgfiles))
	numbgs<-length(bgfiles)
	
	colramp <- colorRampPalette(c("red","black","green"), space = "rgb")
	brks=seq(-1,1,by=2/100)
	hcolors<-colramp(100)
	
	#make list of matrices
	cat("reading in bedgraphs\n")
	bglist<-mclapply(bgfiles,read.tsv,mc.cores=cores)
	
	#make pairwise matrix correlations
	cat("calculating global correlations\n")
	pairs<-expand.grid(1:numbgs,1:numbgs)
	bgcors<-as.numeric(unlist(mclapply(1:nrow(pairs),function(x) cor(bglist[[pairs[x,1]]][,4] , bglist[[pairs[x,2]]][,4] , use="complete.obs" ),mc.cores=cores)))
	cormat<-matrix(bgcors,nrow=numbgs,ncol=numbgs)
	row.names(cormat)<-bgnames
	colnames(cormat)<-bgnames
	#for(i in 1:nrow(pairs)){
	#	cormat[pairs[i,1],pairs[i,2]]<-matcors[i]
	#}
	#globaltablename<-paste(matnames[1],"_globalcors.tsv",sep="")
	#cat("saving global correlation table to",globaltablename,"\n")
	#write.table(cormat,file=globaltablename,sep="\t",row.names=TRUE,col.names=TRUE,quote=FALSE)
	
	#plot pairwise global correlations as a heatmap
	cat("saving bg cross-correlation heatmap to",outpdfname,"\n")
	pdf(file=outpdfname)
	heatmap.2(cormat,Rowv=NA,Colv=NA,dendrogram="none",trace="none",col=hcolors,breaks=brks,main="Global cross-correlations")
	dev.off()
}
bg.transform		<- function( bgfiles,operation="unlog2",cores="max"){
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	numbgs<-length(bgfiles)
	bgnames<-basename(removeext(bgfiles))
	outnames<-paste(bgnames,"_",operation,".bg",sep="")
	bgs<-mclapply(bgfiles,read.tsv,mc.cores=cores)
	bgs<-mclapply(1:numbgs,function(x){
		if(operation=="unlog2"){
			bgs[[x]][,4]=2^bgs[[x]][,4]
		}
		if(operation=="log2"){
			bgs[[x]][,4]=log2(bgs[[x]][,4])
		}
		if(operation=="sqrt"){
			bgs[[x]][,4]=sqrt(bgs[[x]][,4])
		}
		if(operation=="log10" | operation=="log"){
			bgs[[x]][,4]=log(bgs[[x]][,4])
		}
		if(operation=="unlog10"){
			bgs[[x]][,4]=10^bgs[[x]][,4]
		}
		write.tsv(bgs[[x]],file=outnames[x])
	},mc.cores=cores,mc.preschedule=FALSE)
	return(outnames)
}
# ####################################
#      VPLOT FUNCTIONS               #
# ####################################
vplot.make		<- function( fragmentfiles,features, regionsize=1000, windowsize=10, prunefeaturesto="", featurecenter=TRUE, strand=FALSE, start=2, stop=3, ylims=c(0,200), narrowpeak=FALSE){
	library(tools)
	library(parallel)
	numwindows<-regionsize/windowsize
	
	for(i in 1:length(features)){
		
		featname<-basename(removeext(features[i]))
		
		cat("\nstarting",features[i],"\n")
		if(narrowpeak==TRUE){   # ADD CODE TO CHECK IF REALLY NARROWPEAK (INSTEAD OF -1 IN COL 10)
			npbedname<-paste(basename(removeext(features[i])),".npbed",sep="")
			system(paste("awk '{$2=$2+$10; $3=$2+1;print}' OFS='\t'",features[i],">",npbedname))
			features[i]<-npbedname
		}
		
		numfeats<-filelines(features[i])
		cat(numfeats,"features\n")
		
		cat("making windowbed of features\n")
		features[i]<-bed.recenter(features[i],regionsize,center=featurecenter,strand=strand,start=start,stop=stop, input="file")
		
		if(prunefeaturesto != ""){
			cat("pruning features\n")
			features[i]<-bed.intersect(features[i],prunefeaturesto,input="file",output="file")
		}
		
		cat("reading in features\n")
		feats<-read.tsv(features[i])
		numfeats<-nrow(feats)
		cat(numfeats,"features\n")
		
		featnames<-feats$V4
		if(strand==TRUE){
			negrows<-which(feats[,6]=="-")
		}
		
		fragments<-fragmentfiles
		cat("making covbed\n")
		features[i]<-bed.split(features[i],regionsize,windowsize,input="file")
		
		for(j in 1:length(fragments)){
			fragname<-basename(removeext(fragments[j]))
			cat("\nstarting",fragname,"\n\t")
			
			
			
			if(grepl("tp://",fragments[j]) == TRUE ){
				cat("downloading file to current directory\n")
				system(paste("wget",fragments[j]))
				fragments[j]<-basename(fragments[j])
			}
			if(file_ext(fragments[j]) == "gz"){
				cat("extracting with gunzip to current directory\n")
				system(paste("gunzip -c",fragments[j],">",fragname ) )
				fragments[j]<-paste(basename(removeext(fragments[j])),sep="")
				fragname<-basename(removeext(fragments[j]))
				scoretype="bed"
			}
			if(file_ext(fragments[j]) %in% c("cbed","bed","cfbg","broadPeak","broadpeak","narrowPeak","narrowpeak") ==TRUE){
				scoretype="bed"
			}
			if(file_ext(fragments[j]) %in% c("bg","bedgraph","bedGraph") ==TRUE){
				scoretype="bedgraph"
			}
			if(file_ext(fragments[j]) %in% c("bb","bigbed","bigBed") == TRUE){
				cat("converting bigBed to bed\n")
				bigBedToBed(fragments[j])
				fragments[j]<-paste(fragname,".bed",sep="")
				scoretype="bed"
			}
			if(file_ext(fragments[j]) %in% c("bw","bigwig","bigWig") == TRUE){
				cat("converting bigWig to bedGraph\n")
				bigWigToBedGraph(fragments[j])
				fragments[j]<-paste(fragname,".bg",sep="")
				scoretype="bedgraph"
			}
			
			
			fragname<-basename(removeext(fragments[j]))
			
			
			
			
			vplotname<-paste(fragname,"_",featname,"_f",numfeats,".fmat",sep="")
			
			if(prunefragments==TRUE){
				cat("pruning fragments\n\t")
				fragments[j]<-bed.intersect(fragments[j],features[i],input="file",output="file")
			}
			
			cat("counting fragments...")
			numfrags<-filelines(fragments[j])
			cat(numfrags,"\n")
			



			cat("calculating fragment coverage around features\n\t")
			fmat<-read.delim(pipe(paste("bedtools map -c 4 -o collapse -null \"NA\" -a ",features[i]," -b ",fragments[j]," | sort -V -T . -k4,4 -k2,2n | cut -f 5",sep="")),header=FALSE,stringsAsFactors=FALSE)
			fmat<-t(matrix(fmat$V1,nrow=numwindows))
			
			if(strand==TRUE){
				cat("adjusting for strand\n")
				fmat[negrows,1:numwindows]<-fmat[negrows,numwindows:1]
			}
			
			#save fmat
			row.names(fmat)<-featnames
			write.mat(fmat,file=paste(basename(removeext(fragments[j])),"_",basename(featname),".fmat",sep=""))
			
			cat("processing fragments\n\t")
			fragsizes<-mclapply(1:numwindows,function(x) as.numeric(na.omit(as.numeric(unlist(strsplit(paste(fmat[,x],collapse=","),","))))),mc.cores=detectCores())
			
			cat("plotting fragment sizes in matrix\n\t")
			vmat<-matrix(0,ncol=numwindows,nrow=ylims[2]-ylims[1])
			for(h in 1:numwindows){
				vmat[,h]<-hist(fragsizes[[h]],breaks=ylims[1]:ylims[2],plot=F)$counts
			}
			
			#normalize counts
			if(rpm==TRUE){
				scalar<-1000000/numfrags
				vmat<-vmat*scalar
			}
			
			#vertically flip matrix
			vmat<-vmat[ylims[2]:1,]
			
			write.mat(,file=vplotname)
			vplot.draw(vplotname,png=TRUE)
			cat("vplot saved in",vplotname,"\n\n")
		}
	}
}
vplot.draw		<- function( vmat,hm.max="default",png=FALSE, plotcolors=c("black","green")){
	colramp <- colorRampPalette(plotcolors, space = "rgb")
	library(gplots)
	matname<-basename(removeext(vmat))
	m<-read.mat(vmat)
	if(hm.max=="default"){hm.max=as.numeric(round(quantile(m[m!=0],prob=0.99)))}
	filename<-paste(matname,"_max",round(hm.max),"_vplot.png",sep="")
	if(png==TRUE){
		png(file=filename,width=1000,height=1000)
	}
	heatmap.2(m,Colv=NA,Rowv=NA,dendrogram="none",trace="none",breaks=seq(0,hm.max,by=hm.max/100),labRow=NA,labCol=NA,col=colramp(100))
	if(png==TRUE){
		dev.off()
	}
}
vplot.rank		<- function( peakfiles, bgfiles, fragments, cellline="Gm12878", cores=16, np=TRUE, ... ){
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	cat("matching bedgraphs to peaks\n")
	bgfiles<-bgfiles[grep(cellline,bgfiles)]
	bgfiles<-bgfiles[grep("Rna",bgfiles,invert=T)]
	bgfiles<-bgfiles[grep("Tfbs",bgfiles)]
	peakfiles<-peakfiles[grep(cellline,peakfiles)]
	peakfiles<-peakfiles[grep("Rna",peakfiles,invert=T)]
	peakfiles<-peakfiles[grep("Tfbs",peakfiles)]
	bgnames<-basename(removeext(bgfiles))
	bgnames<-remove.prefix(bgnames,cellline)
	bgnames<-remove.suffix(bgnames,"Raw")
	bgnames<-remove.suffix(bgnames,"Sig")
	peaknames<-basename(removeext(peakfiles))
	peaknames<-remove.prefix(peaknames,cellline)
	peaknames<-remove.suffix(peaknames,"UniPk")
	peaknames<-remove.suffix(peaknames,"Pk")
	peaknames<-remove.suffix(peaknames,"Std")
	matches<-match(peaknames,bgnames)
	nobgs<-which(is.na(matches))
	matches<-na.omit(matches)
	cat(length(matches),"matches found\n")
	peakfiles<-peakfiles[-nobgs]
	bgfiles<-bgfiles[matches]
	numpeaks<-length(peakfiles)
	numbgs<-length(bgs)
	numfrags<-length(fragments)
	source("~/lus/mat.heatmap3.R")
	
	#for(p in 1:numpeaks){
	mclapply(1:numpeaks, function(p){
		
		dname<-mat.make( c(fragments, bgfiles[p]) , peakfiles[p], narrowpeak=np, fragmats=1:numfrags, cores=1, prunescores=T, featurecenter=T,prunefeaturesto="~/hg19/seqcap/seqcap_targets_merged.bed", maskbed="~/hg19/seqcap/seqcap_targets_merged.bed",regionsize=1000,closest="~/hg19/misc/scg3.bed" )
		hmbase<-basename(removeext(bgfiles[p]))
		hmname<-files(paste(dname,"/",hmbase,"*.mat10",sep=""))[1]
		hmnames<-files(paste(dname,"/*.mat10",sep=""))
		fmats<-files(paste(dname,"/*.fmat*",sep=""))
		mat.heatmap3( c(hmname,hmnames), fragmats=fmats,sorting=rep("mean,-50,50",2), plotcolors=c("deepskyblue black yellow"), cores=1 )
	},mc.cores=cores,mc.preschedule=F)
	#}
}
# ####################################
#      MATRIX FUNCTIONS              #
# ####################################
mat.make			<- function( scorefiles, features, closest=NULL, cores="max",meta=FALSE, metaflank=1000, maskbed=NULL,prunefeaturesto=NULL,rpm=TRUE,regionsize=2000,windowsize=10,start=2,stop=3,strand=TRUE,bgfiller=0,prunescores=FALSE,featurecenter=FALSE,suffix=NULL,scoremat=TRUE,fragmats=0, narrowpeak=FALSE){
        
        # TO DO
        # #######
        # add bed info to mat rownames
        # move tmp files to a tmp directory
        
        library(tools)
        #multicore setup
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	
	numwindows<-regionsize/windowsize
	numfeats<-length(features)
	
	#check parameters
	if(ceiling(numwindows)!=floor(numwindows)){stop("regionsize is not a multiple of windowsize")}
	if(ceiling(regionsize)!=floor(regionsize)){stop("regionsize must be an even number")}
	
	#process features
	for(i in 1:numfeats){
		
		featname<-basename(removeext(features[i]))
		
		#make directory for saving files
		dname<-paste(featname,"_mat",windowsize,sep="",collapse="")
		dname<-uniquefilename(dname)
		system(paste("mkdir",dname))
		
		if(is.null(closest) == FALSE){
			features[i]<-bed.closest(features[i],closest,strand=strand)
		}
		
		#recenter narrowpeaks
		if(file_ext(features[i]) %in% c("narrowPeak","narrowpeak","np") == TRUE & featurecenter==TRUE & narrowpeak==TRUE){
			cat("narrowPeak file detected\n")
			
			#check if peak location is in narrowPeak file and recenter features if OK
			headlines<-as.numeric(readLines(pipe(paste("head",features[i],"| awk '{print $10}'"))))
			if(length(which(headlines %in% (-1)>0))){
				cat("bad peak coordinate detected in narrowPeak file, treating as bed\n")
			} else{
				cat("peak coordinates detected in narrowPeak file, recentering bed features\n")
				outname<-paste(featname,"_recentered.np",sep="")
				system(paste("awk '{$2=$2+$10;$3=$2+$10+1;print}' OFS='\t' ",features[i]," | sort -T . -k1,1 -k2,2n > ",outname,sep=""))
				features[i]<-outname
			}
		}
		
		#create window bed for pruning
		cat("creating window bed\n")
		if(meta==FALSE){
			features[i]<-bed.recenter(features[i],regionsize,center=featurecenter,strand=strand,start=start,stop=stop,input="file" )
		}
		
		#prune features to specified file
		if(is.null(prunefeaturesto) == FALSE){
			cat("pruning features\n")
			features[i]<-bed.intersect(features[i],prunefeaturesto,input="file",output="file")
		}
		
		#copy processed features to output directory for later use
		system(paste("cp ",features[i]," ",dname,"/",sep=""))
		
		#read in bed and get dimensions
		cat("reading in bed and getting info\n")
		curbed<-read.tsv(features[i])
		
		if(meta==TRUE){
			curbed<-curbed[which(curbed[,2]>metaflank+1),]
			write.tsv(curbed,file=features[i])
		}
		
		bedcols<-ncol(curbed)
		bedrows<-nrow(curbed)
		
		# FEATURES SHOULD NOT BE MODIFIED AFTER THIS POINT #

		#find minus-stranded features
		if(bedcols < 6 & strand==TRUE){strand=FALSE;cat("WARNING: RUNNING WITH strand=FALSE BECAUSE NO STRAND COLUMN EXISTS\n")}
		if(strand==TRUE){negrows<-which(curbed[,6]=="-")}
		
		#make windows to calculate scores for matrix
		cat("making covbed\n")
		if(meta==FALSE){
			features[i]<-bed.split(features[i],regionsize,windowsize,input="file")
		} else{
			features[i]<-bed.split.meta(features[i],regionsize,windowsize, metaflank, input="file", start=start, stop=stop )
			numwindows<-(metaflank*2+regionsize)/windowsize
		}
		
		#make matrix of regions to (not) mask with NAs
		if(is.null(maskbed) == FALSE){
			cat("finding masking regions\n")
			maskmat<-bed.coverage(maskbed,features[i],numwindows)
			if(strand==TRUE){
				maskmat[negrows,1:numwindows]<-maskmat[negrows,numwindows:1]
			}
		}
		
		#assign row names to matrix
		cat("naming features\n")
		namebed<-curbed
		if(bedcols<4){
			namebed$V4<-1:bedrows
		}
		if(length(unique(namebed[,4])) != bedrows){
			namebed$V4<-paste(namebed$V4,1:bedrows,sep="-")
		}
		if(bedcols>12){
			colnames(namebed)[13]<-"symbol"
		} else{
			namebed$symbol<-namebed$V4
		}
		curbed[,4]<-paste(namebed$V4,namebed[,1],namebed$symbol,sep=";")
		rm(namebed)
		scores<-scorefiles
		

		
		#process each score file
		outs<-mclapply(1:length(scores), function(j) {
			
			scorename<-basename(removeext(scores[j]))
			
			#DOWNLOAD, EXTRACT, AND/OR FORMAT CONVERSION
			if(grepl("tp://",scores[j]) == TRUE ){
				cat(scorename,": downloading file to current directory\n")
				system(paste("wget",scores[j]))
				scores[j]<-basename(scores[j])
			}
			if(file_ext(scores[j]) == "gz"){
				cat(scorename,": extracting with gunzip to current directory\n")
				system(paste("gunzip -c",scores[j],">",scorename ) )
				scores[j]<-paste(basename(removeext(scores[j])),sep="")
				scorename<-basename(removeext(scores[j]))
			}
			if(file_ext(scores[j]) %in% c("wig","Wig") == TRUE){
				cat(scorename,": converting wig to bigWig\n")
				wigToBigWig(scores[j])
				scores[j]<-paste(scorename,".bw",sep="")
			}
			if(file_ext(scores[j]) %in% c("bb","bigbed","bigBed") == TRUE){
				cat(scorename,": converting bigBed to bed\n")
				bigBedToBed(scores[j])
				scores[j]<-paste(scorename,".bed",sep="")
			}
			if(file_ext(scores[j]) %in% c("bw","bigwig","bigWig") == TRUE){
				cat(scorename,": converting bigWig to bedGraph\n")
				bigWigToBedGraph(scores[j])
				scores[j]<-paste(scorename,".bg",sep="")
			}
			if(file_ext(scores[j]) %in% c("cbed","bed","cfbg","broadPeak","broadpeak","narrowPeak","narrowpeak") ==TRUE){
				scoretype="bed"
			}
			if(file_ext(scores[j]) %in% c("bg","bedgraph","bedGraph") ==TRUE){
				scoretype="bedgraph"
			}
			
			scorename<-basename(removeext(scores[j]))
			
			cat(scorename,": counting scores\n")
			numfrags<-filelines(scores[j])
			cat(scorename,":",numfrags,"fragments\n")
			
			#PRUNE READS/SCORES TO REGIONS AROUND FEATURES
			if(prunescores==TRUE){
				cat(scorename,": pruning scores to regions of interest\n")
				scores[j]<-bed.intersect(scores[j],features[i],input="file",output="file")
				cat(scorename,": counting pruned scores\n")
				prunedscorecount<-filelines(scores[j])
				cat(scorename,":",prunedscorecount,"scores after pruning\n")
			}
			
			if(scoremat == TRUE){
				#get coverage/average
				if(scoretype=="bed"){
					cat(scorename,": finding coverage of",scorename,"on",featname,"\n")
					curmat<-bed.coverage(scores[j],features[i],numwindows)
				}
				if(scoretype=="bedgraph"){
					cat(scorename,": mapping scores in",scorename,"to",featname,"\n")
					curmat<-bg.averages(scores[j],features[i],numwindows,bgfiller)
				}
				
				#add row names to matrix
				row.names(curmat)<-curbed[,4]
				
				#flip rows of negative-stranded features
				if(strand==TRUE){
					curmat[negrows,1:numwindows]<-curmat[negrows,numwindows:1]
				}
				
				#normalize by total fragments
				if(rpm==TRUE & scoretype=="bed"){
					cat(scorename,": normalizing data\n")
					scalar<-1000000/numfrags
					curmat<-curmat*scalar
				}
				
				#mask uncovered regions with NA and remove poorly-covered regions
				if(is.null(maskbed) == FALSE){
					cat(scorename,": masking data\n")
					curmat[maskmat==0]<-NA
				}
				
				#save matrix
				outfilename<-paste(scorename,"_",featname,suffix,".mat",windowsize,sep="")
				write.mat(curmat,file=paste(dname,"/",outfilename,sep=""))
				cat(scorename,": matrix saved to",paste(dname,"/",outfilename,sep=""),"\n")
				return(paste(dname,"/",outfilename,sep=""))
			}
			return(outs)
			if(j %in% fragmats & scoretype == "bed"){
				cat("boing\n")
				cfbg<-cfbg.make(scores[j])
				cat(scorename,": calculating fragment coverage around features for fragmat\n")
				
				fmat<-readLines(pipe(paste("bedtools map -c 4 -o collapse -null \"NA\" -a ",features[i]," -b ",cfbg," | sort -V -T . -k4,4 -k2,2n | cut -f 5",sep="")))
				fmat<-t(matrix(fmat,nrow=numwindows))
				
				if(strand==TRUE){
					cat("adjusting for strand\n")
					fmat[negrows,1:numwindows]<-fmat[negrows,numwindows:1]
				}
				print(dim(fmat))
				#save fmat
				row.names(fmat)<-curbed[,4]
				#headername<-paste(removeext(fmatname),".header",sep="")
				#tmpname<-paste(scorename,"_",featname,suffix,".tmp",sep="")
				#fmatname<-paste(scorename,"_",featname,suffix,".fmat",windowsize,sep="")
				fmatname<-paste(scorename,"_",featname,suffix,".fmat",windowsize,sep="")
				cat("saving matrix to",fmatname,"\n")
				write.mat(fmat,file=paste(dname,"/",fmatname,sep=""))
				#cat("adding header to",fmatname,"\n")
				#system(paste("echo '#",numfrags,"' > ",headername," && cat ",headername," ",tmpname," > ",fmatname,sep=""))
			}
		#})
		},mc.cores=cores, mc.preschedule=FALSE)
	}
	
}
mat.diff			<- function( matlist1,matlist2,operation="subtract",pattern="",replacement=""){
	if(pattern=="" | replacement==""){stop("YOU MUST SPECIFY 'pattern' AND 'replacement' FOR OUTPUT FILE NAMES")}
	matlist1names<-basename(matlist1)
	matlist2names<-basename(matlist2)
	outmatnames<-gsub(pattern,replacement,matlist1names)
	nametab<-data.frame("MATRIX1"=matlist1names,"MATRIX2"=matlist2names,"OUTPUT"=outmatnames,stringsAsFactors=FALSE)
	print(nametab)
	if(length(which(matlist1names %in% outmatnames)==TRUE)>0){stop("AT LEAST ONE OUTPUT FILE WILL REPLACE AN INPUT FILE")}
	for(i in 1:length(matlist1)){
		cat("processing ",outmatnames[i],"\n",sep="")
		mat1<-read.mat(matlist1[i])
		mat2<-read.mat(matlist2[i])
		if(operation=="subtract"){
			mat3<-mat1-mat2
		}
		if(operation=="log2"){
			mat3<-log2(mat1/mat2)
			mat3[is.infinite(mat3)]<-NA
		}
		write.mat(mat3,file=outmatnames[i])
	}
}
mat.cors			<- function( mats, numgroups=3,hcluster=FALSE,plotcolors=c("red","green","black"),sorton=2,legendnames=basename(removeext(mats))){
	library(parallel)
	library(gplots)
	colramp <- colorRampPalette(plotcolors, space = "rgb")
	brks=seq(-1,1,by=2/100)
	hcolors<-colramp(100)
	matnames<-basename(removeext(mats))
	nummats<-length(mats)
	
	#make list of matrices
	cat("reading in matrices\n")
	matlist<-mclapply(1:nummats,function(x){  read.mat(mats[x]) },mc.cores=detectCores() )
	numcols<-ncol(matlist[[1]])
	#remove rows with no variance or no scores
	cat("finding genes with no variance\n")
	badvarrows<-unique(unlist(lapply(1:nummats, function(x){ which(apply(matlist[[x]],1,sd, na.rm=TRUE)==0) } )))
	
	cat("finding genes with no scores\n")
	badrows<-unique(unlist(lapply(1:nummats, function(x){ which(rowSums(is.na(matlist[[x]]))==numcols) })))
	
	badrows<-c(badvarrows,badrows)
	
	if(length(badrows)>0){
	    cat("removing bad rows\n")
	    matlist<-lapply(1:nummats, function(x){ matlist[[x]][-badrows,] })
	}
	
	numcols<-ncol(matlist[[1]])
	numgenes<-nrow(matlist[[1]])
	genenames<-row.names(matlist[[1]])
	
	#make pairwise matrix correlations
	cat("calculating global correlations\n")
	pairs<-expand.grid(1:nummats,1:nummats)
	matcors<-as.numeric(unlist(lapply(1:nrow(pairs),function(x) cor(as.vector(matlist[[pairs[x,1]]]) , as.vector(matlist[[pairs[x,2]]]) , use="complete.obs" ))))
	cormat<-matrix(nrow=nummats,ncol=nummats)
	row.names(cormat)<-legendnames
	colnames(cormat)<-legendnames
	for(i in 1:nrow(pairs)){
		cormat[pairs[i,1],pairs[i,2]]<-matcors[i]
	}
	globaltablename<-paste(matnames[1],"_globalcors.tsv",sep="")
	cat("saving global correlation table to",globaltablename,"\n")
	write.table(cormat,file=globaltablename,sep="\t",row.names=TRUE,col.names=TRUE,quote=FALSE)
	
	#plot pairwise global correlations as a heatmap
	globalhmname<-paste(matnames[1],"_matcors.pdf",sep="")
	cat("saving matrix cross-correlation heatmap to",globalhmname,"\n")
	pdf(file=globalhmname)
	heatmap.2(cormat,Rowv=NA,Colv=NA,dendrogram="none",trace="none",col=hcolors,breaks=brks,main="Global cross-correlations")
	
	#calculate pairwise gene correlations
	cat("calculating pairwise gene correlations\n")
	genecormat<-as.data.frame(
		lapply(1:nummats,function(y)
			unlist(
				lapply(
					1:numgenes, function(x){
						cor( matlist[[y]][x,],matlist[[1]][x,], use="complete.obs" )
					}
				)
			)
		)
		
	)
	row.names(genecormat)<-genenames
	colnames(genecormat)<-legendnames
	genecormat<-data.matrix(genecormat)
	
	genemeta<-data.frame(
	     "symbol"=unlist(lapply(strsplit(row.names(genecormat),"_"), "[", 3 ) ),
	     "id"=unlist(lapply(strsplit(row.names(genecormat),"_"), "[", 1 ) ),
	     "chrom"=unlist(lapply(strsplit(row.names(genecormat),"_"), "[", 2 ) ),
	     stringsAsFactors=FALSE
	)
	
	goodrows<-complete.cases(genecormat)
	genecormat<-genecormat[goodrows,]
	genemeta<-genemeta[goodrows,]
	
	#kmeans cluster and plot gene correlations
	cat("kmeans clustering data and plotting heatmap\n")
	k<-kmeans(genecormat,numgroups)
	groupcolors<-unlist(lapply(k$cluster,function(x) rainbow(numgroups)[x]))[order(k$cluster)]
	heatmap.2(genecormat[order(k$cluster),],trace="none",Colv=NA,Rowv=NA,dendrogram="none",RowSideColors=groupcolors,col=hcolors,breaks=brks,labRow=NA,margins=c(10,10),cexCol=1,main=paste("clustered gene-by-gene correlations with",legendnames[1]))
	cat("plotting sorted heatmap\n")
	for(i in 2:nummats){
		heatmap.2(genecormat[order(genecormat[,i]),],trace="none",Colv=NA,Rowv=NA,dendrogram="none",col=hcolors,breaks=brks,labRow=NA,main=paste("gene-by-gene correlations with ",legendnames[1],"sorted on",legendnames[sorton]))
	}
	if(hcluster==TRUE){
		cat("heirarchical clustering data and plotting heatmap\n")
		heatmap.2(genecormat,Colv="none",dendrogram="row",trace="none",col=hcolors,breaks=brks,labRow=NA,main=paste("hclustered gene-by-gene correlations with ",legendnames[1]))
	}
	
	#save file
	rbc<-rainbow(nummats-1)
	plot(density(genecormat[,2],from=-1,to=1),col=rbc[1],xlim=c(-1,1),ylim=c(0,10),xlab=paste("gene-by-gene correlation with",legendnames[1]))
	if(nummats>2){
		for(i in 3:(nummats)){
			lines(density(genecormat[,i],from=-1,to=1),col=rbc[i-1])
		}
	}
	legend("topleft",legend=legendnames[2:nummats],col=rbc,lwd=3)
	
	par(mar=c(7,5,1,1))
	boxplot(genecormat[,2:nummats],ylab=paste("gene-by-gene correlation with",legendnames[1]),las=2)
	
	dev.off()
	genecormat<-cbind(genecormat,genemeta)
	write.table(genecormat,file=paste(matnames[1],"_gene_correlations.tsv",sep=""),sep="\t",row.names=TRUE,col.names=TRUE,quote=FALSE)
}
mat.strandtosense	<- function( topstrandmats,botstrandmats,bed){
	curbed<-read.tsv(bed)
	negrows<-which(curbed[,6]=="-")
	for(i in 1:length(topstrandmats)){
		cat("processing",topstrandmats[i],"\n")
		curplus<-read.mat(topstrandmats[i])
		curminus<-read.mat(botstrandmats[i])
		newplus<-curplus
		newminus<-curminus
		newplus[negrows,]<-curminus[negrows,]
		newminus[negrows,]<-curplus[negrows,]
		write.mat(newplus,file=gsub("\\.mat","_sense.mat",topstrandmats[i]))
		write.mat(newminus,file=gsub("\\.mat","_antisense.mat",botstrandmats[i]))
	}
}
mat.plotrows		<- function( mats,genes,geneid="symbols",sep="_",prunerows=FALSE,lspan=0,normalize=F,legendnames=basename(removeext(mats)),ylims=c(0,"auto"),plotcolors=rainbow(length(mats)),filename="orphan",start="beginning",stop="end",center="TSS",allforward=TRUE,linewidth=3,blockheight=20,grid=TRUE,X=F){
	library(tools)
	library(parallel)
	nummats<-length(mats)
	
	#read in matrices and get info
	cat("loading matrices...\n")
	matlist<-mclapply(mats,read.mat,mc.cores=detectCores())
	matcols<-unlist(lapply(matlist,ncol))
	numgenes<-length(genes)
	if(numgenes>16 & X==T){X=F;cat("more than 16 genes, forcing X=FALSE\n")}
	matnames<-basename(removeext(mats))
	genenames<-unlist(lapply(strsplit(row.names(matlist[[1]]),sep),"[",3))
	windowsizes=as.numeric( gsub("mat","",file_ext(mats) ) )
	xs<-lapply(1:length(mats),function(x) ((1:matcols[x])-(matcols[x]/2))*windowsizes[x])
	outname<-uniquefilename("mat_plotrows.pdf")
	
	if(prunerows==TRUE){
		cat("finding empty rows\n")
		badrows<-unique(unlist(lapply(1:nummats, function(x){ which(rowSums(is.na(matlist[[x]]))==matcols[x]) })))
		if(length(badrows)>0){
			cat("removing empty rows\n")
			matlist<-lapply(1:nummats,function(x){
				matlist[[x]][-badrows,]
			})
		}
	}
	
	
	#normalize data
	if(normalize==TRUE){
		cat("normalizing data\n")
		normat<-function(curmat){t(simplify2array(lapply(1:nrow(curmat),function(x) curmat[x,]/mean(curmat[x,],na.rm=TRUE))))}
		matlist<-lapply(matlist,normat)
	}
	
	#find ylims
	cm=1
	if(ylims[1]=="auto" | ylims[2]=="auto"){
		cat("gathering data to calculate ylims\n") 
		cm<-do.call(cbind,matlist)
	}
	
	if(ylims[1]=="auto"){
		cat("calculating ymins\n")
		ymins<-apply(cm,1,min,na.rm=TRUE)
		#print(ymins)
	}
	else{
		ymins<-as.numeric(rep(ylims[1],length(genenames)))
	}
	if(ylims[2]=="auto"){
		cat("calculating ymaxs\n")
		ymaxs<-1.1*apply(cm,1,max,na.rm=TRUE)
		#ok, shprint(ymaxs)
	}
	else{
		ymaxs<-as.numeric(rep(ylims[2],length(genenames)))
	}
	rm(cm)
	
	#identify genes to plot
	cat("Finding genes\n")
	if(geneid=="symbols"){
		matrows<-unlist(lapply(1:numgenes,function(x) which(genenames==genes[x])[1]))
	}
	if(geneid=="ucsc"){
		matrows<-unlist(lapply(1:numgenes,function(x) which(genenames==genes[x])[1]))
	}
	if(geneid=="rows"){
		matrows<-genes
	}
	
	if(X==F){
		pdf(file=outname)
	}
	else{
		parnum<-ceiling(sqrt(numgenes))
		par(mfrow=c(parnum,parnum))
	}
	
	#plot averages
	plot(
			0,
			type="n",
			xlim=c(min(xs[[1]]),max(xs[[1]])),
			ylim=c(mean(ymins),mean(ymaxs)),
			xlab="Distance from TSS (bp)",
			ylab="Average FPM",
			main=paste("Average score for",numgenes,"genes"),
			cex.lab=1.5,
			cex.axis=1.5,
			cex.main=1.5,
			cex.sub=1.5
	)
		
	for(m in 1:length(mats)){
			x<-1:matcols[m]
			y<-colMeans(matlist[[m]][matrows,],na.rm=TRUE)
			lines(
				xs[[m]],
				y,
				col=plotcolors[m],
				lwd=3
			)
	}
		
	legend("topright",legend=legendnames,col=plotcolors,lwd=3)
	
	
	
	for(i in 1:numgenes){
		plot(
			0,
			type="n",
			xlim=c(min(xs[[1]]),max(xs[[1]])),
			ylim=c(ymins[matrows[i]],ymaxs[matrows[i]]),
			xlab="Distance from TSS (bp)",
			ylab="Normalized FPM",
			main=genenames[matrows[i]],
			cex.lab=1.5,
			cex.axis=1.5,
			cex.main=1.5,
			cex.sub=1.5
		)
		
		for(m in 1:length(mats)){
			if(lspan!=0){
				x<-1:matcols[m]
				y<-matlist[[m]][matrows[i],]
				y[-which(is.na(y))]<-loess(y~x,span=lspan)$fitted
			}
			else{
				y<-matlist[[m]][matrows[i],]
			}
			lines(
				xs[[m]],
				y,
				col=plotcolors[m],
				lwd=3
			)
		}
		
		legend("topright",legend=legendnames,col=plotcolors,lwd=3)
	}
	cat("pdf saved to",outname,"\n")
	
	if(X==F){dev.off()}
}
mat.colcors		<- function( matfile1,matfile2,center="TSS",continue=FALSE,color="black"){
	
	mat1<-data.matrix(read.table(paste(matfile1),stringsAsFactors=FALSE,row.names=NULL))
	mat2<-data.matrix(read.table(paste(matfile2),stringsAsFactors=FALSE,row.names=NULL))
	correlations<-as.numeric(vector(length=ncol(mat1)))
	for(i in 1:ncol(mat1)){
		correlations[i]<-cor(mat1[,i],mat2[,i],use="p")
	}

	if(continue==FALSE){plot(((1:ncol(mat1))-(ncol(mat1)/2)),correlations,xlab=paste("Distance from",center),ylab="Correlation",type="l",col=color,main=paste("Correlation between",removeext(matfile1),"and",removeext(matfile2),"along",center),ylim=c(0,1))}
	else{lines(((1:ncol(mat1))-(ncol(mat1)/2)),correlations,xlab=paste("Distance from",center),ylab="Correlation",col=color,main=paste("Correlation between",removeext(matfile1),"and",removeext(matfile2),"along",center),ylim=c(0,1))}
}
mat.plotaverages		<- function( matrixlist,centername="TSS",plottype="l",ylims=c("auto","auto"),X=TRUE,prefix="unnamed",plotcolors=rainbow(length(matrixlist)),legendnames=basename(removeext(matrixlist))){
	library(tools)
	library(parallel)
	cores=detectCores()-1
	nummats<-length(matrixlist)
	matlist<-mclapply(matrixlist,read.mat,mc.cores=cores)
	mataverages<-mclapply(matlist,colMeans,na.rm=TRUE,mc.cores=cores)
	ymax<-max(unlist(mataverages),na.rm=TRUE)
	ymin<-min(unlist(mataverages),na.rm=TRUE)
	if(ylims[1]=="auto"){ylims[1]=ymin}
	if(ylims[2]=="auto"){ylims[2]=ymax}
	ylims<-as.numeric(ylims)
	windowsizes<-as.numeric(gsub("mat","",file_ext(matrixlist)))
	matcols<-unlist(lapply(matlist,ncol))
	xs<-lapply(1:nummats,function(x) ((1:matcols[x])-(matcols[x]/2))*windowsizes[x])
	if(X==FALSE){pdf(file=paste(prefix,"_averages.pdf",sep=""))}
	plot(
        	0,
		type="n",
    		xlim=c(min(xs[[1]]),max(xs[[1]])),
		ylim=ylims,
    		xlab=paste("Distance from",centername,"(bp)"),
    		ylab="Average score",
    		cex=0.5
		#cex.lab=1.5,
    		#cex.axis=1.5,
    		#cex.main=1.5,
    		#cex.sub=1.5
    	)
	for(k in 1:nummats){
		lines(
		      xs[[k]],
		      mataverages[[k]],
		      col=plotcolors[k],
		      lwd=1,
		      type=plottype
		)
    	}
	legend("topleft",legend=legendnames, col=plotcolors, lwd=3)
	if(X==FALSE){dev.off()}
}
mat.plotaverages2	<- function( matfilelist,suffixes,legendnames,plotcolors=rainbow(length(matfilelist)), windowsize=10, matcols=100, xname="Distance from Feature", yname="Average Score"){
	numlists<-length(matfilelist)
	if(length(suffixes==1)){suffixes<-rep(suffixes,numlists)}
	prefixes<-lapply(1:numlists, function(x){
		remove.suffix(basename(matfilelist[[x]]),suffixes[x])
	})
	feats<-unique(unlist(prefixes))
	matches<-lapply(1:numlists,function(x) match(feats,prefixes[[x]]) )
	pdfname<-paste("mataverages_",length(feats),"centers_",numlists,"scores.pdf",sep="")
	pdf(file=pdfname)
	xaxis<-windowsize*(1:matcols) - (windowsize*matcols)/2
	for(i in 1:length(feats)){
		cat("plotting feature #",i,"/",length(feats),":",feats[i],"\n")
		refs<-unlist(lapply(matches,"[",i))
		nomats<-which(is.na(refs))
		goodmats<-which(1:numlists %ni% nomats)
		curmats<-lapply(goodmats,function(x) read.mat(matfilelist[[x]][matches[[x]][i]]))
		counts<-unlist(lapply(curmats,nrow))
		colmeans<-lapply(curmats,colMeans,na.rm=TRUE)
		all<-unlist(colmeans)
		ylims<-c(min(all),max(all))
		plot(0,type="n",xlim=c(min(xaxis),max(xaxis)),ylim=ylims,main=feats[i],xlab=xname,ylab=yname)
		n<-rep(0,numlists)
		n[goodmats]<-counts
		for(j in 1:numlists){
			if(j %in% goodmats == TRUE){ 
				lines(xaxis,colmeans[[which(goodmats==j)]],col=plotcolors[j],lwd=3)
			}
			
		}
		legend("topleft",legend=paste(legendnames,n),col=plotcolors,lwd=3)
	}
	dev.off()
	cat("pdf saved to",pdfname,"\n")
}
mat.plotsamplerows	<- function( matname,samples=5,ylims=c(0,30)){
	mat<-read.mat(matname)
	rows<-sample(1:nrow(mat),samples)
	plotcolors=rainbow(samples)
	plot(1:ncol(mat),mat[rows[1],],col=plotcolors[1],type="l",ylim=ylims)
	for(i in 2:samples){
		lines(1:ncol(mat),mat[rows[i],],col=plotcolors[i])
	}
	legend("topright",legend=row.names(mat)[rows],col=plotcolors,lwd=2)
}
mat.loess		<- function( mats, lspan=0.05 ){
	library(parallel)
	matnames<-basename(removeext(mats))
	print(matnames)
	for(i in 1:length(mats)){
		cat("reading mat\n")
		m<-read.mat(mats[i])
		y<-1:ncol(m)
		ml<-data.matrix(t(as.data.frame(mclapply(1:nrow(m),function(x){  m[x,-which(is.na(m[x,]))]<-loess(m[x,]~y,span=lspan)$fitted ; m[x,]   },mc.cores=detectCores()))))
		cat("smoothing\n")
		row.names(ml)<-row.names(m)
		cat("saving mat\n")
		write.mat(ml,file=paste(matnames[i],"_loess",gsub("\\.","-",as.character(lspan)),".mat10",sep=""))
	}
}
mat.transform		<- function( mats, from="log2", to="ratio"){}
mat.qnorm		<- function( mats, normalizeto=1 ){
	library(tools)
	library(parallel)
	matnames<-basename(removeext(mats))
	exts<-file_ext(mats)
	cat("loading matrices\n")
	matlist<-mclapply(mats,read.mat,mc.cores=detectCores())
	nummats<-length(mats)
	cat("normalizing matrices\n")
	matlist<-mclapply(1:nummats,function(x) {matlist[[x]][order(matlist[[x]])]<-matlist[[normalizeto]][order(matlist[[normalizeto]])];matlist[[x]]} )
	cat("saving matrices\n")
	outnames<-paste(matnames,"_qnorm.",exts, sep="") 
	mclapply(1:nummats, function(x) write.mat(matlist[[x]],file=outnames[x]) ,mc.cores=detectCores() )
	return(outnames)
}
mat.hist			<- function( mats, xlims=c("auto","auto"), plotcolors=rainbow(length(mats)), legendnames=basename(removeext(mats)) ){
	library(parallel)
	nummats<-length(mats)
	
	cat("reading in matrices\n")
	matlist<-mclapply( mats, read.mat, mc.cores=detectCores() )
	
	cm=1
	if(xlims[1]=="auto" | xlims[2]=="auto"){
		cat("calculating xlims\n")
		cm<-do.call(cbind,matlist)
		q<-quantile(cm,probs=c(0.05,0.95),na.rm=T)
	}
	if(xlims[1]=="auto"){xlims[1]=q[1]}
	if(xlims[2]=="auto"){xlims[2]=q[2]}
	xlims<-as.numeric(xlims)
	print(xlims)
	
	cat("calculating score distributions\n")
	densitylist<-lapply( 1:nummats, function(x) density( matlist[[x]], from=xlims[1], to=xlims[2], na.rm=T))
	ymax<-max(unlist(lapply( 1:nummats, function(x) density( matlist[[x]], from=xlims[1], to=xlims[2],na.rm=T)$y  )),na.rm=TRUE)
	print(ymax)
	cat("plotting score distributions\n")
	plot(0,type="n",ylim=c(0,ymax), xlim=xlims, xlab="Fragment Size", ylab="Density")
	for(i in 1:nummats){
		lines(densitylist[[i]],col=plotcolors[i],lwd=2)
	}
	legend("topright",legend=legendnames, col=plotcolors, lwd=3)
}
mat.window		<- function( matfile, operation="mean", windowsize=5){
    library(tools)
    if(ceiling(windowsize/2)==floor(windowsize/2)){stop("windowsize must be an odd number")}
    cat("reading in matrix\n")
    mat<-read.mat(matfile)
    
    cat("gathering info\n")
    flank<-(windowsize-1)/2
    numcols<-ncol(mat)
    matrownames<-row.names(mat)
    
    print(numcols)
    
    cat("windowing matrix\n")
    mat<-simplify2array(lapply(1:numcols,function(x){
	if(x-flank<1){l=1}
	else{l=x-flank}
	if(x+flank>numcols){r=numcols}
	else{r=x+flank}
	rowMeans(mat[,l:r],na.rm=T)
    }))
    
    cat("wrapping up\n")
    row.names(mat)<-matrownames
    ext<-file_ext(matfile)
    outname<-paste(basename(removeext(matfile)),"_",operation,"win",windowsize,".",ext,sep="")
    write.mat(mat,file=outname)
    return(outname)
}
mat.getscores		<- function( matfile, refmatfile ){
	mat1<-read.mat(matfile)
	mat2<-read.mat(refmatfile)
	mat1genes<-unlist(lapply(strsplit(row.names(mat1),"_"),"[",1))
	mat1genes<-unlist(lapply(strsplit(row.names(mat1),"-"),"[",1))
	mat2genes<-unlist(lapply(strsplit(row.names(mat2),"_"),"[",1))
	refrows<-match(mat1genes,mat2genes)
	badrows<-which(is.na(refrows))
}
mat.heatmap3		<- function( mats=NULL , matnames=NULL , sorton=1 , sort.methods="none" , gene.list = NULL , sort.ranges = NA , sort.breaks = NA , numgroups=3 , crossmat=NULL , matcolors=NULL , groupcolors=NULL , heatmap.lims="5%,95%" , heatmap.colors="black,white" , fragmats=NULL , fragmatnames=NULL , fragrange=c(0,200) , vplot.colors="black,blue,yellow,red" , vplot.lims="0,95%" , forcescore=TRUE , cores="max" ){
	
	
	# TO DO
	# ##############
	# check sorting for expected sorting methods or existing file
	# check that mats is a character vector and all files exist
	# check that all mats have identical row names and nrow
	# check that all colors are R colors

	library(gtools)
	library(tools)
	library(parallel)
	pwd<-getwd()
	if(cores=="max"){cores=detectCores()-1}
	if(is.null(mats) & is.null(fragmats)){stop("nothing to plot")}
	

	valid.sort.methods=c("none","kmeans","chrom","max","min","mean","median","sd","maxloc","minloc","pregrouped")
	if(sum(sort.methods %in% valid.sort.methods) != length(sort.methods)){stop("unrecognized sort.methods. valid methods include none,kmeans,chrom,max,min,mean,median,sd,maxloc,minloc")}

	#CHECK SCOREMAT PARAMETERS AND DEFINE SCOREMAT SETTINGS
	if(is.null(mats) == FALSE){
		
		#gather info and check paramters
		nummats<-length(mats)
		winsizes=as.numeric(gsub("mat","",file_ext(mats)))
		if(sum(is.na(winsizes))>0){stop("cannot find window sizes for one or more matrices. window size (in bp) should be in the matrix file extension after 'mat' (for example, matrixname.mat10 for a window size of 10 bp")}
		# #### check mat names for uniqueness and length

		# set the names of matrices using file names if they aren't explicitly defined
		if(is.null(matnames)){ matnames<-basename(removeext(mats)) }
		
		#set unspecified settings to defaults
		if(length(heatmap.lims)==1){heatmap.lims<-rep(heatmap.lims,nummats)}
		

		if(length(heatmap.colors)==1){heatmap.colors<-rep(heatmap.colors,nummats)}
		scolramps<-lapply(heatmap.colors, function(x) colorRampPalette(unlist(strsplit(x,","))))
		
	}	
	
	#CHECK FRAGMAT PARAMETERS AND DEFINE FRAGMAT SETTINGS
	if(is.null(fragmats) == FALSE){
		numfragmats<-length(fragmats)
		fragwinsizes=as.numeric(gsub("fmat","",file_ext(fragmats)))
		if(sum(is.na(fragwinsizes))>0){stop("cannot find window sizes for one or more matrices")}
		if(is.null(fragmatnames)){ fragmatnames<-basename(removeext(fragmats)) }
		if(length(vplot.lims)==1){vplot.lims<-rep(vplot.lims,numfragmats)}
		

		if(length(vplot.colors)==1){vplot.colors<-rep(vplot.colors,numfragmats)}
		vcolramps<-lapply(vplot.colors, function(x) colorRampPalette(unlist(strsplit(x,","))) )
		
		vplotnames1<-vector(mode="character")
		vplotnames2<-vplotnames1
		

	}
	
	
	
	
	#load sort table if defined, otherwise sort based on mats defined in sorton
	if(mode(sort.methods)=="file"){
		# check if file exists and if sort.methods is length 1
		cat("loading sort table\n")
		sorttable<-read.table(sorting,quote=F,row.names=1,header=T,sep="\t",stringsAsFactors=F)
		sortsettings<-data.frame("V1"=colnames(sorttable)[1:(ncol(sorttable)-2)],"V2"="presort","V3"="presort")
		numgroups<-unlist(lapply(sorttable,length)) #THIS PROBABLY WRONG
		numsorts<-nrow(sortsettings)
		
		#make directory for saving files
		if(file.exists("heatmaps")==FALSE){system(command="mkdir heatmaps")}
		dname<-paste("heatmaps/",basename(removeext(r)),sep="")
		#dname<-uniquefilename(dname)
		system(paste("mkdir",dname))
		write.table(sorttable,file=paste(dname,"/",basename(dname),".sort",sep=""),sep="\t",row.names=TRUE,col.names=TRUE,quote=FALSE)
	} else{
		
		# read matrix for sorting and get dimensions
		sortmat<-read.mat(mats[sorton])
		matcols<-ncol(sortmat)
		matrows<-nrow(sortmat)
		
		# tabulate sort criteria
		sortsettings<-data.frame(
			"METHOD"=sort.methods,
			"START"=as.numeric(unlist(lapply(strsplit(sort.ranges,","),"[",1))),
			"END"=as.numeric(unlist(lapply(strsplit(sort.ranges,","),"[",2))),
			stringsAsFactors=FALSE
		)

		# if only 1 sort method, repeat method so sorting script runs OK
		if(nrow(sortsettings)==1 & sortsettings[1,1] != "kmeans" & sortsettings[1,1] != "chrom"){
			sortsettings[2,1]<-sortsettings[1,1]
		}

		# if sort range not defined, specify to use entire matrix
		sortsettings[is.na(sortsettings[,2]),2]=as.numeric((-1*winsizes[sorton])*(matcols-matcols/2))
		sortsettings[is.na(sortsettings[,3]),3]=as.numeric(winsizes[sorton]*(matcols-matcols/2))
		
		# determine how many sort criteria are defined, and match length of vector defining how many groups to create for each sort criterion
		numsorts<-nrow(sortsettings)
		length(numgroups)<-numsorts
		numgroups[is.na(numgroups)]<-1
		
		#print sort settings
		cat("\n####################\n")
		cat("SORT CRITERIA\n")
		print(sortsettings)
		cat("####################\n\n")
		
		#convert bp to column #s in sort settings
		sortsettings[,2]<-sortsettings[,2]/winsizes[sorton] + matcols/2 +1
		sortsettings[,3]<-sortsettings[,3]/winsizes[sorton] + matcols/2
		
		#if(normalize==TRUE){
		#	cat("normalizing sort matrix\n")
		#	sortmat<-t(simplify2array(mclapply(1:matrows[r],function(x) sortmat[x,]/mean(sortmat[x,],na.rm=TRUE),mc.cores=cores)))*mean(sortmat,na.rm=TRUE)
		#	row.names(sortmat)<-matlist[[r]]
		#}


		#sorttable<-data.frame("V1"=rep(1,matrows),stringsAsFactors=F)
		sorttable<-data.frame(matrix(NA,nrow=matrows,ncol=numsorts),stringsAsFactors=F)
		grouptable<-sorttable

		for(s in 1:numsorts){

			#sortcol<-vector(mode="numeric",length=matrows)

			if(s==1){numprevgroups=1} else{ numprevgroups<-length(unique(grouptable[,s-1])) }
			
			for(p in 1:numprevgroups){
				
				if(s==1){grouprows=1:matrows} else{grouprows<-which(grouptable[,s-1]==p)}
				
				#make and trim sort matrix
				tmpsortmat<-sortmat
				tmpsortmat<-tmpsortmat[grouprows,sortsettings[s,2]:sortsettings[s,3]]
				submatrows<-nrow(tmpsortmat)

				#convert infinite values to NA
				tmpsortmat[is.infinite(tmpsortmat)]<-NA
				
				#perform calculation for sorting
				if(sortsettings[s,1] == "sd"){
					cat("calculating standard deviation\n")
					sortvals<-as.numeric(as.character(apply(tmpsortmat,1,sd, na.rm=TRUE)))
				}
				if(sortsettings[s,1] == "median"){
					cat("calculating medians\n")
					sortvals<-as.numeric(as.character(apply(tmpsortmat,1,median, na.rm=TRUE)))
				}
				if(sortsettings[s,1] == "max"){
					cat("calculating maxs\n")
					sortvals<-as.numeric(as.character(apply(tmpsortmat,1,max, na.rm=TRUE)))
				}
				if(sortsettings[s,1] == "min"){
					cat("calculating mins\n")
					sortvals<-as.numeric(as.character(apply(tmpsortmat,1,min, na.rm=TRUE)))
				}
				if(sortsettings[s,1] == "mean"){
					cat("calculating means\n")
					sortvals<-as.numeric(as.character(apply(tmpsortmat,1,mean, na.rm=TRUE)))
				}
				if(sortsettings[s,1] == "maxloc"){
					cat("calculating max locations\n")
					sortvals<-as.numeric(as.character(apply(tmpsortmat,1,which.max)))
				}
				if(sortsettings[s,1] == "minloc"){
					cat("calculating min locations\n")
					sortvals<-as.numeric(as.character(apply(tmpsortmat,1,which.min)))
				}
				if(sortsettings[s,1] == "chrom"){
					cat("sorting by chromosome\n")
					chromlist<-unlist(lapply(strsplit(row.names(sortmat),";"),"[",2))
					chroms<-unique(chromlist)
					chroms<-mixedsort(chroms)
					chromnums<-1:length(chroms)
					numgroups[s]<-length(chroms)
					sortvals<-match(chromlist,chroms)
				}
				if(sortsettings[s,1] == "kmeans"){
					cat("kmeans-clustering data (k=",numgroups[s],")\n",sep="")
					tmpsortmat2<-tmpsortmat
					tmpsortmat2[is.na(tmpsortmat2)]<-0
					sortvals<-kmeans(tmpsortmat2,numgroups[s])$cluster
					rm(tmpsortmat2)
				}
				if(sortsettings[s,1] == "none"){
					cat("not sorting\n",sep="")
					sortvals<-1:nrow(tmpsortmat)
				}
				if(sortsettings[s,1] == "pregrouped"){
					cat("not sorting\n",sep="")
					sortvals<-tmpsortmat[,1]
					numgroups[s]<-length(unique(sortvals))
				}
				

				#assign sortvals to sorttable
				sorttable[grouprows,s] <- sortvals
				
				# assigned pregrouped data to group table
				if(sortsettings[s,1] == "kmeans" | sortsettings[s,1] == "chrom" | sortsettings[s,1] == "pregrouped"){
					grouptable[grouprows,s] <- sortvals
				}

				# if not already pregrouped, group sort values into specified # of groups
				if(sortsettings[s,1] != "kmeans" & sortsettings[s,1] != "chrom" | sortsettings[s,1] != "pregrouped"){
					
					# #### what if numgroups[s]==1 ???????

					# if breaks not defined for a given sorting criteria
					if(is.na(sort.breaks[s])){
						
						
						# find number of genes in an equally-split group
						rowspergroup<-ceiling(submatrows/numgroups[s])

						# define breaks based on number of genes per group
						brks<-c(seq(1,submatrows,rowspergroup),submatrows)

						#assign rows into equally-sized groups
						#grouptable[grouprows[order(sortvals)],s]<-cut((1:submatrows)[order(sortvals)],brks,labels=F,include.lowest=T)
						grouptable[grouprows,s]<-cut((1:submatrows)[order(order(sortvals))],brks,labels=F,include.lowest=T)

					} else{
						
						cat("grouping genes into",numgroups[s],"groups based on breaks\n")
					
						# #set numgroups to length(brks)+1
						numgroups[s]<-length(brks)+1
						
						brks<-unlist(strsplit(sort.breaks[s],","))
						
						# if percentiles defined for breaks, calculate break values
						if(grepl("%",brks)){
							
							# calculate quantile from %s in sort.breaks
							brks[grep("%",brks)]<-quantile(sortvals,probs=as.numeric(gsub("%","",brks[grep("%",brks)])))
						}
						brks<-c(-Inf,brks,Inf)
						
						# group genes by defined breaks
						grouptable[grouprows,s] <- cut(sortvals,brks,include.lowest=T,labels=F)
					}
				}
			}
		}

		# create table of colors
		if(is.null(groupcolors)){ groupcolors<-rainbow(max(numgroups)) }
		getcolor <- function(c){groupcolors[c]}
		color.table <- as.data.frame ( lapply ( grouptable , getcolor ) )

		# sort table
		heatmap.order <- order(do.call(order,as.data.frame(sorttable)))

		# name columns of tables
		colnames(grouptable)<-paste("GROUPING",1:numsorts,"-",sortsettings[,1],sortsettings[,3],sortsettings[,3],sep=";")
		colnames(sorttable)<-paste("SORTVALS",1:numsorts,"-",sortsettings[,1],sortsettings[,3],sortsettings[,3],sep=";")
		colnames(color.table)<-paste("COLORS",1:numsorts,"-",sortsettings[,1],sortsettings[,3],sortsettings[,3],sep=";")
		
		#make a table to store order, group, and other info
		heatmap.table <- data.frame(cbind(
			row.names=row.names(sortmat),
			heatmap.order,
			sorttable,
			grouptable,
			color.table
		))
		
		# ###### !! NEED FIXING AFTER MAT.MAKE IS CHANGED ####################
		#sorttable$symbol<-unlist(lapply(strsplit(row.names(sorttable),";"), "[", 3 ) )
		#sorttable$id<-unlist(lapply(strsplit(row.names(sorttable),";"), "[", 1 ) )
		#sorttable$chromosome<-unlist(lapply(strsplit(row.names(sorttable),";"), "[", 2 ) )		
		
		
		#make directory for saving files
		dir.create("heatmaps", showWarnings = FALSE)
		dname<-paste("heatmaps/",basename(matnames[sorton]),"_",numgroups[1],"g_",sortsettings[1,1],"_",sortsettings[1,2],"-",sortsettings[1,3],sep="",collapse="")
		#dname<-uniquefilename(dname)
		system(paste("mkdir",dname))
		write.table(heatmap.table,file=paste(dname,"/",basename(matnames[sorton]),".sort",sep=""),sep="\t",row.names=TRUE,quote=FALSE,col.names=TRUE)
	}
	
	
	if(is.null(mats)==FALSE){

		# ##### get order of preloaded sort table
		
		# get number of rows of sort table if preloaded
		matrows<-nrow(sorttable)

		# order the table of group info
		heatmap.order<-heatmap.table$heatmap.order
		table.starts<-1:3*(ncol(heatmap.table)-1)/3
		grouptable<-as.matrix(heatmap.table[,table.starts[2]:(table.starts[2]+1)])
		grouptable<-grouptable[order(heatmap.order),]
		color.table<-as.matrix(heatmap.table[,table.starts[3]:(table.starts[3]+1)])
		color.table<-color.table[order(heatmap.order),]
		#grouptable<-as.matrix(heatmap.table[,table.starts[2]:(table.starts[2]+1)])[order(heatmap.order,na.last=F),]
		#color.table<-as.matrix(heatmap.table[,table.starts[3]:(table.starts[3]+1)])[order(heatmap.order,na.last=F),]
		#heatmap.table<-heatmap.table[order(heatmap.order,na.last=F),]
		
		# identify number of rows in each group for each sorting
		clusternums<-lapply(1:numsorts,function(h){ unlist(lapply(1:numgroups[h],function(x) length(which(grouptable[,h]==x)))) } )
		grouprownumbers<-lapply(1:numgroups[1], function(x) which(grouptable[,1]==x) )
		
		# set legend names and colors
		if(numgroups[1]>1){
			legendnames<-paste(c(1:numgroups[1],"all"),", n= ",c(clusternums,matrows),sep="")
			
			#FIX THIS ######################################
			if(sortsettings[1,1]=="chrom"){
				legendnames<-paste(c(chroms,"all"),", n= ",c(clusternums,matrows),sep="")
			}
			
			legendcolors<-c(groupcolors,"gray50")

		} else{
			legendnames<-paste("all, n =",matrows)
			legendcolors<-"black"
		}
	}

	#process each frag matrix
	if(is.null(fragmats) == FALSE){
		for(v in 1:numfragmats){
			

			fragmat<-read.fmat(fragmats)
			
			fmatcols<-ncol(fragmat)

			x<-((1:fmatcols)-(fmatcols/2))*fragwinsizes[v]

			fragsizes<-lapply(1:fmatcols,function(x) as.numeric(na.omit(as.numeric(unlist(strsplit(paste(fragmat[,x],collapse=","),","))))))
			
			fragsizes<-lapply(1:fmatcols,function(x) fragsizes[[x]][which(fragsizes[[x]] >= fragrange[1] & fragsizes[[x]] <= fragrange[2])])

			vmat<-matrix(0,ncol=fmatcols,nrow=fragrange[2]-fragrange[1])
			
			for(h in 1:fmatcols){
				vmat[,h]<-hist(fragsizes[[h]],breaks=fragrange[2]:fragrange[1],plot=F)$counts
			}
				
			#set vmat score limits
			vbot<-strsplit(vplot.lims[v],",")[[1]][1]
			vtop<-strsplit(vplot.lims[v],",")[[1]][2]
			if(grepl("%",vbot)){ vbot<-quantile(vmat , probs=as.numeric(gsub("%","",vbot)) /100,na.rm=T) }
			if(grepl("%",vtop)){ vtop<-quantile(vmat , probs=as.numeric(gsub("%","",vtop)) /100,na.rm=T) }
			vbot<-as.numeric(vbot)
			vtop<-as.numeric(vtop)
			if(vtop==vbot){vtop=vtop+1}
			
			# if(rpm==TRUE){
			# 	vscalar<-1000000/as.numeric(gsub("#","",readLines(pipe(paste("head -n 1",fragmats[v])))))
			# 	if(is.na(vscalar)==FALSE){vmat<-vmat*vscalar
			# 	} else{cat("WARNING: FRAGMENT COUNT NOT FOUND IN FRAGMAT, NOT NORMALIZING\n")}
			# }

			#vmat<-vmat[nrow(vmat):1,]
			
			cat("drawing vplot\n")
			
			brks=seq(vbot,vtop,by=(vtop-vbot)/100)

			

			# draw heatmap of all
			vplotname<-paste(pwd,"/",dname,"/","vplot_all_",fragmatnames[v],".png",sep="")
			png(width=1000,height=1000,file=vplotname)
			

			
			layout(matrix(c(4,2,1,3),nrow=2),heights=c(1,4),widths=c(4,1))
			#layout(matrix(1:2,nrow=2),heights=c(1,4))
			par(oma=c(2,2,2,2))
			par(mar=c(3,3,0,0))
			image(matrix(brks),col=vcolramps[[v]](100),breaks=brks,axes=F)
			axis(side=1,at=c(0,1),labels=c(brks[1],brks[length(brks)]))
			mtext("bin counts",side=1,line=2)
			
			par(mar=c(3,3,0,0))
			image(t(vmat),breaks=brks,col=vcolramps[[v]](100),xaxt='n',yaxt='n')
			axis(side=1,at=c(0,0.5,1),labels=c(min(x),0,max(x)))
			axis(side=2,at=(0:4)/4,labels=fragrange[1]+0:4*((fragrange[2]-fragrange[1])/4))
			mtext("Distance from feature",side=1,line=2)
			mtext("Fragment size (bp)",side=2,line=2)
			
			dx<-apply(vmat,2,mean,na.rm=T)
			dy<-apply(vmat,1,mean,na.rm=T)
			par(mar=c(3,0,0,0))
			plot(dy,1:(fragrange[2]-fragrange[1]),type="l",yaxs='i',axes=F,lwd=4)
			par(mar=c(0,3,1,0))
			plot(1:fmatcols,dx,type="l",xaxs='i',axes=F,lwd=4,main=fragmatnames[v])
			

			dev.off()
			
			if(is.null(mats)==FALSE){

				fragmat<-fragmat[order(heatmap.order),]

				for(q in 1:numgroups[1]){
					

					vplotname<-paste(pwd,"/",dname,"/","vplot_group",q,"_",fmatname,".png",sep="")
					vplotname<-paste(pwd,"/",dname,"/","vplot_group",q,"_",fragmatnames[v],"_pergene.png",sep="")
					

					groupmat<-fragmat[grouprownumbers[[q]],]
					
					fragsizes<-lapply(1:fmatcols,function(x) as.numeric(na.omit(as.numeric(unlist(strsplit(paste(groupmat[,x],collapse=","),","))))))
			
					fragsizes<-lapply(1:fmatcols,function(x) fragsizes[[x]][which(fragsizes[[x]] >= fragrange[1] & fragsizes[[x]] <= fragrange[2])])

					vmat<-matrix(0,ncol=fmatcols[v],nrow=fragrange[2]-fragrange[1])
					
					for(h in 1:fmatcols[v]){
						vmat[,h]<-hist(fragsizes[[h]],breaks=fragrange[2]:fragrange[1],plot=F)$counts
					}

					# if(rpm==TRUE){
					# 	vscalar<-1000000/as.numeric(gsub("#","",readLines(pipe(paste("head -n 1",fragmats[v])))))
					# 	if(is.na(vscalar)==FALSE){vmat<-vmat*vscalar
					# 	} else{cat("WARNING: FRAGMENT COUNT NOT FOUND IN FRAGMAT, NOT NORMALIZING\n")}
					# }

					vmat<-vmat[nrow(vmat):1,]

					pergenescale<-nrow(fragmat)/nrow(groupmat)
					
					brks=seq(vbot,vtop,by=(vtop-vbot)/100)
					
					print(brks)
					print(length(brks))

					png(width=1000,height=1000,file=vplotname)
					heatmap.2(vmat,Colv=NA,Rowv=NA,dendrogram="none",trace="none",breaks=brks,labRow=NA,labCol=NA,col=vcolramps[[v]](100),main=fragmatnames[v],cex.main=5)
					dev.off()
					png(width=1000,height=1000,file=vplotname)
					heatmap.2(vmat*pergenescale,Colv=NA,Rowv=NA,dendrogram="none",trace="none",breaks=brks,labRow=NA,labCol=NA,col=vcolramps[[v]](100),main=fragmatnames[v],cex.main=5)
					dev.off()
				}

			}

		}
		montagecols<-numgroups[1]+1
		print(vplotnames1)
		cat("\n\n\n")
		print(vplotnames2)
		#cat(paste("montage -tile +",montagecols,"+",numfmats," ",paste(vplotnames1,collapse=" ")," ",pwd,"/",dname,"/vplot_montage_abs.png",sep=""))
		cat("\n\n\n\n")
		#cat(paste("montage -tile +",montagecols,"+",numfmats," ",paste(vplotnames2,collapse=" ")," ",pwd,"/",dname,"/vplot_montage_per.png",sep=""))
		cat("\n\n\n\n")
		
		
		system(paste("montage -geometry +2+2 -tile ",montagecols,"x",numfmats," ",paste(vplotnames1,collapse=" ", sep=" ")," ",pwd,"/",dname,"/vplot_montage_abs.png",sep=""))
		system(paste("montage -geometry +2+2 -tile ",montagecols,"x",numfmats," ",paste(vplotnames2,collapse=" ", sep=" ")," ",pwd,"/",dname,"/vplot_montage_per.png",sep=""))
	}
	
	
	
	
	# ##############################################################################
	# TEMPORARY IMPLEMENTATION OF CROSS-MATRIX GROUP AVERAGES
	#grouplist=lapply(1:nummats, function(x){lapply(1:numgroups[1],function(x){})})
	#q=1
	# ##############################################################################
	
	
	
	
	#process each matrix
	hmnames<-paste(pwd,"/",dname,"/","heatmap_",basename(matnames),".png",sep="")
	
	#for(l in 1:nummats){
	mclapply(1:nummats,function(l){
		
		# load matrix and get dimensions
		cat("reading in matrices\n")
		curmat<-read.mat(mats[l])
		matcols<-ncol(curmat)
		matrows<-nrow(curmat)
		
		cat("1\n")
		
		#sort matrix
		curmat<-curmat[order(heatmap.order),]

		# define heatmap color gradient function
		cat("2\n")
		
		# define x axis based on winsize and ncol assuming non-meta
		x<-((1:matcols)-(matcols/2))*winsizes[l]
		

		# set score limits for heatmap colors
		heatmap.mins<-unlist(lapply(strsplit(heatmap.lims,","),"[",1))
		heatmap.maxs<-unlist(lapply(strsplit(heatmap.lims,","),"[",2))
		if(grepl("%",heatmap.mins[l])){ heatmap.mins[l]<-quantile(curmat,probs=as.numeric(gsub("%","",heatmap.mins[l])) /100,na.rm=T) }
		if(grepl("%",heatmap.maxs[l])){ heatmap.maxs[l]<-quantile(curmat,probs=as.numeric(gsub("%","",heatmap.maxs[l])) /100,na.rm=T) }
		curmin<-as.numeric(heatmap.mins[l])
		curmax<-as.numeric(heatmap.maxs[l])
		
		if(curmin==curmax){curmax<-max(curmat,na.rm=TRUE)}
		if(curmin==curmax){curmax<-curmin+1}

		cat("3\n")
		

		curmat[is.infinite(curmat)]<-NA



		#calculate aggregate profiles
		groupmeans<-lapply(1:numgroups[1], function(y){
				colMeans(as.matrix(curmat[grouprownumbers[[y]],]),na.rm=TRUE)
		})
		groupmeans[[numgroups[1]+1]]<-colMeans(curmat,na.rm=TRUE)
		groupmeanmaxs<-unlist(lapply(groupmeans,max,na.rm=TRUE))
		groupmeanmins<-unlist(lapply(groupmeans,min,na.rm=TRUE))
		
		cat("4\n")
		
		# ##############################################################################
		# TEMPORARY IMPLEMENTATION OF CROSS-MATRIX GROUP AVERAGES
		#if(is.null(crossmat)==FALSE & l %in% crossmat == TRUE){
		#	grouplist[[q]]=groupmeans
		#	q=q+1
		#}
		# ##############################################################################
		
		#make NAs 0 for aesthetics
		if(forcescore==TRUE){
			curmat[is.na(curmat)]<-0
			curmat[is.nan(curmat)]<-0
			curmat[is.infinite(curmat)]<-0
		}
		
		
		
		cat("drawing heatmap (",basename(removeext(mats[l])),")\n")
		png(file=hmnames[l],height=5000,width=1000)
		
		# define color breaks
		brks<-c(seq(curmin,curmax,by=(curmax-curmin)/100),curmax)
		
		
		# copy matrix into new object
		mat<-curmat

		# create layout matrix
		m<-matrix(1:3,nrow=3)

		# set out-of-boundary scores to boundaries
		mat[which(mat>curmax)]<-curmax
		mat[which(mat<curmin)]<-curmin

		# create image layout and set margins
		layout(m,heights=c(1,20,5),widths=1)
		par(oma=c(10,0,10,0))
		par(mar=c(10,15,5,15))

		# draw color scale
		image(matrix(brks),col=scolramps[[l]](101),breaks=brks,axes=F)
		axis(side=1,at=c(0,1),labels=c(curmin,curmax),cex.axis=8,lwd=10,padj=2,line=0,tcl=-3)
		
		# title image
		mtext(matnames[l],side=3,cex=8,outer=T)
		
		# set margins and draw heatmap
		par(mar=c(5,15,5,15),xpd=TRUE)
		image(t(mat),breaks=brks,col=scolramps[[l]](101),axes=FALSE)
		
		# create group color bars next to heatmap
		for(g in 1:numsorts){
			for(i in 1:numgroups[g]){
				linevec<-grouptable[,g]
				linevec[which(grouptable[,g] != i)]<-NA
				linecoords=(1:matrows)/matrows
				linecoords[which(is.na(linevec))]<-NA
				lines(rep(-0.01-g*0.05,matrows),linecoords[matrows:1],lwd=40,col=groupcolors[i],lend=1)
			}
		}
		axis(side=1,at=c(0,0.5,1),labels=F,lwd=10,padj=1,line=1,tcl=-3)
		
		# draw average plot
		par(mar=c(20,15,0,15))
		plot(0,type="n",xlim=c(min(x),max(x)),ylim=c(min(groupmeanmins),max(groupmeanmaxs)),xaxs="i",axes=FALSE)
		for(i in 1:(numgroups[1]+1)){
			lines(x,groupmeans[[i]],lwd=10,col=legendcolors[i])
		}
		axis(side=1,at=c(min(x),0,max(x)),labels=c(min(x),0,max(x)),cex.axis=8,lwd=10,padj=2,line=0,tcl=-3)
		axis(side=2,cex.axis=8,lwd=10,tcl=-3,padj=-1)
		mtext("Distance from feature (bp)",side=1,cex=5,outer=T,line=2)
		dev.off()

	} , mc.cores=cores, mc.preschedule=F)
	#},mc.cores=cores)
	
	# make montage
	#if(nummats < 10){
	#	system(paste("montage -geometry +2+2 -tile ",nummats,"x1 ",paste(hmnames,collapse=" ", sep=" ")," ",pwd,"/",dname,"/heatmap_montage.png",sep=""))
	#}
	
	# ##############################################################################
	# TEMPORARY IMPLEMENTATION OF CROSS-MATRIX GROUP AVERAGES
	# if(is.null(crossmat)==FALSE){
	# 	x<-((1:matcols)-(matcols/2))*winsizes[1]
	# 	pdf(file=paste(pwd,"/",dname,"/","group-averages",".pdf",sep=""))
	# 	for(i in 1:(numgroups[1])){
	# 		cat("i=",i,"\n")
	# 		groupscores=lapply(lapply(grouplist,"[",i),unlist)
	# 		allscores=unlist(groupscores)
	# 		print(allscores)
	# 		#plot(0,type="n",xlim=c(min(x),max(x)),ylim=c(min(allscores),max(allscores)),xlab="Distance from TSS",ylab="Average Score",main=paste("Group",i))
	# 		plot(0,type="n",xlim=c(min(x),max(x)),ylim=c(1,2.5),xlab="Distance from TSS",ylab="Average Score",main=paste("Group",i))
	# 		for(j in 1:length(crossmat)){
	# 			cat("j=",j,"\n")
	# 			lines(x,groupscores[[j]],lwd=3,col=matcolors[j])
	# 		}
			
	# 	}
	# 	dev.off()
	# }
	# ##############################################################################
}
# ####################################
#      KENT FUNCTIONS                #
# ####################################
getbt2index		<- function( genome ){
	if(genome=="hg19"){indexfile=paste(datapath,"hg19/igenome/Sequence/Bowtie2Index/genome",sep="")}
	if(genome=="b73v2"){indexfile=paste(datapath,"b73v2/assembly/b73v2_bowtie2/b73",sep="")}
	if(genome=="dm3"){indexfile=paste(datapath,"dm3/igenome/Sequence/Bowtie2Index/genome",sep="")}
	if(genome=="kshv"){indexfile=paste(datapath,"kshv/bt2index/kshv",sep="")}
	if(genome=="repbase"){indexfile=paste(datapath,"b73v2/repbase/repbase_bowtie2index/repbasemays",sep="")}
        return(indexfile)
}
getbtindex		<- function( genome ){
	if(genome=="b73v2"){indexfile=paste(datapath,"b73v2/btindex/genome",sep="")}
	return(indexfile)
}
getbwaindex		<- function( genome ){
	if(genome=="b73v2"){indexfile=paste(datapath,"b73v2/assembly/genome.fa",sep="")}
	return(indexfile)
}
getgenomefile		<- function( genome){
	if(genome=="b73v2"){chromfile=paste(datapath,"b73v2/b73v2.chrom.sizes",sep="")}
	if(genome=="dm3"){chromfile=paste(datapath,"dm3/dm3.chrom.sizes",sep="")}
	if(genome=="hg19"){chromfile=paste(datapath,"hg19/hg19.chrom.sizes",sep="")}
	if(genome=="hg18"){chromfile=paste(datapath,"hg18/hg18.chrom.sizes",sep="")}
	if(genome=="saccer3"){chromfile=paste(datapath,"saccer3/saccer3.chrom.sizes",sep="")}
	if(genome=="mm10"){chromfile=paste(datapath,"mm10/mm10.chrom.sizes",sep="")}
    return(chromfile)
}
gettindex		<- function( genome){
	if(genome=="hg19"){indexfile=paste(datapath,"hg19/tophatindex/genes",sep="")}
        return(indexfile)
}
getgtf			<- function( genome){
	if(genome=="hg19"){indexfile=paste(datapath,"hg19/igenome/Annotation/Genes/genes.gtf",sep="")}
        return(indexfile)
}
wigToBigWig		<- function( datafiles,genome="hg19"){
	for(l in 1:length(datafiles)){
		system(command=paste("wigToBigWig -clip", datafiles[l], getgenomefile(genome), paste(basename(removeext(datafiles[l])),".bw",sep="")))
	}
}
bedGraphToBigWig		<- function( datafiles,genome="hg19"){
	for(l in 1:length(datafiles)){
		cat(basename(datafiles[l]),": converting to bigWig\n")
		outname<-paste(basename(removeext(datafiles[l])),".bw",sep="")
		system(command=paste("bedGraphToBigWig", datafiles[l], getgenomefile(genome), outname))
		return(outname)
	}
}
bed.clip			<- function( datafiles,genome="hg19"){
	library(tools)
	for(l in 1:length(datafiles)){
		cat(basename(datafiles[l]),": clipping bed\n")
		ext<-file_ext(datafiles[l])
		outname<-paste(basename(removeext(datafiles[l])),".",ext,sep="")
		system(command=paste("bedClip", datafiles[l], getgenomefile(genome), outname))
		return(outname)
	}
}
bed.removechrom		<- function( datafiles,chrom,genome="hg19"){
	library(tools)
	for(l in 1:length(datafiles)){
		cat(basename(datafiles[l]),": removing chromosome",chrom,"from",datafiles[l],"\n")
		ext<-file_ext(datafiles[l])
		outname<-paste(basename(removeext(datafiles[l])),".",ext,sep="")
		system(command=paste("bedClip", datafiles[l], getgenomefile(genome), outname))
		return(outname)
	}
}
bedToBigBed		<- function( datafiles,genome="hg19"){
	for(l in 1:length(datafiles)){
		outname<-paste(basename(removeext(datafiles[l])),".bb",sep="")
		system(command=paste("bedToBigBed", datafiles[l], getgenomefile(genome), outname))
		return(outname)
	}
}
bigWigToBedGraph		<- function( datafiles,genome="hg19"){
	for(l in 1:length(datafiles)){
		cat(basename(datafiles[l]),": converting to bedGraph\n")
		outname<-paste(basename(removeext(datafiles[l])),".bg",sep="")
		system(command=paste("bigWigToBedGraph", datafiles[l], outname))
		return(outname)
	}
}
bigBedToBed		<- function( datafiles,genome="hg19"){
	for(l in 1:length(datafiles)){
		system(command=paste("bigBedToBed", datafiles[l], paste(basename(removeext(datafiles[l])),".bed",sep="")))
	}
}
# ####################################
#      TRACK FUNCTIONS               #
# ####################################
hub.bed			<- function( trackfiles, bedcols, color=TRUE, parentname=NULL, composite=FALSE, maxitems=1000, densecoverage=NULL, colorbystrand=NULL, tracknames=removeext(trackfiles), hubloc="dvera@epsilon.bio.fsu.edu:public_html/hubs/dlv/hg19",printtrack=F){
	library(tools)
	if(length(which(file_ext(trackfiles) %in% c("bed")) > 0)){stop("cannot create track from bed files, convert to bigBed")}
	print(data.frame("file"=trackfiles,"names"=tracknames))
	if(composite==TRUE & is.null(parentname)){stop("must specify parent name if composite=TRUE")}
	#check if dense coverage is numeric and not logical
	
	if(composite){
		track<-c(
			"",
			"",
			paste("track",parentname),
			paste("shortLabel",parentname),
			paste("longLabel",parentname),
			paste("type bigBed",bedcols,"."),
			"compositeTrack on",
			"visibility hide",
			"allButtonPair on",
			"dragAndDrop on",
			if(color){paste("itemRgb on")},
			if(is.null(colorbystrand)==FALSE){paste("colorByStrand",paste(col2rgb(colorbystrand[1]),collapse=","),paste(col2rgb(colorbystrand[2]),collapse=","),sep=" ")},
			if(is.null(densecoverage)==FALSE){paste("denseCoverage",densecoverage)},
			""
			)
	} else{
		track<-""
	}
	
	for(i in 1:length(trackfiles)){
		subtrack<-c(
			paste("track ",if(composite){paste(parentname,"-",sep="")},tracknames[i],sep=""),
			paste("bigDataUrl bbi/",trackfiles[i],sep=""),
			paste("shortLabel",tracknames[i]),
			paste("longLabel",tracknames[i]),
			paste("type bigBed",bedcols,"."),
			paste("maxItems",maxitems),
			"visilbility hide",
			if(color){paste("itemRgb on")},
			if(is.null(colorbystrand)==FALSE){paste("colorByStrand",paste(col2rgb(colorbystrand[1]),collapse=","),paste(col2rgb(colorbystrand[2]),collapse=","),sep=" ")},
			if(is.null(densecoverage)==FALSE){paste("denseCoverage",densecoverage)},
			if(composite){paste("parent",parentname)},
			""
		)
		track<-append(track,subtrack)
	}
	
	
	trackfile<-data.frame("V1"=track,stringsAsFactors=FALSE)
	if(printtrack){cat(unlist(trackfile),sep="\n")} else{write.tsv(trackfile,file="tmphubdb.txt")}
	filelist<-paste(trackfiles,sep=" ",collapse=" ")
	login<-unlist(strsplit(hubloc,":"))[1]
	path<-unlist(strsplit(hubloc,":"))[2]
	cat(paste("scp ",hubloc,"/bbi/",sep=""))
	system(paste("scp ",filelist," ",hubloc,"/bbi/",sep=""))
	system(paste("cat tmphubdb.txt | ssh ",login," 'cat >> ",path,"/hubDb.txt'",sep=""))
}

hub.bam			<- function( trackfiles, minAliQual = 0 , indelDoubleInsert = TRUE , indelQueryInsert = TRUE , bamColorMode = "gray" , bamGrayMode = "aliQual" , paired = TRUE , parentname=NULL, composite=FALSE, maxitems=1000, densecoverage=NULL, colorbystrand=NULL, tracknames=removeext(trackfiles), hubloc="dvera@epsilon.bio.fsu.edu:public_html/hubs/dlv/hg19",printtrack=F){
	library(tools)
	if(length(which(file_ext(trackfiles) %in% c("bed")) > 0)){stop("cannot create track from bed files, convert to bigBed")}
	print(data.frame("file"=trackfiles,"names"=tracknames))
	if(composite==TRUE & is.null(parentname)){stop("must specify parent name if composite=TRUE")}
	#check if dense coverage is numeric and not logical
	
	bais<-paste0(trackfiles,".bai")
	if(sum(file.exists(bais)) != length(trackfiles) ) {stop ("one or more bam indices not found!")}

	if(composite){
		track<-c(
			"",
			"",
			paste("track",parentname),
			paste("shortLabel",parentname),
			paste("longLabel",parentname),
			"type bam",
			"compositeTrack on",
			"visibility hide",
			"allButtonPair on",
			"dragAndDrop on",
			"showNames off",
			paste("minAliQual",minAliQual),
			paste("bamColorMode",bamColorMode),
			if(indelDoubleInsert){"indelDoubleInsert on"},
			if(indelQueryInsert){"indelQueryInsert on"},
			if(bamColorMode=="gray"){paste("bamGrayMode",bamGrayMode)},
			if(paired){"pairEndsByName ."},
			""
			)
	} else{
		track<-""
	}
	
	for(i in 1:length(trackfiles)){
		subtrack<-c(
			paste("track ",if(composite){paste(parentname,"-",sep="")},tracknames[i],sep=""),
			"type bam",
			if(composite){paste("parent",parentname)},
			paste("bigDataUrl bbi/",trackfiles[i],sep=""),
			paste("shortLabel",tracknames[i]),
			paste("longLabel",tracknames[i]),
			if(composite==F){paste("minAliQual",minAliQual)},
			if(composite==F){paste("bamColorMode",bamColorMode)},
			if(composite==F){"visilbility hide"},
			if(composite==F){"showNames off"},
			if(composite==F & indelDoubleInsert==T){"indelDoubleInsert on"},
			if(composite==F & indelQueryInsert==T){"indelQueryInsert"},
			if(composite==F & bamColorMode=="gray"){paste("bamGrayMode",bamGrayMode)},
			if(composite==F & paired==T){"pairEndsByName ."},
			""
		)
		track<-append(track,subtrack)
	}
	
	
	trackfile<-data.frame("V1"=track,stringsAsFactors=FALSE)
	if(printtrack){cat(unlist(trackfile),sep="\n")} else{write.tsv(trackfile,file="tmphubdb.txt")}
	filelist<-paste(trackfiles,bais,sep=" ",collapse=" ")
	login<-unlist(strsplit(hubloc,":"))[1]
	path<-unlist(strsplit(hubloc,":"))[2]
	cat(paste("scp ",hubloc,"/bbi/",sep=""))
	system(paste("scp ",filelist," ",hubloc,"/bbi/",sep=""))
	system(paste("cat tmphubdb.txt | ssh ",login," 'cat >> ",path,"/hubDb.txt'",sep=""))
}

track.wig		<- function( trackfiles,tracknames=removeext(trackfiles), plotcolors=rep("black",length(trackfiles)), user=Sys.getenv("USER"),server="epsilon.bio.fsu.edu",path="public_html/hubs/dlv",genome="hg19",range=c(0,50),printtrack=F){
	library(tools)
	if(user=="dlv04c"){user="dvera"}
	if(length(which(file_ext(trackfiles) %in% c("wig","Wig","wiggle")) > 0)){stop("cannot create track from wig files, convert to bigWig")}
	print(data.frame("file"=trackfiles,"names"=tracknames,"color"=plotcolors))
	
	track<-""
	for(i in 1:length(trackfiles)){
		c<-paste(col2rgb(plotcolors[i]),collapse=",")
		subtrack<-c(
			paste("track ",tracknames[i],sep=""),
			paste("bigDataUrl bbi/",trackfiles[i],sep=""),
			paste("shortLabel ",tracknames[i],sep=""),
			paste("longLabel ",tracknames[i],sep=""),
			paste("type bigWig ",range[1]," ",range[2],sep=""),
			paste("color ",c,sep=""),
			paste("altColor ",c,sep=""),

		""
		)
		track<-append(subtrack,track)
	}
	
	if(printtrack==TRUE){cat(unlist(outfile),sep="\n")}
	
	trackfile<-data.frame("V1"=track,stringsAsFactors=FALSE)
	write.tsv(trackfile,file="tmphubdb.txt")
	filelist<-paste(trackfiles,sep=" ",collapse=" ")
	system(paste("scp ",filelist," ",user,"@",server,":",path,"/",genome,"/bbi/",sep=""))
	system(paste("cat tmphubdb.txt | ssh ",user,"@",server," 'cat >> ",path,"/",genome,"/hubDb.txt'",sep=""))
}
hub.wig			<- function( trackfiles, range=c(0,50), parentname=NULL, multiwig=FALSE, composite=FALSE, tracknames=basename(removeext(trackfiles)), plotcolors=rainbow(length(trackfiles)), altcolors=plotcolors, hubloc="dvera@epsilon.bio.fsu.edu:public_html/hubs/dlv/hg19",printtrack=F){
	if(composite==TRUE & is.null(parentname)){stop("must specify parent name if composite=TRUE")}
	if(composite==TRUE & multiwig==TRUE){stop("cannot be both composite and multiwig")}
	print(data.frame("file"=trackfiles,"names"=tracknames,"color"=plotcolors))
	track=""
	if(multiwig){
		track<-c(
			"",
			"",
			paste("track",parentname),
			paste("shortLabel",parentname),
			paste("longLabel",parentname),
			paste("type bigWig",range[1],range[2]),
			"container multiWig",
			"graphType points",
			"configurable on",
			"visilbility hide",
			"maxHeightPixels 200:50:32",
			"aggregate transparentOverlay",
			"showSubtrackColorOnUi on",
			"autoScale on",
			"windowingFunction mean",
			"yLineOnOff on",
			"yLineMark 0",
			"smoothingWindow off",
			"alwaysZero on",
			""
			)
	}

	for(i in 1:length(trackfiles)){
		c<-paste(col2rgb(plotcolors[i]),collapse=",")
		a<-paste(col2rgb(altcolors[i]),collapse=",")
		
		subtrack<-c(
			paste("track ",if(multiwig){paste(parentname,"-",sep="")},tracknames[i],sep=""),
			paste("bigDataUrl bbi/",basename(trackfiles[i]),sep=""),
			paste("shortLabel ",tracknames[i],sep=""),
			paste("longLabel ",tracknames[i],sep=""),
			paste("type bigWig ",range[1]," ",range[2],sep=""),
			paste("color ",c,sep=""),
			paste("altColor ",a,sep=""),
			if(multiwig){paste("parent",parentname)},
			if(multiwig==F | composite==F){"graphType points"},
			if(multiwig==F | composite==F){"configurable on"},
			if(multiwig==F | composite==F){"visilbility hide"},
			if(multiwig==F | composite==F){"maxHeightPixels 200:50:32"},
			if(multiwig==F | composite==F){"autoScale on"},
			if(multiwig==F | composite==F){"windowingFunction mean"},
			if(multiwig==F | composite==F){"yLineOnOff on"},
			if(multiwig==F | composite==F){"yLineMark 0"},
			if(multiwig==F | composite==F){"smoothingWindow off"},
			if(multiwig==F | composite==F){"alwaysZero on"},
			""
		)
		track<-append(track,subtrack)
	}
	
	trackfile<-data.frame("V1"=track,stringsAsFactors=FALSE)
	if(printtrack){cat(unlist(trackfile),sep="\n")} else{write.tsv(trackfile,file="tmphubdb.txt")}
	filelist<-paste(trackfiles,sep=" ",collapse=" ")
	login<-unlist(strsplit(hubloc,":"))[1]
	path<-unlist(strsplit(hubloc,":"))[2]
	cat(paste("scp ",hubloc,"/bbi/",sep=""))
	system(paste("scp ",filelist," ",hubloc,"/bbi/",sep=""))
	system(paste("cat tmphubdb.txt | ssh ",login," 'cat >> ",path,"/hubDb.txt'",sep=""))
}
track.compositewig	<- function( trackfiles , parentname=NULL , hubloc=NULL , range=c(0,3) , plotcolors=rainbow(length(trackfiles)) , tracknames=removeext(trackfiles) ){
	
	if(is.null(parentname)){stop("must define parentname")}
	if(is.null(hubloc)){stop("must define hubloc")}
	trackfiles<-trackfiles[order(trackfiles,decreasing=T)]
	
	header<-c(
		"",
		"",
		paste("track ",parentname,"-parent",sep=""),
		"compositeTrack on",
		paste("shortLabel",parentname),
		paste("longLabel",parentname),
		"type bed 3",
		"noInherit on",
		"configurable on",
		"visilbility squish",
		paste("subGroup1 view Views ",parentname,"=",parentname,sep=""),
		"",
		paste("track",parentname),
		paste("shortLabel",parentname),
		paste("longLabel",parentname),
		paste("view ",parentname),
		paste("parent ",parentname,"-parent",sep=""),
		"graphType bar",
		"configurable on",
		"visilbility squish",
		"maxHeightPixels 200:64:32",
		"autoScale off",
		"windowingFunction mean",
		"smoothingWindow off",
		""
		)

	for(i in 1:length(trackfiles)){
		c<-paste(col2rgb(plotcolors[i]),collapse=",")
		track<-c(
			paste("track ",parentname,"-",tracknames[i],sep=""),
			paste("bigDataUrl bbi/",trackfiles[i],sep=""),
			#paste("shortLabel ",letters[i],"_",tracknames[i],sep=""),
			paste("shortLabel",tracknames[i]),
			paste("longLabel",tracknames[i]),
			paste("type bigWig",range[1],range[2]),
			paste("parent",parentname),
			paste("color",c),
			paste("altColor",c),
			paste("subGroups view=",parentname,sep=""),
			"visibility squish",
			""
		)
		header<-append(header,track)
	}
	
	outfile<-data.frame("V1"=header,stringsAsFactors=FALSE)
	write.tsv(outfile,file="tmphubdb.txt")
	filelist<-paste(trackfiles,sep=" ",collapse=" ")
	login<-unlist(strsplit(hubloc,":"))[1]
	path<-unlist(strsplit(hubloc,":"))[2]
	cat(paste("scp ",hubloc,"/bbi/",sep=""))
	system(paste("scp ",filelist," ",hubloc,"/bbi/",sep=""))
	system(paste("cat tmphubdb.txt | ssh ",login," 'cat >> ",path,"/hubDb.txt'",sep=""))
}
# ####################################
#      MISC. FUNCTIONS               #
# ####################################
isegtocoords		<- function( coords , isegs ){
	library(parallel)
	segnames<-basename(removeext(isegs))
	cat("loading coordinates\n")
	coords<-read.delim(pipe(paste("cut -f 1,2,3",coords)),stringsAsFactors=F,header=F)
	
	mclapply(1:length(isegs),function(i){
		curseg<-read.tsv(isegs[i],skip=1)
		
		mclapply(0:2, function(b){
			cat(segnames[i],": processing BC",b,"\n")
			bg<-data.frame("V1"=coords[curseg[,(b*3+1)],1],"V2"=coords[curseg[,(b*3+1)],2],"V3"=coords[curseg[,(b*3+2)],3],"V4"=curseg[,(b*3+3)])
			t<-which(complete.cases(bg$V2))
			t<-t[length(t)]
			bg<-bg[1:t,]
			outname<-paste(segnames[i],"_BC",b,".bg",sep="")
			write.tsv(bg,file=outname)
		},mc.cores=3)
	},mc.cores=floor(detectCores()/3))
}
remove.header		<- function( filename ){
	library(tools)
	ext<-file_ext(filename)
	shortname<-basename(removeext(filename))
	outname<-paste(shortname,"_rh.",ext,sep="")
	system(paste("tail -n +2",filename,">",outname))
	system(paste("mv",outname,filename))
	outname<-basename(filename)
	return(outname)
}
rpm2timing		<- function( bgfiles, reference=1, cores="max", fractionhist=TRUE , scorehist=TRUE ){
	
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	fractions=c("G1b","G2","S1","S2","S3","S4")
	
	cat("checking file count\n")
	if(floor(length(bgfiles)/6) != ceiling(length(bgfiles)/6) ){ stop("# of files not divisible by 6. missing a file?") }
	
	cat("checking file lines\n")
	if(length(unique(unlist(lapply(bgfiles,filelines)))) != 1){stop("files have different numbers of lines")}
	
	cat("finding files for each cell line\n")
	cells<-unique(remove.suffix(remove.prefix(bgfiles[grep("G2",bgfiles)],"Seq"),"G2"))
	print(cells)
	numcells<-length(cells)
	rb=rainbow(numcells)
	cellfiles<-lapply(1:numcells,function(x) bgfiles[grep(cells[x],bgfiles)] )
	print(cellfiles)
	
	outnames<-paste(cells,"_weightedscore.bg",sep="")
	finalnames<-paste(cells,"_iqr_qnorm.bg",sep="")
	#print(finalnames)
	
	cat("checking files for each cell line\n")
	numpercell=unique(unlist(lapply(cellfiles,length)))
	if(length(numpercell) != 1 | numpercell[1] != 6 ){print(cellfiles);stop("each cell line must match only 6 files")}
	checko<-lapply(1:numcells,function(x){
		lapply(1:6, function(y){
			if(grepl(fractions[y],cellfiles[[x]][y])==FALSE){
				stop(paste("cell files do not match expected fraction for",cells[x],cellfiles[[x]][y]))
			}
		} )
	})
	
	coords<-read.delim(pipe(paste("cut -f 1,2,3",cellfiles[[1]][1])),header=F)
	
	cat("calculating weighted score and IQR normalizing reference\n")
	# read in scores for each fraction for each cell line, calculated weighted score, and save file
	celldata<-mclapply(1:numcells,function(x){
		
		cd<-as.data.frame(lapply(1:6, function(y){
			as.numeric(readLines(pipe(paste("cut -f 4",cellfiles[[x]][y]))))
		} ) )
		colnames(cd)<-fractions
		return(cd)
	},mc.cores=cores )
	
	#fractiondensities<-mclapply(1:numcells,function(x){
	#	lapply(1:6,function(y){
	#		density(celldata[[x]][y])
	#	})
	#},mc.cores=cores)
	
	
	pdf(file="scoredensities.pdf")
	for(i in 1:6){
		plotmax<-quantile(celldata[[1]][,i],probs=0.95)
		plot(density(celldata[[1]][,i],from=0,to=plotmax),col=rb[1],main=fractions[i])
		for(j in 2:numcells){
			lines(density(celldata[[j]][,i],from=0,to=plotmax),col=rb[j])
		}
		legend("topright",legend=cells,col=rb,lwd=3)
	}
	
	
	celldata<-mclapply(1:numcells,function(x){
		cd<-celldata[[x]]
		wa<-(0.917*cd$G1b)+(0.750*cd$S1)+(0.583*cd$S2)+(0.417*cd$S3)+(0.250*cd$S4)+(0*cd$G2)
		wa[which(wa==0)] <- NA
		return(wa)
	},mc.cores=cores )
	
	
		plotmax<-quantile(celldata[[1]],probs=0.9,na.rm=TRUE)
		plot(density(celldata[[1]],from=0,to=plotmax),col=rb[1],main="weighted scores")
		for(j in 2:numcells){
			lines(density(celldata[[j]],from=0,to=plotmax),col=rb[j])
		}
		legend("topright",legend=cells,col=rb,lwd=3)
	dev.off()
	
	celldata<-mclapply(1:numcells,function(x){
		wa=celldata[[x]]
		if(x==reference){ wa<-( (wa-median(wa,na.rm=TRUE) ) ) * 1.59  / IQR(wa,na.rm=TRUE) }
		coords$V4=wa
		return(coords)
	},mc.cores=cores )
	
	cat("removing windows with no data\n")
	badblocks<-unique(unlist(lapply(1:numcells,function(x)(which(is.na(celldata[[x]][,4]))))))
	celldata<-mclapply(1:numcells,function(x){
		if(length(badblocks)>0){celldata[[x]]<-celldata[[x]][-badblocks,]}
		return(celldata[[x]])
	},mc.cores=cores)
	
	cat("normalizing data to",cells[reference],"and saving\n")
	celldata<-mclapply(1:numcells,function(x){
		celldata[[x]][,4][order(celldata[[x]][,4])] <- celldata[[reference]][,4][order(celldata[[reference]][,4])]
		write.tsv(celldata[[x]],file=finalnames[x])
	},mc.cores=cores)
}
rpm2timing3		<- function ( bgfiles, reference=1,cores="max"){
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	
	
	fractions=c("G1b","G2","S1","S2","S3","S4")
	
	cat("checking file count\n")
	if(floor(length(bgfiles)/6) != ceiling(length(bgfiles)/6) ){ stop("# of files not divisible by 6. missing a file?") }
	
	#cat("checking file lines\n")
	#if(length(unique(unlist(lapply(bgfiles,filelines)))) != 1){stop("files have different numbers of lines")}
	
	cat("finding files for each cell line\n")
	cells<-unique(remove.suffix(remove.prefix(bgfiles[grep("G2",bgfiles)],"Seq"),"G2"))
	print(cells)
	numcells<-length(cells)
	rb=rainbow(numcells)
	cellfiles<-lapply(1:numcells,function(x) bgfiles[grep(cells[x],bgfiles)] )
	#print(cellfiles)
	
	ubgnames<-paste(cells,"_union.bg",sep="")
	outnames<-paste(cells,"_weightedscore.bg",sep="")
	finalnames<-paste(cells,"_iqr_qnorm.bg",sep="")
	#print(finalnames)
	
	cat("checking files for each cell line\n")
	numpercell=unique(unlist(lapply(cellfiles,length)))
	if(length(numpercell) != 1 | numpercell[1] != 6 ){print(cellfiles);stop("each cell line must match only 6 files")}
	checko<-lapply(1:numcells,function(x){
		lapply(1:6, function(y){
			if(grepl(fractions[y],cellfiles[[x]][y])==FALSE){
				stop(paste("cell files do not match expected fraction for",cells[x],cellfiles[[x]][y]))
			}
		} )
	})
	
	cat("making unionbg\n")
	#celldata<-lapply(1:numcells,function(x){
	celldata<-mclapply(1:numcells,function(x){
		cd<-read.delim(pipe(paste("bedtools unionbedg -filler NA -i",paste(cellfiles[[x]],collapse=" "))))
		colnames(cd)<-c("chrom","start","stop",fractions)
		goodrows<-which(rowSums(is.na(cd)) != 6)
		numrows<-nrow(cd)
		numgoodrows<-length(goodrows)
		cat(numrows-numgoodrows,"windows have no data in at least 1 fraction (",(numrows-numgoodrows)/numrows,"% )\n")
		cd[is.na(cd)]<-0
		wa<-(0.917*cd$G1b)+(0.750*cd$S1)+(0.583*cd$S2)+(0.417*cd$S3)+(0.250*cd$S4)+(0*cd$G2)
		cd<-data.frame("V1"=cd$chrom,"V2"=cd$start,"V3"=cd$stop,"V4"=wa,stringsAsFactors=FALSE)
		rm(wa)
		return(cd)
		print(head(cd))
	},mc.cores=cores)
	#})
	ref<-unlist(lapply(1:numcells,function(x) celldata[[x]][,4] ))
	#numrefs<-length(ref)
	
	cat("normalizing data\n")
	celldata<-mclapply(1:numcells,function(x){
		cd<-celldata[[x]]
		numpoints<-nrow(cd)
	#	if(numpoints < numrefs){
			curref<-sample(ref,numpoints)
			cd[,4][order(cd[,4])]<-curref[order(curref)]
	#	}
	#	if(numpoints > numrefs){
	#		curref<-sample(rep(ref,10),numpoints)
	#		cd[,4][order(cd[,4])]<-curref[order(curref)]
	#	}
	#	if(numpoints == numrefs){
	#		curref<-ref
	#		cd[,4][order(cd[,4])]<-curref[order(curref)]
	#	}
		cd[,4]<-( (cd[,4]-median(cd[,4],na.rm=TRUE) ) ) * 1.59  / IQR(cd[,4],na.rm=TRUE)
		write.tsv(cd,file=finalnames[x])
		return(cd)
	},mc.cores=cores)
}
scatterdens		<- function( x , y , maxscore="95%" , breaks=250 , xlims=c("1%","99%") , ylims=c("1%","99%") , white.background = F , xlabel=NULL , ylabel=NULL , cores="max" , hmcolors=c("black","blue","yellow","red") ){
	
	#check for infinite values? or just discard?
	xinfind <- which(is.infinite(x))
	yinfind <- which(is.infinite(y))
	infind <- unique(c(xinfind,yinfind))
	if(length(infind) > 0){
		x<-x[-infind]
		y<-y[-infind]
	}
	

	if(is.null(xlabel)){xlabel=deparse(substitute(x))}
	if(is.null(ylabel)){ylabel=deparse(substitute(y))}
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	rb<-colorRampPalette(hmcolors)
	#define score ranges
	if(grepl("%",xlims[1])){xlims[1]<-quantile(x,probs=as.numeric(gsub("%","",xlims[1]))/100,na.rm=TRUE)}
	if(grepl("%",xlims[2])){xlims[2]<-quantile(x,probs=as.numeric(gsub("%","",xlims[2]))/100,na.rm=TRUE)}
	if(grepl("%",ylims[1])){ylims[1]<-quantile(y,probs=as.numeric(gsub("%","",ylims[1]))/100,na.rm=TRUE)}
	if(grepl("%",ylims[2])){ylims[2]<-quantile(y,probs=as.numeric(gsub("%","",ylims[2]))/100,na.rm=TRUE)}
	
	xlims<-as.numeric(xlims)
	ylims<-as.numeric(ylims)
	
	#limit scores to defined ranges
	cat("limiting scores to range\n")
	goodrows<-which(x >= xlims[1] & x <= xlims[2] & y >= ylims[1] & y <= ylims[2])
	a<-x[goodrows]
	b<-y[goodrows]
	
	#define bin breaks
	xbreaks<-seq(xlims[1],xlims[2],(xlims[2]-xlims[1])/breaks)
	ybreaks<-seq(ylims[1],ylims[2],(ylims[2]-ylims[1])/breaks)
	
	cat("binning x-axis data\n")
	xbinind<-cut(a,xbreaks,labels=F)
	
	cat("binning y-axis data\n")
	h<-mclapply(1:breaks,function(x){
		
		hist(b[which(xbinind==x)],breaks=ybreaks,plot=F)
	},mc.cores=cores)
	
	
	binmat<-data.matrix(as.data.frame(mclapply(h,"[[",2)))
	#binmat<-binmat[nrow(binmat):1,]
	row.names(binmat)<-xbreaks[1:breaks]
	colnames(binmat)<-ybreaks[1:breaks]
	binmat<-t(binmat)
	
	if(white.background){ binmat[binmat==0]<-NA }

	cat("plotting heatmap\n")
	if(grepl("%",maxscore)){maxscore<-quantile(binmat,probs=as.numeric(gsub("%","",maxscore))/100,na.rm=TRUE)}
	if(maxscore==0){maxscore=max(binmat)}
	brks<-seq(0,maxscore,maxscore/100)
	binmat[binmat>brks[101]]<-brks[101]
	
	layout(matrix(c(4,2,1,3),nrow=2),heights=c(1,4),widths=c(4,1))
	#layout(matrix(1:2,nrow=2),heights=c(1,4))
	par(oma=c(2,2,2,2))
	par(mar=c(3,3,0,0))
	image(matrix(brks),col=rb(length(brks)-1),breaks=brks,axes=F)
	axis(side=1,at=c(0,1),labels=c(brks[1],brks[length(brks)]))
	mtext("bin counts",side=1,line=2)
	
	par(mar=c(3,3,0,0))
	image(data.matrix(binmat),breaks=brks,col=rb(length(brks)-1),xaxt='n',yaxt='n')
	axis(side=1,at=(0:4)/4,labels=xlims[1]+0:4*((xlims[2]-xlims[1])/4))
	axis(side=2,at=(0:4)/4,labels=ylims[1]+0:4*((ylims[2]-ylims[1])/4))
	mtext(xlabel,side=1,line=2)
	mtext(ylabel,side=2,line=2)
	
	dx<-density(x,from=xlims[1],to=xlims[2],na.rm=TRUE)
	dy<-density(y,from=ylims[1],to=ylims[2],na.rm=TRUE)
	par(mar=c(3,0,0,0))
	plot(dy[["y"]],dy[["x"]],type="l",yaxs='i',axes=F,lwd=4)
	par(mar=c(0,3,1,0))
	plot(dx[["x"]],dx[["y"]],type="l",xaxs='i',axes=F,lwd=4,main="")
	
}
rpm2timing2		<- function( bgfiles, reference=1, cores="max", fractionhist=TRUE , scorehist=TRUE ){
	
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	
	fractions=c("G1b","G2","S1","S2","S3","S4")
	
	cat("checking file count\n")
	if(floor(length(bgfiles)/6) != ceiling(length(bgfiles)/6) ){ stop("# of files not divisible by 6. missing a file?") }
	
	cat("checking file lines\n")
	if(length(unique(unlist(lapply(bgfiles,filelines)))) != 1){stop("files have different numbers of lines")}
	
	cat("finding files for each cell line\n")
	cells<-unique(remove.suffix(remove.prefix(bgfiles[grep("G2",bgfiles)],"Seq"),"G2"))
	print(cells)
	numcells<-length(cells)
	rb=rainbow(numcells)
	cellfiles<-lapply(1:numcells,function(x) bgfiles[grep(cells[x],bgfiles)] )
	print(cellfiles)
	
	outnames<-paste(cells,"_weightedscore.bg",sep="")
	finalnames<-paste(cells,"_iqr_qnorm.bg",sep="")
	#print(finalnames)
	
	cat("checking files for each cell line\n")
	numpercell=unique(unlist(lapply(cellfiles,length)))
	if(length(numpercell) != 1 | numpercell[1] != 6 ){print(cellfiles);stop("each cell line must match only 6 files")}
	checko<-lapply(1:numcells,function(x){
		lapply(1:6, function(y){
			if(grepl(fractions[y],cellfiles[[x]][y])==FALSE){
				stop(paste("cell files do not match expected fraction for",cells[x],cellfiles[[x]][y]))
			}
		} )
	})
	
	# #######################################
	
	
	coords<-read.delim(pipe(paste("cut -f 1,2,3",cellfiles[[1]][1])),header=F)
	
	cat("getting scores\n")
	# read in scores for each fraction for each cell line, calculated weighted score, and save file
	celldata<-mclapply(1:numcells,function(x){
		
		cd<-as.data.frame(lapply(1:6, function(y){
			as.numeric(readLines(pipe(paste("cut -f 4",cellfiles[[x]][y]))))
		} ) )
		colnames(cd)<-fractions
		return(cd)
	},mc.cores=cores )
	
	#fractiondensities<-mclapply(1:numcells,function(x){
	#	lapply(1:6,function(y){
	#		density(celldata[[x]][y])
	#	})
	#},mc.cores=cores)

	
	# 	pdf(file="scoredensities.pdf")
	# 	for(i in 1:6){
	# 		plotmax<-quantile(celldata[[1]][,i],probs=0.95)
	# 		plot(density(celldata[[1]][,i],from=0,to=plotmax),col=rb[1],main=fractions[i])
	# 		for(j in 2:numcells){
	# 			lines(density(celldata[[j]][,i],from=0,to=plotmax),col=rb[j])
	# 		}
	# 		legend("topright",legend=cells,col=rb,lwd=3)
	# 	}

	
	
	
	celldata<-as.data.frame(mclapply(1:numcells,function(x){
		cd<-celldata[[x]]
		wa<-(0.917*cd$G1b)+(0.750*cd$S1)+(0.583*cd$S2)+(0.417*cd$S3)+(0.250*cd$S4)+(0*cd$G2)
		wa[which(wa==0)] <- NA
		return(wa)
	},mc.cores=cores ),stringsAsFactors=FALSE)
	colnames(celldata)<-cells
	
	numwindows<-nrow(celldata)
	goodblocks<-which(complete.cases(celldata))
	cat("discarding",numwindows-length(goodblocks),"windows that have no data in at least 1 sample (",(numwindows-length(goodblocks))/numwindows,"% )\n")
	celldata<-celldata[goodblocks,]
	coords<-coords[goodblocks,]
	
	# 	plotmax<-quantile(celldata,probs=0.95,na.rm=TRUE)
	# 	plot(density(celldata[,1],from=0,to=plotmax),col=rb[1],main="weighted scores")
	# 	for(j in 2:numcells){
	# 		lines(density(celldata[,j],from=0,to=plotmax),col=rb[j])
	# 	}
	# 	legend("topright",legend=cells,col=rb,lwd=3)
	# 	dev.off()
	# 	
	reference<-rowMeans(as.data.frame(lapply(celldata,sort)))
	celldata<-as.data.frame(mclapply(1:numcells,function(x){
		celldata[,x][order(celldata[,x])]<-reference
		return(celldata[,x])
	},mc.cores=cores))
	
	celldata<-( (celldata-median(celldata,na.rm=TRUE) ) ) * 1.59  / IQR(unlist(celldata),na.rm=TRUE)
	
	celldata<-mclapply(1:numcells,function(x){
		write.tsv(cbind(coords,celldata[,x],stringsAsFactors=FALSE),file=finalnames[x])
	},mc.cores=cores)
}
qnormall<-function( seqfiles , arrayfiles, cores="max" ){
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	
	
	numseqfiles=length(seqfiles)
	numarrayfiles=length(arrayfiles)
	
	arrayheads<-lapply(1:numarrayfiles,function(x) read.delim(pipe(paste("head",arrayfiles[x]))) )
	numarraycells<-unlist(lapply(arrayheads,ncol))-2
	
	cat("pooling all data\n")
	#seqscores<-unlist(mclapply(1:numseqfiles, function(x) seqdata[[x]][,4] , mc.cores=cores))
	#arrayscores<-unlist(mclapply(1:numarrayfiles, function(x) as.vector(arraydata[[x]][,3:ncol(arraydata[[x]])]), mc.cores=cores))
	allscores<-unlist(mclapply(1:(numarrayfiles+numseqfiles), function(x){
		
		if(x<=numseqfiles){
			cat("loading",seqfiles[x],"\n")
			as.numeric(readLines(pipe(paste("cut -f 4",seqfiles[x]))))
		} else{
			a=x-numseqfiles
			cat("loading",arrayfiles[a],"\n")
			d<-unlist(read.delim(pipe(paste("cut -f",paste(3:numarraycells[a],collapse=","),arrayfiles[a]))))
			d<-as.numeric(d[2:length(d)])
			return(d)
		}
		
	}, mc.cores=cores))
	
	
	
	
	
	
	if(cores>numseqfiles){cores2=numseqfiles} else{cores2=cores}
	cat("normalizing sequencing data\n")
	mclapply(1:numseqfiles, function(x){
		seqdata<-read.tsv(seqfiles[x])
		curref<-sample(allscores,nrow(seqdata))
		seqdata[,4][order(seqdata[,4])]<-curref[order(curref)]
		outname<-paste(basename(removeext(seqfiles[x])),"_qnormToPool.bg",sep="")
		print(outname)
		write.tsv(seqdata,file=outname)
	}, mc.cores=cores2)
	
	if(cores>numarrayfiles){cores2=numarrayfiles} else{cores2=cores}
	cat("normalizing array data\n")
	mclapply(1:numarrayfiles, function(x){
		for(i in 1:numarraycells[x]){
			arraydata<-read.delim(pipe(paste("cut -f 1,2,",i+2," ",arrayfiles[x],sep="")),header=TRUE)
			curref<-sample(allscores,nrow(arraydata))
			arraydata[,3][order(arraydata[,3])]<-curref[order(curref)]
			outname<-paste(basename(removeext(arrayfiles[x])),"_",colnames(arraydata)[3],"_qnormToPool.txt",sep="")
			print(outname)
			write.tsv(arraydata,file=outname)
		}
	}, mc.cores=cores2)
}
# ####################################
#      DEATH ROW                     #
# ####################################
nolbg			<- function( bg ){
	#assumes no data within 25 bp of end of chromosome
	#assumes nol file names match column1 of bedgraph and have .txt extension
	chroms<-unique(bg$V1)
	bg$V1<-bg$V1+25
	for(i in 1:length(chroms)){
		write.tsv(bg[bg$V1==chroms[i],][,1],file="linenumbers.tmp")
		sc(paste("awk 'NR==FNR { a[$1];next } (FNR in a)' linenumbers.tmp ",chroms[i],".txt > ",chroms[i],".tmp",sep=""))
		
	}
}
mat.makenol		<- function( bedfile, regionsize=3000, start=2, stop=3, suffix="", center=FALSE, row_names=TRUE, stepsize=1,path="/lustre/maize/home/dlv04c/data/b73v2/nol/a375/"){

	#REQUIRES NOL FILES TO BE SEPARATED BY CHROMOSOME, AND BE NAMED WITH SAME CHROMOSOME NAMES WITH .txt EXTENSION ( "chr1.txt" )
	
	pb <- txtProgressBar(min = 0, max = nrow(bedfile), style = 3)

	#REMOVE UNSHARED CHROMOSOMES FROM BED
	bedchroms<-unique(bedfile$V1)
	gffchroms<-unlist(lapply(strsplit(list.files(path=path),"\\."),"[",1))
	bed<-subset(bedfile,bedfile$V1 %in% gffchroms)
	
	#MINIMIZE REGION TABLE AND ADJUST CENTER FOR minus-stranded
	if(ncol(bed)>5){
		regions<-data.frame(V1=bed$V1,V2=bed[,start],V3=bed[,stop],V4=bed$V6,stringsAsFactors=FALSE)
		regions[which(regions[,4]=="-"),2]<-regions[which(regions[,4]=="-"),3]
	}
	else{
		if(center==TRUE){
		regions<-data.frame(V1=bed$V1,V2=round((bed$V2+bed$V3)/2),V3=bed$V3,V4=1)
		}
		else{
		regions<-data.frame(V1=bed$V1,V2=bed$V2,V3=bed$V3,V4=1)
		}
	}
	
	#CREATE MATRIX FOR STORING SCORES
	scorematrix<-matrix(ncol=regionsize,nrow=nrow(bed))
	if(row_names==TRUE){rownames(scorematrix)<-bed$V4}
	
	#FILL MATRIX WITH SCORES AND FLIP MINUS-STRANDED ROWS
	for(i in 1:nrow(regions)){
		startcoord<-round(regions[i,2] - regionsize/2)
		scorematrix[i,]<-system(command=paste("head -n ",startcoord+regionsize-25," ",path,regions[i,1],".txt | tail -n ",regionsize,sep=""),intern=TRUE)
		setTxtProgressBar(pb, i)
	}
	scorematrix[which(regions[,4]=="-"),1:regionsize]<-scorematrix[which(regions[,4]=="-"),regionsize:1]
	
	close(pb)
	
	#SAVE MATRIX FILE
	write.table(scorematrix,file=paste("nol-",suffix,".mat",collapse="",sep=""),sep="\t",quote=FALSE,row.names=TRUE,col.names=FALSE)
}
anno.dist		<- function( ){
	#define intersect function
	
	
	
	
	
	xsect			<- function( feat,annos){
		write.tsv(feat[,1:3],file="tmpfeatures.tmp")
		annotations<-unique(annos$V4)
		numannos<-length(annotations)
		annocounts<-vector(length=numannos)
		for(i in 1:numannos){
			curanno<-subset(annos,annos$V4==annotations[i])
			write.tsv(curanno[,1:5],file="tmpanno.tmp")
			sc("bedtools intersect -u -a tmpfeatures.tmp -b tmpanno.tmp > tmpintersect.tmp")
			cat("pass3\n")
			if(file.info("tmpintersect.tmp")$size!=0){
				annocounts[i]<-nrow(read.tsv("tmpintersect.tmp"))
			}
			else{
				annocounts[i]<-0
			}
			cat("pass4\n")
		}
		annocounts
	}
	

	
	
	#prepare annotation file
	#allow no results to exist to complete script
	if(annotation.type=="gff"){
		cat("\tConverting gff to pga\n")
		annotation<-make.pga(annotation,intern=TRUE)
	}
	if(annotation.type=="pga"){
	}
	#setup results table
	cat("\n\tPreparing data\n")
	annotations<-unique(annotation$V4)
	numannos<-length(annotations)
	results<-matrix(nrow=length(featurelist),ncol=numannos)
	expresults<-matrix(nrow=length(featurelist),ncol=numannos)
	colnames(results)<-annotations
	row.names(results)<-removeext(featurelist)
	resultsd<-results
	annoave<-results
	
	
	write.tsv(annotation,file="tmpannotation.tmp")
	expmat<-as.data.frame(read.mat(expressionmat)[,1],stringsAsFactors=FALSE)
	expresults<-matrix(ncol=numannos,nrow=nrow(expmat))

	
	
	
	
	
	#intersect each feature with annotations
	cat("\n\tIntersecting features with annotation")
	write.tsv(annotation,file="tmpannos.tmp")
	filler<-as.data.frame(matrix(nrow=nrow(annotation),ncol=length(featurelist)))
	annotation2<-cbind(annotation[,4],annotation[,5],filler)
	colnames(annotation2)[1:2]<-c("type","gene")
	for(f in 1:length(featurelist)){
		
		cat("\n\t\t",featurelist[f])
		features<-read.tsv(featurelist[f])
		if(feature.center==TRUE){
			features$V2<-floor((features$V3+features$V2)/2)
			features$V3<-features$V2+1
		}
		cat("pass\n")
		#get average feature scores for each annotation
		results[f,]<-xsect(features,annotation)
		cat("pass2\n")
		#get average expression for each annotation overlapping with each feature
		sc(paste("bedtools intersect -u -a tmpannotation.tmp -b ",featurelist[f], " > tmpintersect2.tmp",sep=""))
		annotation3<-read.tsv("tmpintersect2.tmp")
		annotation3<-annotation3[,4:5]
		annotation3<-unique(annotation3)
		annotation4<-merge(annotation3,expmat,by.x="V5",by.y="row.names")
		for(i in 1:numannos){
			curanno<-subset(annotation4,annotation4[,2]==annotations[i])
			annoave[f,i]<-mean(curanno[,3],na.rm=TRUE)
		}
	}
	
	#determine gene expression based on gene annotation overlap with features
	sc(paste("bedtools intersect -c -a tmpannos.tmp -b ",featurelist[f], " > tmpintersect3.tmp",sep=""))

	
	#print(head(annotation2,n=50))
	for(i in 1:numannos){
		annoscores<-subset(annotation2,annotation2[,2]==annotations[i])
		write.tsv(annoscores,file="tmpannos2.tmp")
		scoregenes<-unique(annoscores[,3])
		numgenes<-length(scoregenes)
	}
	
	
	cat("\n\t\t Denominator")
	if(length(denominator)>1){
		#denom<-as.vector(read.table(paste(denominator),header=TRUE,row.names=1,stringsAsFactors=FALSE))
		denom<-denominator
		for(i in 1:length(featurelist)){
			resultsd[i,]<-(results[i,]/denom)*1000
		}
	}
	
	
	if(length(denominator)==1){
		features<-read.tsv(denominator)
		if(feature.center==TRUE){
			features$V2<-floor((features$V3+features$V2)/2)
			features$V3<-features$V2+1
		}
		write.tsv(features[,1:3],file="tmpfeatures.tmp")
		denom<-xsect(features,annotation)
		write.table(denom,file="denominator.tsv",sep="\t",quote=FALSE)
		for(i in 1:length(featurelist)){
			resultsd[i,]<-(results[i,]/denom)*1000
		}
	}
	
	
	
	cat("\n")
	write.table(results,file=paste(prefix,"-annodist.txt",sep=""),quote=FALSE,sep="\t")
	
	if(X==FALSE){pdf(file=paste(prefix,"-annodist.pdf",sep=""))}
	par(mfrow=c(3,3))
	barplot(results, beside=TRUE,las=3,col=plotcolors,axis.lty=0,xlab="Region",ylab="# Features")
	legend("topleft",legend=legendnames, fill=plotcolors)

	barplot(resultsd, beside=TRUE,las=3,col=plotcolors,axis.lty=0,xlab="Region",ylab="Features / kb")
	legend("topleft",legend=legendnames, fill=plotcolors)

	barplot(annoave, beside=TRUE,las=3,col=plotcolors,axis.lty=0,xlab="Region",ylab="Expression Level")
	legend("topleft",legend=legendnames, fill=plotcolors)

	
	results<-results[,colnames(results) %in% pieFeatures]
	resultsd<-resultsd[,colnames(resultsd) %in% pieFeatures]
	for(y in 1:nrow(results)){
		pie(results[y,],labels=paste(colnames(results),",",results[y,]),clockwise=TRUE,main=paste("Total",row.names(results)[y]))
	}
	plot(0,0)
	for(y in 1:nrow(resultsd)){
		pie(resultsd[y,],labels=paste(colnames(resultsd),",",resultsd[y,]),clockwise=TRUE,main=paste("Features/kb",row.names(results)[y]))
	}
	if(X==FALSE){dev.off()}
}
vplot.make.old		<- function( fragments,bed,regionsize=1000, suffix="",ylims=c(0,200),hm.max="default",X=TRUE,savepng=TRUE){

	#get object names for file names
	f<-deparse(substitute(fragments))
	b<-deparse(substitute(bed))

	#define distances from annotations to search for features
	leftboundary<--0.5*regionsize
	rightboundary<-0.5*regionsize

	#load necessary libraries
	library(gplots)
	library(compiler)
	
	#define feature-mapping function
	f1 <- function(frags, centers) {
		score_matrix <- matrix(NA_real_, 1, 2)
		d1 <- frags[,1]
		d2 <- frags[,2]
		for (i in seq_along(centers)) {
			score <- d1 - centers[i]
			idx <- score > leftboundary & score <= rightboundary #rows which have relative coords of -500 and 500
			if(length(idx)>0){
				score_matrix<-rbind(score_matrix,cbind(score[idx],d2[idx]))
			}
			setTxtProgressBar(pb, y)
			y=y+1
		}
		
		cat("\n")
		score_matrix
	}
	
	#compile f1
	f1c <- cmpfun(f1)

	
	cat("removing unshared chromosomes\n")
	fragments<-subset(fragments,fragments$V1 %in% bed$V1)
	bed<-subset(bed,bed$V1 %in% fragments$V1)

	
	cat("preprocessing data\n")
	#calculate fragment sizes
	fragments$V4=fragments$V3-fragments$V2
	#calculate center of fragments
	fragments$V2=floor( (fragments$V2+fragments$V3)/2 )
	#calculate center of annotations
	bed$V2=floor( (bed$V2+bed$V3)/2 )

	y=1
	chromosomes<-unique(bed[,1])
	
	cat("searching ",nrow(fragments)," fragments for those within ",regionsize/2," bp of ",nrow(bed)," annotations\n",sep="") 
	pb <- txtProgressBar(min = 0, max = nrow(bed), style = 3)
	for(c in 1:length(chromosomes)){
		#find fragments in chromosome
		chromfrags<-subset(fragments,fragments[,1]==chromosomes[c])
		#trim to relevant data
		chromfrags<-data.matrix(cbind(chromfrags[,2],chromfrags[,4]))
		#find annotations in chromosome
		chromregions<-subset(bed,bed[,1]==chromosomes[c])
		centers<-chromregions[,2]
		#find fragments nearby annotations
		chrommatrix<-f1c(chromfrags,centers)
		#create scorematrix if first chromosome processed
		if(c==1){scorematrix<-chrommatrix[2:nrow(chrommatrix),]}
		#append chromscores to scorematrix if not first chromosome processed
		else{scorematrix<-rbind(scorematrix,chrommatrix[2:nrow(chrommatrix),])}
	}
	close(pb)
	
	#define matrix for heatmap
	m<-matrix(0,ncol=regionsize,nrow=ylims[2])
	
	#remove too-distant fragments and trim regions extending beyond matrix
	scorematrix[,1]=scorematrix[,1]+regionsize/2
	scorematrix<-scorematrix[which(scorematrix[,1]<regionsize),]
	scorematrix<-scorematrix[which(scorematrix[,1]>0),]
	scorematrix<-scorematrix[which(scorematrix[,2]<=ylims[2]),]
	cat(nrow(scorematrix),"/",nrow(fragments)," within size and range of annotations\n")
	
	#fill matrix with data
	for(d in 1:nrow(scorematrix)){
		mrow=scorematrix[d,2]
		mcol=scorematrix[d,1]
		m[mrow,mcol]=m[mrow,mcol]+1
	}
	
	#flip matrix vertically
	m<-m[nrow(m):1,]
	
	#define maximum in heatmap
	if(hm.max=="default"){hm.max=as.numeric(round(quantile(m[m!=0],prob=0.99)))}
	
	#define breaks in heatmap colors
	brks<-seq(0,hm.max,by=hm.max/100)
	
	#define heatmap colors
	greycolors<-grey((brks   [1:(   length(brks) -1)]  )/hm.max)
	
	#define file name based on objects and parameters
	filename<-paste("vplot-",f,"-",b,"-r",regionsize,"-f",ylims[2],"-d",hm.max,suffix,collapse="",sep="")
	
	#save matrix for later replotting
	write.mat(m,file=paste(filename,".mat",sep=""))

	#open png
	if(savepng==TRUE){
		png(file=paste(filename,".png",sep=""),width=1000,height=1000)
	}
	
	#draw heatmap
	heatmap.2(m,Colv=NA,Rowv=NA,dendrogram="none",trace="none",breaks=seq(0,hm.max,by=hm.max/100),labRow=NA,labCol=NA,col=greycolors)
	
	#close png
	if(savepng==TRUE){
		dev.off()
	}

	if(X==TRUE){
		#draw heatmap
		heatmap.2(m,Colv=NA,Rowv=NA,dendrogram="none",trace="none",breaks=seq(0,hm.max,by=hm.max/100),labRow=NA,labCol=NA,col=greycolors)
	}
}
vplot.make.old2		<- function( fragments,bed,regionsize=1000, suffix="",ylims=c(0,200),hm.max="default"){
	leftboundary<--0.5*regionsize-2-ylims[2]/2
	rightboundary<-0.5*regionsize+2+ylims[2]/2
	library(gplots)
	library(compiler)
	f1 <- function(frags, centers) {
		score_matrix <- matrix(NA_real_, 1, 2)
		d1 <- frags[,1]
		d2 <- frags[,2]
		
		for (i in seq_along(centers)) {
			score <- d1 - centers[i]
			idx <- score > leftboundary & score <= rightboundary
			if(length(idx)>0){
				score_matrix<-rbind(score_matrix,cbind(score[idx],d2[idx]))
			}
			setTxtProgressBar(pb, i)
		}
		cat("\n")
		score_matrix
	}
	f1c <- cmpfun(f1)

	cat("removing unshared chromosomes\n")
	fragments<-subset(fragments,fragments$V1 %in% bed$V1)
	bed<-subset(bed,bed$V1 %in% fragments$V1)

	#create bed matrix with relevant data
	fragments$V4=fragments$V3-fragments$V2
	fragments$V2=floor( (fragments$V2+fragments$V3)/2 )
	bed$V2=floor( (bed$V2+bed$V3)/2 )

	cat("#regions=",nrow(bed),"\n")
	chromosomes<-unique(bed[,1])
	for(c in 1:length(chromosomes)){
		cat("chromosome",chromosomes[c],"\n")
		chromfrags<-subset(fragments,fragments[,1]==chromosomes[c])
		chromfrags<-data.matrix(cbind(chromfrags[,2],chromfrags[,4]))
		chromregions<-subset(bed,bed[,1]==chromosomes[c])
		centers<-chromregions[,2]
		pb <- txtProgressBar(min = 0, max = length(centers), style = 3)
		chrommatrix<-f1c(chromfrags,centers)
		if(c==1){scorematrix<-chrommatrix[2:nrow(chrommatrix),];cat("created scorematrix\n")}
		else{scorematrix<-rbind(scorematrix,chrommatrix[2:nrow(chrommatrix),])}
		cat("appended scorematrix\n")
	}
	
	
	
	
	#shift coordinates so that they are all positive for matrix
	scorematrix[,1]=scorematrix[,1]+regionsize/2
	scorematrix<-scorematrix[which(scorematrix[,2]<=ylims[2]),]
	
	#define left and right boundary distance for each fragment
	flanks<-round(scorematrix[,2]/2)
	
	#add fragment left and right boundaries
	scorematrix<-cbind(scorematrix,scorematrix[,1]-flanks,scorematrix[,1]+flanks)
	
	#remove fragments beyond heatmap boundaries
	scorematrix<-scorematrix[which(scorematrix[,3]<regionsize),]
	scorematrix<-scorematrix[which(scorematrix[,4]>0),]
	
	#truncate fragments extending past heatmap boundaries
	scorematrix[,3][scorematrix[,3]<0]=0
	scorematrix[,4][scorematrix[,4]>regionsize]=regionsize
	
	
	#print(nrow(scorematrix))
	#plot(density(scorematrix[,2],na.rm=TRUE))
	#X11()
		#plot(scorematrix[,1],scorematrix[,2],pch=20,col=rgb(0,0,0,5,maxColorValue=255))
		#X11()

	
	write.tsv(scorematrix,file="scoremat.tsv")
	for(d in 1:nrow(scorematrix)){
		mrow=scorematrix[d,2]
		mcol=scorematrix[d,1]
		m[mrow,(scorematrix[d,3]:scorematrix[d,4])]=m[mrow,(scorematrix[d,3]:scorematrix[d,4])]+1
	}
	
	#flip matrix vertically
	m<-m[nrow(m):1,]	

	
	if(hm.max=="default"){hm.max=as.numeric(round(quantile(m[m!=0],prob=0.99)))}
	brks<-seq(0,hm.max,by=hm.max/100)
	greycolors<-grey((brks   [1:(   length(brks) -1)]  )/hm.max)

	
	
	filename<-paste("vplot-",paste(deparse(substitute(fragments))),"-",paste(deparse(substitute(bed))),"-r",regionsize,"-f",ylims[2],"-d",hm.max,suffix,".mat",collapse="",sep="")
	write.mat(m,file=paste(filename,".mat",sep=""))
	if(png==TRUE){
		png(file=paste(filename,".png",sep=""))
	}
	heatmap.2(m,Colv=NA,Rowv=NA,dendrogram="none",trace="none",breaks=seq(0,hm.max,by=hm.max/100),labRow=NA,labCol=NA,col=greycolors)
	if(png==TRUE){
		dev.off()
	}
}
get.file.names		<- function( paths){
	for(i in 1:length(paths)){
		namevector<-unlist(strsplit(paths[i],"/"))
		paths[i]<-namevector[length(namevector)]
	}
	paths
}
get.file.extensions	<- function( filenames){
	for(i in 1:length(filenames)){
		namevector<-unlist(strsplit(filenames[i],"\\."))
		filenames[i]<-namevector[length(namevector)]
	}
	filenames
}
# #####################################
scrapcode		<- function( ){
#if(mincovwin>0){
#				cat("finding poorly-covered features\n")
#				goodcovrows<-which(unlist(lapply(1:nrow(maskmat),function(x) length(which(maskmat[x,] > 0))))>mincovwin)
#				maskmat<-maskmat[goodcovrows,]
#				goodcovgenefile<-paste(basename(removeext(beds[i])),"_",basename(removeext(maskbed)),"_gc",mincovwin,"-",numwindows,".bed",sep="")
#				cat("saving well-covered regions in",goodcovgenefile,"\n")
#				write.tsv(bed[goodcovrows,],file=goodcovgenefile)
#
}
repliseqwindowsize	<- function( early , late , scalar="auto" , genome="hg19" , stepsize=1000 , windowsizes = 1000 * 2 ^(0:10) , cores="max"){
	
	library(parallel)
	if(cores=="max"){cores=detectCores()-1}
	
	numwins<-length(windowsizes)
	rb<-rainbow(numwins)
	
	cat("windowing genome\n")
	windowfiles<-unlist(mclapply(1:numwins,function(x) bed.makewindows("hg19",genome=TRUE,windowsize=windowsizes[x],stepsize=stepsize,outname=paste(early,windowsizes[x],"windows",sep="")) , mc.cores=cores ))
	
	cat("counting windows\n")
	windowcounts<-unlist(mclapply(windowfiles,filelines,mc.cores=cores))
	print(windowcounts)
	
	# SORT BEDS!
	
# 	cat("calculating genome coverage\n")
# 	gcovs<-unlist(mclapply(c(early,late),bed.genomecov , genome=genome , scalar=scalar , makebigwig=FALSE , mc.cores=2 ))
# 	
# 	cat("calculating windowed coverage for early\n")
# 	ecovs<-unlist(mclapply(1:numwins,function(x) bg.window(gcovs[1], windowbed=windowfiles[x] , windowsize=windowsizes[x] , stepsize=stepsize , genome=genome , operation="mean") , mc.cores=cores ))
# 	lcovs<-unlist(mclapply(1:numwins,function(x) bg.window(gcovs[2], windowbed=windowfiles[x] , windowsize=windowsizes[x] , stepsize=stepsize , genome=genome , operation="mean") , mc.cores=cores ))
# 	
# 	cat("loading data\n")
# 	covs<-mclapply(ecovs,read.tsv,mc.cores=cores)
# 	
# 	covs<-mclapply(1:numwins,function(x) { 
# 		covs[[x]]$V5<-as.numeric(readLines(pipe(paste("cut -f 4",lcovs[x]))))
# 		covs[[x]]$V6<-log2(covs[[x]][,4]/covs[[x]][,5])
# 		covs[[x]]$V6[is.infinite(covs[[x]][,6])]<-NA
# 		covs[[x]]
# 	} , mc.cores=cores )
	
	cat("calculating windowed coverage for early\n")
	ecovs<-unlist(mclapply(1:numwins,function(x) bed.windowcov(early, windowbed=windowfiles[x] , windowsize=windowsizes[x] , stepsize=stepsize , genome=genome , scalar=scalar) , mc.cores=cores ))
	
	cat("calculating windowed coverage for late\n")
	lcovs<-unlist(mclapply(1:numwins,function(x) bed.windowcov(late, windowbed=windowfiles[x] , windowsize=windowsizes[x] , stepsize=stepsize , genome=genome , scalar=scalar) , mc.cores=cores ))
	
	ecovs<-gsub("bw","bg",ecovs)
	lcovs<-gsub("bw","bg",lcovs)
	
	cat("loading coverages\n")
	cat("loading data\n")
	covs<-mclapply(ecovs,read.tsv,mc.cores=cores)
	
	covs<-mclapply(1:numwins,function(x) { 
		covs[[x]]$V5<-as.numeric(readLines(pipe(paste("cut -f 4",lcovs[x]))))
		covs[[x]]$V6<-log2(covs[[x]][,4]/covs[[x]][,5])
		covs[[x]]$V6[is.infinite(covs[[x]][,6])]<-NA
		covs[[x]]
	} , mc.cores=cores )
	
	
	cat("counting and plotting zeroes\n")
	earlyzeroes<-(mclapply(1:numwins,function(x) which(covs[[x]][,4] == 0 ), mc.cores=cores ))
	numearlyzeroes<-unlist(lapply(earlyzeroes,length))
	
	latezeroes<-(mclapply(1:numwins,function(x) which(covs[[x]][,5] == 0 ), mc.cores=cores ))
	numlatezeroes<-unlist(lapply(latezeroes,length))
	
	bothzeroes<-(mclapply(1:numwins,function(x) which(covs[[x]][,4] == 0 & covs[[x]][,5] == 0) , mc.cores=cores ))
	numbothzeroes<-unlist(lapply(bothzeroes,length))
	
	pdf(file=paste(basename(removeext(early)),"_noscores-vs-windowsize.pdf",sep=""))
	plot(0,type="n",xlab="windowsize (bp)",ylab="% windows with no reads",xlim=c(0,128000) , ylim=c(0,50) )
	abline(v=windowsizes,col="grey70")
	points(windowsizes,100*numearlyzeroes/windowcounts,col="blue",lwd=3)
	lines(windowsizes,100*numearlyzeroes/windowcounts,col="blue",lwd=3)
	lines(windowsizes,100*numlatezeroes/windowcounts,col="red",lwd=3)
	points(windowsizes,100*numlatezeroes/windowcounts,col="red",lwd=3)
	lines(windowsizes,100*numbothzeroes/windowcounts,lwd=3)
	points(windowsizes,100*numbothzeroes/windowcounts,lwd=3)
	legend("topright",legend=c("early","late","both"),col=c("blue","red","black"),lwd=3)
	
	
	cat("calculating score distributions and plotting\n")
	equants<-unlist(mclapply(1:numwins,function(x) quantile(coverages[[x]][,4],probs=0.95,na.rm=TRUE) , mc.cores=cores ))
	equants<-1000*equants/windowsizes
	
	lquants<-unlist(mclapply(1:numwins,function(x) quantile(coverages[[x]][,5],probs=0.95,na.rm=TRUE) , mc.cores=cores ))
	lquants<-1000*lquants/windowsizes
	
	maxscore<-max(c(equants,lquants))
	
	earlydensities<-mclapply(1:numwins,function(x) density(1000*coverages[[x]][,4]/windowsizes[x],na.rm=TRUE,from=0,to=maxscore) , mc.cores=cores)
	latedensities<-mclapply(1:numwins,function(x) density(1000*coverages[[x]][,5]/windowsizes[x],na.rm=TRUE,from=0,to=maxscore) , mc.cores=cores)
	ratiodensities<-mclapply(1:numwins,function(x) density(coverages[[x]][,6],na.rm=TRUE,from=-8,to=8) , mc.cores=cores)
	
	
	plot(0,type="n",xlab="RPM",ylab="frequency of windows (kernel density estimate)",main="read density histograms for early fraction",xlim=c(0,quantile(unlist(lapply(earlydensities,"[","x")),probs=0.9)) , ylim=c(0,max(unlist(lapply(earlydensities,"[","y")))) )
	for(i in 1:numwins){
		lines(earlydensities[[i]][["x"]],earlydensities[[i]][["y"]],lwd=3,col=rb[i])
	}
	legend("topright",legend=paste(windowsizes,"bp windows") , col=rb , lwd=3, lty=1)
	
	
	plot(0,type="n",xlab="RPM",ylab="frequency of windows (kernel density estimate)",main="read density histograms for late fraction",xlim=c(0,quantile(unlist(lapply(earlydensities,"[","x")),probs=0.9)) , ylim=c(0,max(unlist(lapply(earlydensities,"[","y")))) )
	for(i in 1:numwins){
		lines(latedensities[[i]][["x"]],latedensities[[i]][["y"]],lwd=3,col=rb[i])
	}
	legend("topright",legend=paste(windowsizes,"bp windows") , col=rb , lwd=3, lty=1)
	
	
	plot(0,type="n",xlab="RT score , log2(early/late)",ylab="frequency of windows (kernel density estimate)",main="RT score histograms",xlim=c(-8,8) , ylim=c(0,max(unlist(lapply(ratiodensities,"[","y")))) )
	for(i in 1:numwins){
		lines(ratiodensities[[i]][["x"]],ratiodensities[[i]][["y"]],lwd=3,col=rb[i])
	}
	legend("topright",legend=paste(windowsizes,"bp windows") , col=rb , lwd=3, lty=1 , cex=0.75)
	
	latewherenoearly<-mclapply(1:numwins,function(x){
		coverages[[x]][,5][which(coverages[[x]][,4]==0)]
	},mc.cores=cores)
	
	earlywherenolate<-mclapply(1:numwins,function(x){
		coverages[[x]][,4][which(coverages[[x]][,5]==0)]
	},mc.cores=cores)
	dev.off()
	
	
}
	
dvera/travis documentation built on June 5, 2019, 5:12 a.m.