R/GenometriCorrResult.R

Defines functions .rainbooo .plot_colored_hist

# GenometriCorrelation project evaluating two interval markups genomewide independence. 
# (c) 2010-2020 Alexander Favorov, Loris Mularoni, 
#               Yulia Medvedeva, Harris A. Jaffee, 
#               Ekaterina V. Zhuravleva, Veronica Busa,
#               Leslie M. Cope, Andrey A. Mironov, 
#               Vsevolod J. Makeev, Sarah J. Wheelan.
#
# result - result of GenometriCorrelation function - works with ini file format  

#if (!require('methods')) stop('GenometriCorrResult requires methods package!\n')
#if (!require('graphics')) stop('GenometriCorrResult requires graphics package!\n')

#'@importFrom grDevices as.raster rainbow
#'@importFrom grDevices colorRamp dev.off dev.size pdf rgb 
#'@importFrom graphics axis hist layout lines mtext par plot plot.new rasterImage text xinch 

#'@export
setClass('GenometriCorrResult',contains='namedList',representation(config="GenometriCorrConfig"))

#'@export
setMethod('show','GenometriCorrResult',function(object)
	{
		if (length(object) == 0) {
			cat("It is an empty GenometriCorrResult.")
			return()
		}
		namelist<-names(object[[1]])
		#possible operations with namelist here
		do_not_show<-
			c(
				'relative.distances.data',
				'absolute.min.distance.data',
				'absolute.inter.reference.distance.data',
				'relative.distances.ecdf.deviation.area.null.list',
				'scaled.absolute.min.distance.sum.null.list',
				'jaccard.measure.null.list',
				'jaccard.intersection.null.list',
				'reference.middles',
				'projection.test')
				#scaled.absolute.min.distance.sum
		#we remove the do_not_show list from namelist
		namelist<-setdiff(namelist,do_not_show)
		print(sapply(object,function(x){
		x[namelist]
		}))
	})


#setGeneric('plot', useAsDefault=function(x,y,...) graphics::plot(x,y,...))
setGeneric('graphical.report',function(x,pdffile='',show.all=TRUE,show.chromosomes=c(), trustname=TRUE, make.new=TRUE)
                                standardGeneric('graphical.report'))


#'@export
setMethod('graphical.report',
	signature(x='GenometriCorrResult'),
	function(x, pdffile='',show.all=FALSE,show.chromosomes=c(),trustname=TRUE,make.new=TRUE)
	# if show.all == TRUE, show all of them
	# if show.all == FALSE, show only awhole+show.chromosomes; if it leads to showing nothing, show.all turns back to TRUE
	{
		if (!is.null(x@config$options$awhole.space.name))
			awhole.space.name=x@config$options$awhole.space.name
		else
			awhole.space.name='awhole'


		if (is.null(x[[awhole.space.name]]))
			awhole.space.name.list=c()
		else
			awhole.space.name.list=c(awhole.space.name)	

		if(!show.all)
		{
			to.show<-union(show.chromosomes,awhole.space.name.list)
			if (length(to.show)==0) show.all=TRUE
		}

		if (show.all)
			to.show<-names(x)	

		chromosomes<-setdiff(names(x),awhole.space.name.list)

		show.awhole<-(length(awhole.space.name.list)>0)
		#we believe that both reference and exist, so we do not check it. 
		if (pdffile=='') 
		{
			if (!is.null(x@config$data$query) && length(grep("[a-zA-Z0-9]",x@config$data$query))>0)
			{
				if (length(strsplit(basename(x@config$data$query),'\\.')[[1]])>1)
					qname<-paste(head(strsplit(basename(x@config$data$query),'\\.')[[1]],-1),sep='.')
				else
					qname<-basename(x@config$data$query)
			}
			else qname<-'unknown'

			if (!is.null(x@config$data$reference) && length(grep("[a-zA-Z0-9]",x@config$data$reference))>0)
			{
				if (length(strsplit(basename(x@config$data$reference),'\\.')[[1]])>1)
					rname<-paste(head(strsplit(basename(x@config$data$reference),'\\.')[[1]],-1),sep='.')
				else
					rname<-basename(x@config$data$reference)
			}
			else rname<-'unknown'
			pdffile<-paste(qname,'_to_',rname,'_plot.pdf',sep='')
		}
		else if (trustname==FALSE)
		{
			if (!(length(strsplit(pdffile,'\\.')[[1]]) > 1 && tail(strsplit(pdffile,'\\.')[[1]],1) == 'pdf'))
				pdffile<-paste(pdffile,'.pdf',sep='')
		}
		


		width<-if(! is.null(x@config$options$keep.distributions) && x@config$options$keep.distributions) 12 else 4
		if (make.new == TRUE)
		{
			pdf(file=pdffile, width=width, height=4, paper="special")
		}
		for (name in to.show)
		{
			if (! is.null(x@config$options$keep.distributions) && x@config$options$keep.distributions)
				layout(matrix(c(1,2,3), 1, 3, byrow = TRUE), heights=c(1,1,1))
			plot.new()
			data <- x[[name]]#[[1]]
			if (name == "awhole")
				usename <- "All Chromosomes"
			else
				usename <- name
			formstring_s<-"%s"
			formstring_g<-"%g"
			mtext(usename, line=-1, cex=1.5)
			mtext(paste("Query population :", data$query.population, sep=" "), line=-4, cex=0.7)
			mtext(paste("Reference population :", data$reference.population, sep=" "), line=-5, cex=0.7)
			mtext(paste("Relative Ks p-value :", sprintf(formstring_g,data$relative.distances.ks.p.value), sep=" "), line=-6, cex=0.7)
			mtext(paste("Relative ecdf deviation area :", sprintf(formstring_g,data$relative.distances.ecdf.deviation.area), sep=" "), line=-7, cex=0.7)
			mtext(paste("Relative ecdf area correlation :", sprintf(formstring_g,data$relative.distances.ecdf.area.correlation), sep=" "), line=-8, cex=0.7)
			if('relative.distances.ecdf.deviation.area.p.value' %in% names(data))
				mtext(paste("Relative ecdf deviation area p-value :", sprintf(formstring_s,data$relative.distances.ecdf.deviation.area.p.value), sep=" "), line=-9, cex=0.7)
			if('scaled.absolute.min.distance.sum.p.value' %in% names(data))
				mtext(paste("Scaled Absolute min. distance p-value :", sprintf(formstring_s,data$scaled.absolute.min.distance.sum.p.value), sep=" "), line=-10, cex=0.7)
				mtext(paste("Scaled Absolute min. direction :", data$scaled.absolute.min.distance.sum.direction, sep=" "), line=-11, cex=0.7)
			if('jaccard.measure.p.value'%in% names(data))
				mtext(paste("Jaccard Measure p-value :", sprintf(formstring_s,data$jaccard.measure.p.value), sep=" "), line=-12, cex=0.7)
			if('jaccard.measure.test.direction'%in% names(data))
				mtext(paste("Jaccard Measure test direction :", data$jaccard.measure.test.direction, sep=" "), line=-13, cex=0.7)
			if('projection.test.p.value'%in% names(data))
				mtext(paste("Projection test p-value :", sprintf(formstring_g,data$projection.test.p.value), sep=" "), line=-14, cex=0.7)
			if('projection.test.direction'%in% names(data))
				mtext(paste("Projection test direction :", data$projection.test.direction, sep=" "), line=-15, cex=0.7)
			if('projection.test.obs.to.exp'%in% names(data))
				mtext(paste("Projection test observed to expected ratio :", sprintf(formstring_g,data$projection.test.obs.to.exp), sep=" "), line=-16, cex=0.7)

			if (! is.null(x@config$options$keep.distributions) && x@config$options$keep.distributions)
			{
				twotimes<-function(x){2*x}
				plot(ecdf(data$absolute.min.distance.data),col="black",lty='solid',lwd=2, main="Absolute distances", xlab="Distance (bp)", ylab="Cumulative fraction")
				#plot(ecdf(data$absolute.inter.reference.distance.data), col="grey",lty='solid',lwd=2, main="Absolute distances", xlab="Distance (bp)", ylab="Cumulative fraction")
				#plot abs.data? no sense :)
				#create the null-hypothesis cdf function for absolute.min.distance.data
				cdf.x<-sort(data$absolute.inter.reference.distance.data)/2
				cdf.y<-cdf.x #to init
				L.half<-sum(cdf.x) #just to remember it is half-sum 
				for(i in 1:length(cdf.x)) #it is sorted, a=x_i=r/2; after the loop, we will 2/L
					cdf.y[i]<-sum(cdf.x[1:i])+ # x is r/2 and sum is L/2
						(length(cdf.x)-i)*cdf.x[i]
				cdf.y<-cdf.y/L.half
				lines(cdf.x,cdf.y,col="blue") #plot expetation with blue
				#ok. start plot 2 with expetation
				plot(twotimes, xlim=c(0,0.5), col="blue", main="Relative distances", xlab="Fractional distance", ylab="Cumulative fraction")
				lines(ecdf(data$relative.distances.data))
			}
		}
		if (make.new == TRUE)
		{
			dev.off()
		}
	})



setGeneric('visualize',function(x,pdffile='',show.all=TRUE,show.chromosomes=c(), trustname=TRUE, make.new=TRUE, style="blue-white-red")
                                standardGeneric('visualize'))

#'@export
setMethod('visualize',
	signature(x='GenometriCorrResult'),
	function(x, pdffile='',show.all=FALSE,show.chromosomes=c(), trustname=TRUE, make.new=TRUE, style="blue-white-red")
	{
		# gplots is imported now
		#if (!require('gplots')) stop('GenometriCorr visualize requires gplots package\n')
		if (is.null(x@config$options$keep.distributions)) return()

		if (!is.null(x@config$options$awhole.space.name))
			awhole.space.name=x@config$options$awhole.space.name
		else
			awhole.space.name='awhole'

		if (is.null(x[[awhole.space.name]]))
			awhole.space.name.list=c()
		else
			awhole.space.name.list=c(awhole.space.name)	

		if(!show.all)
		{
			to.show<-union(show.chromosomes,awhole.space.name)
			if (length(to.show)==0) show.all=TRUE
		}

		if (show.all)
			to.show<-names(x)

		chromosomes<-setdiff(names(x),awhole.space.name.list)

		show.awhole<-(length(awhole.space.name.list)>0)
		alldist <- c() # maybe, we wiil never need it
		alldatdist <- c()

		if(show.all == TRUE)
		{
			for (chr in chromosomes)
			{
				data <- x[[chr]]#[[1]]
				alldist <- c(alldist, diff(data$reference.middles))
				alldatdist <- c(alldatdist, data$absolute.min.distance.data)
			}
			alldist <- as.numeric(alldist)
			alldatdist <- as.numeric(alldatdist)
		}
		if (pdffile=='') 
		{
			if (!is.null(x@config$data$query))
			{
				if (length(strsplit(basename(x@config$data$query),'\\.')[[1]])>1)
					qname<-paste(head(strsplit(basename(x@config$data$query),'\\.')[[1]],-1),sep='.')
				else
					qname<-basename(x@config$data$query)
			}
			else qname<-'unknown'

			if (!is.null(x@config$data$reference))
			{
				if (length(strsplit(basename(x@config$data$reference),'\\.')[[1]])>1)
					rname<-paste(head(strsplit(basename(x@config$data$reference),'\\.')[[1]],-1),sep='.')
				else
					rname<-basename(x@config$data$reference)
			}
			else rname<-'unknown'
			pdffile<-paste(qname,'_to_',rname,'_visualize.pdf',sep='')
		}
		else if (trustname==FALSE)
		{
			if (!(length(strsplit(pdffile,'\\.')[[1]]) > 1 && tail(strsplit(pdffile,'\\.')[[1]],1) == 'pdf'))
				pdffile<-paste(pdffile,'.pdf',sep='')
		}

		if (make.new == TRUE)
		{
			pdf(file=pdffile, width=10, height=19, paper="special")
			mymat <- matrix(ncol=2, nrow=8)
			mymat[1,1] <- 2
			mymat[1,2] <- 2
			mymat[2,1] <- 3
			mymat[2,2] <- 3
			mymat[3,1] <- 1
			mymat[3,2] <- 1
			mymat[4,1] <- 4
			mymat[4,2] <- 5
			mymat[5,1] <- 6
			mymat[5,2] <- 6
			mymat[6,1] <- 7
			mymat[6,2] <- 8 
			mymat[7,1] <- 9 
			mymat[7,2] <- 9 
			mymat[8,1] <- 10
			mymat[8,2] <- 11
				
			layout(mymat, heights=c(0.05,0.05,0.15,0.15,0.15,0.15,0.15,0.15))
		}
		nbreaks <- 50
		colorinterpolation <- 10
		layoutresults <- 3
		i <- 0
		for (name in to.show)
		{
			maxdist <- 0
			i <- i+1
			data <- x[[name]]#[[1]]
			plot(1,1, type="n", axes=F, xlab="", ylab="")
			par(mar=c(1,1,1,1))
			if (name == "awhole")
			{
				name <- "All chromosomes"
			}
			mtext(paste("Results: ", name, sep=""), line=-2, cex=1.0, side=3)
			mtext(paste("Overlap summary (Jaccard and projection tests)"), line=-4, cex=0.8, side=3)
			mtext(paste("Jaccard p-value: ", data$jaccard.measure.p.value, sep=""), line=-6, cex=0.8, side=3)
			if (!is.integer(data$jaccard.measure.p.value))
			{
				if (data$jaccard.measure.test.direction == 'attraction')
				{
					mtext(paste("Query and reference intervals overlap significantly more than expected by chance, by Jaccard"), line=-8, cex=0.8, side=3)
				}
				else
				{
					mtext(paste("Query and reference intervals overlap significantly less than expected by chance, by Jaccard"), line=-8, cex=0.8, side=3)
				}
			}
			if (!is.integer(data$projection.test.p.value))
			{
				if (data$projection.test.direction == 'attraction')
				{
					mtext(paste("Query midpoints and reference intervals overlap significantly more than expected by chance, by projection"), line=-10, cex=0.8, side=3)
				}
				else
				{
					mtext(paste("Query midpoints and reference intervals overlap significantly less than expected by chance, by projection"), line=-10, cex=0.8, side=3)
				}
			}

			if (!is.integer(data$relative.distances.ecdf.area.correlation))
			{
				goodness_of_corr <- 0
			} else
			{
				goodness_of_corr <- data$relative.distances.ecdf.area.correlation
			}
			data_rel_exp <- seq(0, 50, by=50/nbreaks)/100
			data$relative.distances.data <- data$relative.distances.data[data$relative.distances.data>=0]
			hist_to_plot <- hist(data$relative.distances.data, breaks=data_rel_exp, plot=F)
			hist_exp <- hist(data_rel_exp, breaks=hist_to_plot$breaks, plot=F)
			plottitle <- paste("Relative distance from query to reference, pvalue= ", data$relative.distances.ecdf.deviation.area.p.value, sep="")
			.plot_colored_hist(goodness_of_corr, hist_to_plot$density, log2(hist_to_plot$density/hist_exp$density), nbreaks, colorinterpolation, plottitle, 0.5, printlegend = switch((i%%layoutresults)+1, 0, 1, 0, 0),style=style)
			if (!is.integer(data$scaled.absolute.min.distance.sum.p.value))
			{
				goodness_of_corr = 0
			} else
			{
				goodness_of_corr <- as.numeric(data$scaled.absolute.min.distance.sum.p.value)
			}
#for absolute distances the data are really different than for relative distances
#need to consider both the real interval lengths and the actual distances
			if (maxdist==0)
			{
				maxdist <- min(quantile(data$absolute.min.distance.data)[[4]], quantile(data$absolute.inter.reference.distance.data)[[4]])
				maxdist <- min(maxdist, min(quantile(data$absolute.min.distance.data)[[3]], quantile(data$absolute.inter.reference.distance.data)[[3]]))
			}
                        if (maxdist<nbreaks)
                        {
                                maxdist <- max(nbreaks,max(quantile(data$absolute.min.distance.data)[[4]], quantile(data$absolute.inter.reference.distance.data)[[4]]))
                        }
			dist_to_consider <- data$absolute.inter.reference.distance.data[data$absolute.inter.reference.distance.data<=maxdist]
			exp_dist_to_consider <- data$absolute.min.distance.data[data$absolute.min.distance.data<=maxdist]
			exp_dist_to_consider <- exp_dist_to_consider[exp_dist_to_consider >= 0]
			abs_breaks <- seq.int(from=0, to=maxdist+maxdist/nbreaks-1, by=maxdist/nbreaks)
			if (length(dist_to_consider) == 0 || length(exp_dist_to_consider) == 0)
			{
				plot(1, 1, type="n", axes=F, xlab="", ylab="")
				par(mar=c(1,1,1,1))
				mtext("Insufficient data", line=-3)
				next
			}
			hist_obs <- hist(exp_dist_to_consider, breaks=abs_breaks, plot=F)
			hist_exp <- hist(dist_to_consider, breaks=hist_obs$breaks, plot=F)
			abs_breaks <- hist_obs$breaks
			hist_exp$counts[hist_exp$counts<=0] <- min(hist_exp$counts[hist_exp$counts>0])
			hist_exp$counts <- hist_exp$counts*(sum(hist_obs$counts)/sum(hist_exp$counts))  #scale expected to same # observations
			plottitle <- paste("Absolute distance from query to reference, pvalue= ", data$scaled.absolute.min.distance.sum.p.value, sep="")
			.plot_colored_hist(goodness_of_corr, hist_obs$density, log2(hist_obs$counts/hist_exp$counts), length(abs_breaks), colorinterpolation, plottitle, maxdist, style=style)
		}
		if (make.new == TRUE)
		{
			dev.off()
		}
	})


.plot_colored_hist <- function(goodness_of_corr, densities, obs_exp_rats, nbreaks, colorinterpolation=1, plottitle, extremexlim, printlegend=0, style="blue-white-red")
{
	obs_exp_rats[!(is.finite(obs_exp_rats))] <- 0
	nbreaks=length(obs_exp_rats)
##first scale the correlation by the range of difference in strength of signal
        span_graph <- max(median(obs_exp_rats)-min(obs_exp_rats), max(obs_exp_rats)-median(obs_exp_rats))
        scale_factor_line <- 4/span_graph
        scale_factor <- 1-goodness_of_corr
	if (printlegend > 0)
	{
		par(mar=c(1,1,5,1))
		posscols_up <- .rainbooo(1000, start=0.05, end=0.18,style=style)
		posscols_down <- .rainbooo(1000, start=0.18, end=0.6,style=style)
		allposscols <- c(posscols_up, posscols_down)
		allposscols <- allposscols[length(allposscols):1]
		hist(seq(2:1999), breaks=(seq(1:2002)-0.5), col=allposscols, border=allposscols, axes=F, labels=F, main="Color key\n  <- blue is negative correlation, -> red is positive correlation", xlab="", ylab="", cex.main=1.2)
		par(mar=c(1,1,1,1))
		plot(1,1, type="n", axes=F, xlab="", ylab="")
		mtext(paste("Overlay line on graph is data density, over ", nbreaks, "  bins\nThis range of densities is real though does not on its own convey significance\nThe p-value signals whether the trends are statistically significant.", sep=""), line=-3, cex=1.0, side=3)
	}
        ranges_to_plot <- obs_exp_rats*scale_factor_line
        end <- 0.18+scale_factor*0.42
	#if (end > 1)
	#{
	#	end = 1
	#}
	if (end > .6)
	{
		end = .6
	} # consistent with posscols
	colorscheme_down <- .rainbooo(nbreaks, start=0.18, end=end,style=style) ##scaling by correlation strength
	ranges_to_plot_up <- ranges_to_plot[ranges_to_plot>=0]
	start=0.18-scale_factor*0.13
	#if (start < 0)
	#{
	#	start = 0.05
	#}
	if (start < 0.05)
	{
		start = 0.05
	}
	colorscheme_up <- .rainbooo(nbreaks, start=start, end=0.18,style=style) ##scaling by correlation strength
	##reverse the positive correlation colorscheme
	colorscheme_up <- colorscheme_up[nbreaks:1]
	ranges_to_plot_down <- ranges_to_plot[ranges_to_plot<0]
	thesecols_up <- 0
	thesecols_down <- 0
        if (length(ranges_to_plot_up) > 0)
        {
                rr <- max(ranges_to_plot_up)-min(ranges_to_plot_up)
                thesecols_up <- ((ranges_to_plot_up-min(ranges_to_plot_up))/rr)*(nbreaks-1)
                thesecols_up <- as.integer(thesecols_up+1)
        }
        if (length(ranges_to_plot_down) > 0)
        {
                rr <- max(ranges_to_plot_down)-min(ranges_to_plot_down)
                thesecols_down <- ((abs(ranges_to_plot_down)-min(abs(ranges_to_plot_down)))/rr)*(nbreaks-1)
                thesecols_down <- as.integer(thesecols_down+1)
        }
	mybreaks <- seq(1:(nbreaks+1))
	mybreaks <- mybreaks-0.5
	allcols <- vector(length=length(obs_exp_rats))
	allcols[ranges_to_plot<0] <- colorscheme_down[thesecols_down]
	allcols[ranges_to_plot>=0] <- colorscheme_up[thesecols_up]
	if (colorinterpolation > 0)
	{
		newcols <- vector(length=colorinterpolation*(length(allcols)-1))
		for (v in 1:(length(allcols)-1))
		{
			newcols[(v-1)*colorinterpolation + 1] = allcols[v]
			ramp <- gplots::colorpanel(colorinterpolation, allcols[v], allcols[v+1])
			for (j in 1:colorinterpolation)
			{
				newcols[(v-1)*colorinterpolation + 1 + j] = ramp[j]
			}
		}
		allcols <- newcols
		newbreaks <- vector(length=(length(mybreaks)-1)*colorinterpolation)
		newbreaks <- seq(1:(length(newbreaks)))
		mybreaks <- newbreaks-0.5
	}
	par(mar=c(3,3,1,1))
	hist(seq(1:(length(allcols)-1)), breaks=mybreaks[mybreaks<=(length(allcols))], col=allcols, border=allcols, axes=FALSE, labels=FALSE, main=plottitle, xlab="", ylab="", cex.main=1.2)
	par(new=TRUE)
	par(mar=c(3,3,1,1))
	uprange <- max(densities)/sum(densities)
	plot(seq(1:length(densities)), densities/sum(densities), ylim = c(0-uprange/10,uprange+uprange/10), type="l", xaxt="n", yaxt="n", ylab="", xlab="")
	xmax = length(obs_exp_rats)-1
	xlabs <- c(0, extremexlim/5, extremexlim*2/5, extremexlim*3/5, extremexlim*4/5, extremexlim)
	xlabs <- round(xlabs, 3)
	atlabs <- c(0, xmax/5, xmax*2/5, xmax*3/5, xmax*4/5, xmax)
	atlabs <- round(atlabs, 3)
	axis(1, labels=xlabs, at=atlabs)
	topdensity <- round(max(densities)/sum(densities), 3)
        topdensity_to_print <- topdensity*100
	tdlab <- paste(topdensity_to_print, "%", sep="")
	axis(2, labels=c(0,tdlab), at=c(0,topdensity))
}

.rainbooo<-function(n, start, end,  style="blue-white-red")
{
		
		#cat('.rainbbbooo    ',n,style,start,end,"\n")
		if(style=="rainbow")
		{
			return(rainbow(n=n,start=start,end=end))
		}
		if (style!="blue-white-red")
			warning("Unknown color style; use dafault blue-white-red")


		# remap any...0.18 to red (0.05->0) -> white(0.18->1)
		# remap 0.18..any to white(0.18->0) -> blue(0.6->1) 
		#3-color ramp code
		#ramp <- colorRamp(c('blue',"white",'red'))
		#frCol = rgb( ramp(seq(0, 1, length = 20)), max = 255)
		{
			if (end==0.18)
			{
				if (start<0.05)
				{
					warning("Color mapping from <0.05 to 0.18 in .rainbooo.\n")
					return(rainbow(n=n,start=start,end=end))
				}
				ramp <- colorRamp(c("red","white"))
				frCol = rgb( ramp(seq((start-0.05)/(0.18-0.05), 1, length = n)), maxColorValue = 255)
			}
			else if (start==0.18)
			{
				if (end>0.6)
				{
					warning("Color mapping from 0.18 to >0.6 in .rainbooo.\n")
					return(rainbow(n=n,start=start,end=end))
				}
				ramp <- colorRamp(c("white","blue"))
				frCol = rgb( ramp(seq(0, 1-((0.6-end)/(0.6-0.08)), length = n)), maxColorValue = 255)
			}
			else
			{
				warning("Color mapping not from 0.18 and not to 0.18 in .rainbooo.\n")
				return(rainbow(n=n,start=start,end=end))
			}
		}
}
favorov/GenometriCorr documentation built on March 30, 2021, 5:21 p.m.