Nothing
# eval_compactness_loc.R
#
# This function creates the plots of autocorrelation, as well as the rank product computation. Specifically, it creates
# plots for heatmap of autocorrelation, density plot of autocorrelation values, and if there is more than one dataset, for
# each individual gene signature, will do a rank product analysis on the lists of median autocorrelation coefficients to
# identify genes consistently low ranking in autocorrelation among more than one dataset.
# @param gene_sigs_list A list of genes representing the gene signature to be tested.
# @param names_sigs The names of the gene signatures (one name per gene signature, in gene_sigs_list)
# @param mRNA_expr_matrix A list of expression matrices
# @param names_datasets The names of the different datasets contained in mRNA_expr_matrix
# @param out_dir A path to the directory where the resulting output files are written
# @param file File representing the log file where errors can be written
# @param showResults Tells if open dialog boxes showing the computed results. Default is FALSE
# @param radar_plot_values A list of values that store computations that will be used in the final summary radarplot
# @keywords eval_compactness_loc
eval_compactness_loc <- function(gene_sigs_list,names_sigs, mRNA_expr_matrix, names_datasets, out_dir = '~',file=NULL,showResults = FALSE,radar_plot_values,logged=T,origin=NULL){
#create new canvas for plotting if to be shown on screen
# if (showResults){
grDevices::dev.new()
# }
# print(grDevices::dev.cur())
# if(grDevices::dev.cur()==1){
# grDevices::dev.new()
# }
# else{
# grDevices::pdf(file.path(out_dir, 'sig_autocor_hmps.pdf'),width=10,height=10)
# }
# pdf(file.path(out_dir,'sig_autocor_hmps.pdf'),width=10,height=10)
#the following calculates the maximum length of a title for appropriate font sizing
max_title_length <- -999
for(i in 1:length(names_sigs)){
for(j in 1:length(names_datasets)){
if(nchar(paste0(names_datasets[[j]] ,' ',names_sigs[[i]])) > max_title_length){
max_title_length <- nchar(paste0(names_datasets[[j]] ,' ',names_sigs[[i]]))
}
}
}
#sets up graphics parameters for the plotting area
graphics::par(cex.main=min(0.8,(3*6/max_title_length)),cex.lab = 0.6,oma=c(2,0,0,0),mar=c(0,0,0,0))
#in the following, we compute each of the autocorrelation heatmaps
hmaps <- lapply(1:(length(names_sigs) *length(names_datasets)),function(i) {
# first take the combined index i, and figure out it's index as an array index over a grid for each dataset and each signature
dataset_ind <- i %% length(names_datasets)
if (dataset_ind == 0 ){
dataset_ind <- length(names_datasets)
}
sig_ind <- ceiling(i/length(names_datasets))
gene_sig <- gene_sigs_list[[names_sigs[sig_ind]]]
if(is.matrix(gene_sig)){gene_sig = as.vector(gene_sig);}
data.matrix = mRNA_expr_matrix[[names_datasets[dataset_ind]]] #load the data
inter <- intersect(gene_sig, row.names(data.matrix)) #consider only the genes inside the dataset
autocors <- stats::cor(t(stats::na.omit(data.matrix[inter,])),method='spearman') #calculate the autocorrelations
# print(paste0('gene_sig ',gene_sig))
# print(paste0("intersection",inter))
# print(cor(t(data.matrix[inter,]),method='spearman'))
# print(autocors)
#let's output this to a file---
if(!dir.exists(file.path(out_dir,'autocorrelation_matrices'))){
dir.create(file.path(out_dir,'autocorrelation_matrices'))
}
utils::write.table(autocors,file=file.path(out_dir,'autocorrelation_matrices', paste0('autocorrelation_matrix_',names_sigs[sig_ind],'_',names_datasets[dataset_ind],'.txt')),quote=F,sep='\t')
#-----
#the following draws the heatmaps for the autocorrelation
tryCatch({
# plot.new()
# strwd <- strwidth(max(nchar(rownames(autocors))), units = "figure", cex = max(min(0.5,(4*4/length(rownames(autocors)))),0.06))
# print(max(nchar(rownames(autocors))))
# print(strwd)
gplots::heatmap.2( stats::na.omit(autocors),
col = gplots::colorpanel(100,"blue","white","red"), #redgreen(100),#colorpanel(100,"red","yellow","green"),
trace = "none",
# xlab = "Gene ID",
# ylab="Gene ID",
na.color="grey",
labRow=rownames(autocors),
labCol=colnames(autocors),#gene_sig,
main = paste0("\n\nIntra-sig. Corr.\n", names_datasets[[dataset_ind]] ,' ',names_sigs[[sig_ind]]),
dendrogram = "col",
symbreaks = T,
Rowv = T,Colv=T ,key.xlab='Rho',key.ylab=NA, key.title=NA,cexRow=max(min(0.5,(4*4/length(rownames(autocors)))),0.06),cexCol=max(min(0.5,(4*4/length(rownames(autocors)))),0.06),margins=c(1+(max(nchar(rownames(autocors)))/2),1+ (max(nchar(rownames(autocors)))/2)))
},
error=function(err){
graphics::plot.new()
graphics::title(paste0('\n\nToo many NA values in \n',names_datasets[dataset_ind],' ',names_sigs[sig_ind]))#cex=min(1,4*10/max_title_length))
cat(paste0("There was an error, ",names_datasets[dataset_ind]," ", names_sigs[sig_ind]," ", err,'\n'), file=file)
})
grab_grob() #grabs the image from the screen to put on a pdf
})
draw.heatmaps(hmaps,names_datasets,names_sigs) #this function draws the heatmaps to a canvas
# if(showResults){
#saves the canvas object to file
grDevices::dev.copy(grDevices::pdf,file.path(out_dir,'sig_autocor_hmps.pdf'),width=4*(length(names_datasets)),height=4*(length(names_sigs)))#width=10,height=10)
# }
if(grDevices::dev.cur()!=1){
g <- grDevices::dev.off() # to reset the graphics pars to defaults
}
#because we want the legend outside the plot, first we calculate the width of the legend to get the right sized canvas
#code from stackoverflow.
if(grDevices::dev.cur()!=1){
g <- grDevices::dev.off() # to reset the graphics pars to defaults
}
graphics::par(mar=c(0,0,0,0),cex=0.6)#c(par('mar')[1:3], 0)) # optional, removes extraneous right inner margin space
graphics::plot.new()
legend_names <- c()
legend_cols <- c()
legend_lty <- c()
for(k in 1:length(names_sigs)){
for (i in 1:length(names_datasets) ){
legend_names <- c(legend_names,paste0(names_datasets[i],' ',names_sigs[k]))
legend_cols <- c(legend_cols,i)
legend_lty <- c(legend_lty,k)
}
}
l <- graphics::legend(0, 0,legend_names,col=legend_cols,lty=legend_lty,lwd=rep(1,times=(length(names_datasets) * length(names_sigs))),pt.cex=1,cex=min(0.5,(4*10/max_title_length)), plot=FALSE)
# calculate right margin width in ndc
w <- graphics::grconvertX(l$rect$w, to='ndc')- graphics::grconvertX(0, to='ndc')
w <- graphics::grconvertX(w,from="ndc",to="inches") + graphics::grconvertX(10,from="device",to="inches") #add a bit of padding around legend
#sets up the graphical parameters
#sets up a new graphics object
if (showResults){
grDevices::dev.new()
}else{
grDevices::pdf(file.path(out_dir,'sig_autocor_dens.pdf'),width=5,height=5)
}
graphics::par(cex.main=0.8,cex.lab = 0.6,mar=c(3,3,4,1),mfrow=c(1,1),xpd=TRUE,omi=(c(0,0,0,w)))
# graphics::par(xpd=TRUE,mai=(graphics::par('mai') + c(0,0,0,padding)))
#the following is to set up the right y limit on the autocorrelation density plot (first we need to know the maximum of the density plots)
max_dens <- -9999
max_x_coord <- -9999 #for legnd plotting
for(k in 1:length(names_sigs)){
gene_sig <- gene_sigs_list[[names_sigs[k]]]
if(is.matrix(gene_sig)){gene_sig = as.vector(gene_sig);}
for (i in 1:length(names_datasets) ){
data.matrix = mRNA_expr_matrix[[names_datasets[i]]]
inter <- intersect(gene_sig, row.names(data.matrix))
autocors <- stats::cor(t(stats::na.omit(data.matrix[inter,])),method='spearman')
# print(paste0("autocor dim", dim(autocors)))
# print(autocors)
if(dim(autocors)[1] > 1){
cur_max <- max(stats::density(unlist(stats::na.omit(autocors)))$y)
if (max_dens < cur_max){
max_dens <- cur_max
}
cur_max <- max(stats::density(unlist(stats::na.omit(autocors)))$x)
if (max_x_coord < cur_max){
max_x_coord <- cur_max
}
}
}
}
#the following actually draws the density plots for each signature and dataset
plots_count <- 0
for(k in 1:length(names_sigs)){
gene_sig <- gene_sigs_list[[names_sigs[k]]] #load the signature
if(is.matrix(gene_sig)){gene_sig = as.vector(gene_sig);}
for (i in 1:length(names_datasets) ){
data.matrix = mRNA_expr_matrix[[names_datasets[i]]] #load the datasets
inter <- intersect(gene_sig, row.names(data.matrix)) #consider only the genes in the dataset
autocors <- stats::cor(t(stats::na.omit(data.matrix[inter,])),method='spearman') #calculate autocorrelation
#make the plots
if(dim(autocors)[1] > 1){
if (plots_count==0){#(i ==1) && (k==1)){
graphics::plot(stats::density(unlist(stats::na.omit(autocors))),ylim=c(0,ceiling(max_dens)),xlim=c(-1,1),col=i,main=NA,lwd=2,lty=k)
plots_count <- 1
}else{
graphics::lines(stats::density(unlist(stats::na.omit(autocors))),ylim=c(0,ceiling(max_dens)),xlim=c(-1,1),col=i,main=NA,lwd=2,lty=k)
}
radar_plot_values[[names_sigs[k]]][[names_datasets[i]]]['autocor_median'] <- stats::median(autocors,na.rm=T) #store the median autocorrelation for the final radar plot
}else{
radar_plot_values[[names_sigs[k]]][[names_datasets[i]]]['autocor_median'] <- 0#stats::median(stats::na.omit(autocors)) #store the median autocorrelation for the final radar plot
}
}
}
# draw the labels for the plot
graphics::mtext(side = 2, line = 2, 'Density',cex=0.8)
graphics::mtext(side = 1, line = 2, 'Rho',cex=0.8)
graphics::mtext(side = 3, line = 2,'Intra-sig. Corr. Density')
# makes the legend for the plot (sets parameters)
op <- graphics::par(cex=0.6)#,xpd=T)
graphics::legend(graphics::par('usr')[2]+0.05, graphics::par('usr')[4],xpd=NA,legend_names,col=legend_cols,lty=legend_lty,lwd=rep(1,times=(length(names_datasets) * length(names_sigs))),pt.cex=1,cex=min(0.5,(4*10/max_title_length)))
#saves the plot to file
if(showResults){
grDevices::dev.copy(grDevices::pdf,file.path(out_dir,'sig_autocor_dens.pdf'),width=5,height=5)
}
if(grDevices::dev.cur()!=1){
g <- grDevices::dev.off() # to reset the graphics pars to defaults
} # the following only computes the rank product if there is more than one dataset for which to compute it for
if (length(names_datasets) > 1){
RankProdInstalled = (nchar(system.file(package='RankProd')) > 0)
if(RankProdInstalled){
for(k in 1:length(names_sigs)){
gene_sig <- gene_sigs_list[[names_sigs[k]]] #load the gene signature
if(is.matrix(gene_sig)){gene_sig = as.vector(gene_sig);}
#create the canvas
if (showResults){
grDevices::dev.new()
}else{
grDevices::pdf(file.path(out_dir,paste0('sig_autocor_rankProd_',names_sigs[k],'.pdf')),width=10,height=10)
}
graphics::par(cex.main=0.8,cex.lab = 0.6,oma=c(2,2,2,2),mar=c(4,4,4,4)) #set drawing parameters
#now we take the median of the genes' autocorrelation for each gene in each dataset and then look at the rank product over the different cancer types
#note that the rank product analysis is only done if there is more than one dataset (otherwise not done, and is doen separately for each gene signature)
overall_rank_mat <- matrix(NA,nrow=length(unique(gene_sig)),ncol=length(names_datasets))
#create the overall matrix of datsets and signature genes containing the
#median gene autocorrelation for each dataset
row.names(overall_rank_mat) <- unique(gene_sig)
colnames(overall_rank_mat) <- names_datasets
for (i in 1:length(names_datasets)){
data.matrix = mRNA_expr_matrix[[names_datasets[i]]] #load data
inter = intersect(unique(gene_sig),rownames(data.matrix)) #consider only genes present in that dataset
autocors <- stats::cor(t(stats::na.omit(data.matrix[inter,])),method='spearman')
median_scores <- as.matrix(apply(autocors,2,function(x) {stats::median(stats::na.omit(x))})) #median autocorrelation
overall_rank_mat[rownames(median_scores),i] <- median_scores[,1]
}
# the following computes the rank product
# require(RankProd)
if(is.null(origin)){
origin <- rep(1,length(names_datasets))
}
RP.out <-RankProd::RPadvance(data = overall_rank_mat,cl = rep(1,times=length(names_datasets)),origin = origin ,logged = T,gene.names=rownames(overall_rank_mat)) #
RankProd::plotRP(RP.out ,cutoff=0.05)
#the following will calcualte the full output table for the rank prod and save it sorted two ways
table_rank_prod <- cbind(RP.out$pfp,RP.out$pval,RP.out$RPs)
colnames(table_rank_prod) <- c(paste0("pfp_",colnames(RP.out$pfp)),paste0('p_val',colnames(RP.out$pval)),paste0("Rank_Product_",colnames(RP.out$RPs)))
if(!dir.exists(file.path(out_dir,'rank_prod'))){
dir.create(file.path(out_dir,'rank_prod')) #create the dir
}
utils::write.csv(table_rank_prod[order(table_rank_prod[,1]),],file=file.path(out_dir, 'rank_prod',paste0('rank_product_table1_',names_sigs[k],'.txt')),quote=F)
utils::write.csv(table_rank_prod[order(table_rank_prod[,2]),],file=file.path(out_dir, 'rank_prod',paste0('rank_product_table2_',names_sigs[k],'.txt')),quote=F)
# #compute the tables of up and down regulated genes
# table_rank_prod <- RankProd::topGene(RP.out,cutoff=0.05,method="pfp",logged=T, gene.names=rownames(overall_rank_mat))#intersect(gene_sig[,1],rownames(mRNA_expr_matrix[[names_datasets[i]]])))
# # output the rank product table to file
# if( (!is.null(table_rank_prod$Table1))) {
# dir.create(file.path(out_dir,'rank_prod')) #create the dir
# utils::write.csv(table_rank_prod$Table1,file=file.path(out_dir, 'rank_prod',paste0('rank_product_table1_',names_sigs[k],'.txt')),quote=F,sep='\t')
# }
# if (!is.null(table_rank_prod$Table2)){
# dir.create(file.path(out_dir,'rank_prod')) #create the dir
# utils::write.csv(table_rank_prod$Table2,file=file.path(out_dir, 'rank_prod',paste0('rank_product_table2_',names_sigs[k],'.txt')),quote=F,sep='\t')
# }
cat("Intra-sig. corr. rank product successfully computed.\n", file=file) #output to log file
#save the rank product plot
if(showResults){
grDevices::dev.copy(grDevices::pdf,file.path(out_dir,paste0('sig_autocor_rankProd_',names_sigs[k],'.pdf')),width=10,height=10)
}
if(grDevices::dev.cur()!=1){
g <- grDevices::dev.off() # to reset the graphics pars to defaults
}
}
cat("Intra-sig. corr. metrics successfully computed.\n", file=file) #output to log file
}else{
cat("Intra-sig. corr. metrics not computed. Please install RankProd from BioConductor.\n", file=file) #output to log file
}
}else{
cat("Rank product not computed as there is only one dataset.\n", file=file)
}
radar_plot_values #return the radar plot values
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.