Nothing
# eval_expr_loc.R
#
# This function creates the plots of measures of expression. Takes in the list of thresholds of expression, and if this is not
# specified by the user, uses the median of all genes across all samples as the threshold cutoff for the barchart presenting
# proportion of samples expressing above the threshold. Also produces similar barchart for proportion of samples with NA values
# for each of the genes considered. Creates density plots as well for these barcharts for easy viewing of distribution in the case
# of a large number of signature genes.
# @param gene_sigs_list A list of genes representing the gene signature to be tested.
# @param names_sigs The names of the gene signatures (one name per gene signature, in gene_sigs_list)
# @param mRNA_expr_matrix A list of expression matrices
# @param names_datasets The names of the different datasets contained in mRNA_expr_matrix
# @param thresholds A list of thresholds to be considered for each data set, default is median of the data set. A gene is considered expressed if above the threshold, non-expressed otherwise. One threshold per dataset, in the same order as the dataset list.
# @param out_dir A path to the directory where the resulting output files are written
# @param file File representing the log file where errors can be written
# @param showResults Tells if open dialog boxes showing the computed results. Default is FALSE
# @param radar_plot_values A list of values that store computations that will be used in the final summary radarplot
# @keywords eval_expr_loc
eval_expr_loc <- function(gene_sigs_list,names_sigs, mRNA_expr_matrix,names_datasets, thresholds = NULL, out_dir = '~',file=NULL,showResults = FALSE,radar_plot_values){
#calculate the number of rows and columns in the final plot
num_rows <- length(names_sigs)#ceiling(sqrt(length(names)))
num_cols <- length(names_datasets)#ceiling(length(names)/num_rows)
# Depending on whether results are to be shown to the user, create a pdf drawing object or a visible canvas
if (showResults){
grDevices::dev.new()
}else{
grDevices::pdf(file.path(out_dir,'sig_expr_barcharts_NA_values.pdf'),width=4*(length(names_datasets)),height=4*(length(names_sigs)))#width=10,height=10)
}
#sets up the graphics drawing area
graphics::par(mfrow=c(num_rows,num_cols),cex=0.7, cex.axis=0.5)
#the following is to determine the font size of the title; need to know max length of characters in title
max_line_length <- -999
for(k in 1:length(names_sigs)){
for (i in 1:length(names_datasets)){
if(max_line_length < nchar(paste0(names_datasets[i],' ',names_sigs[k]))){
max_line_length <- nchar(paste0(names_datasets[i],' ',names_sigs[k]))
}
}
}
for(k in 1:length(names_sigs)){
gene_sig <- gene_sigs_list[[names_sigs[k]]] #load the signature
if(is.matrix(gene_sig))
gene_sig = as.vector(gene_sig);
for (i in 1:length(names_datasets)){
#calculate the porportion of non-NA expression data in the matrix
# genes_expr <- mRNA_expr_matrix[[names_datasets[i]]][gene_sig,]
data.matrix = mRNA_expr_matrix[[names_datasets[i]]] #load the expression data
inter <- intersect(gene_sig, row.names(data.matrix))
genes_expr <- data.matrix[inter,] #subset to only the genes present in the data
gene_expr_vals <- (rowSums(is.na(genes_expr)) / dim(genes_expr)[2]) #count the proportion of NA values
gene_expr_vals[setdiff(gene_sig, row.names(data.matrix))] <- 1 #add back in the genes that were never present anyways
gene_expr_vals <- -sort(-gene_expr_vals)
#make the barplot; if all are zero, we manually set the limits for the barplot
if(max(gene_expr_vals)==0){
bar_expr <- graphics::barplot(gene_expr_vals,xlab="Signature Gene IDs",ylab="Proportion of NA expression ",main=paste0("Signature gene expression\n",names_datasets[i],' ',names_sigs[k]), axisnames=F,axis=F,ylim=c(0,1),cex.main=min(1,4*12/max_line_length), border = NA)
}else{
bar_expr <- graphics::barplot(gene_expr_vals,xlab="Signature Gene IDs",ylab="Proportion of NA expression ",main=paste0("Signature gene expression\n",names_datasets[i],' ',names_sigs[k]), axisnames=F,axis=F,cex.main=min(1,4*12/max_line_length), border = NA)
}
graphics::text(bar_expr, graphics::par("usr")[3], labels = names(gene_expr_vals), srt = 45, adj = c(1.1,1.1), xpd = TRUE, cex=max(min(0.5,(0.5*4*12)/(sqrt(2) * length(gene_expr_vals))),0.06))
graphics::axis(2)
radar_plot_values[[names_sigs[k]]][[names_datasets[i]]]['med_prop_na'] <- stats::median(1-gene_expr_vals) #store the median NA proportion for the radarplot at the end
}
}
# graphics output
if(showResults){
grDevices::dev.copy(grDevices::pdf,file.path(out_dir,'sig_expr_barcharts_NA_values.pdf'),width=4*(length(names_datasets)),height=4*(length(names_sigs)))#width=10,height=10)
}
if(grDevices::dev.cur()!=1){
g <- grDevices::dev.off() # to reset the graphics pars to defaults
}
# create new canvas
if (showResults){
grDevices::dev.new()
}else{
grDevices::pdf(file.path(out_dir,'sig_expr_barcharts.pdf'),width=4*(length(names_datasets)),height=4*(length(names_sigs)))#width=10,height=10)
}
#set graphics parameters
graphics::par(mfrow=c(num_rows,num_cols),cex=0.7, cex.axis=0.5)
#decide what the cutoff for defining an 'expressed' gene is; if user has specified thresholds, use those, otherwise take the median of all
if (length(thresholds) == 0) {
thresholds <- rep(0,length(names_datasets))
for ( i in 1:length(names_datasets)){
thresholds[i] <- stats::median(unlist(stats::na.omit(mRNA_expr_matrix[[names_datasets[i]]])))
}
#names(thresholds) <- names_datasets
}
# assign names to the threshold variable so we can call it by dataset
if(length(names(thresholds))==0){
names(thresholds) <- names_datasets
}
#the following loops through every signature and dataset and computes the proportion of expressed genes for each case
for (k in 1:length(names_sigs)){
gene_sig <- gene_sigs_list[[names_sigs[k]]] #load signature
if(is.matrix(gene_sig)){gene_sig = as.vector(gene_sig);}
for (i in 1:length(names_datasets)){
#calculate the porportion of expressed data in the matrix
# genes_expr <- mRNA_expr_matrix[[names_datasets[i]]][gene_sig,]
data.matrix = mRNA_expr_matrix[[names_datasets[i]]] #load in the data
inter <- intersect(gene_sig, row.names(data.matrix)) #only consider the genes in the dataset
genes_expr <- data.matrix[inter,] #subset the matrix to just the gene signature
gene_expr_vals <- 1 - ((rowSums(genes_expr < thresholds[i])) / (dim(genes_expr)[2])) #figure out what proportion of samples express gene above threshold
gene_expr_vals[setdiff(gene_sig, row.names(data.matrix))] <- 0
gene_expr_vals <- sort(gene_expr_vals)
radar_plot_values[[names_sigs[k]]][[names_datasets[i]]]['med_prop_above_med'] <- stats::median(gene_expr_vals) #store median value for final radar plot
#create bar graph
bar_expr <- graphics::barplot(gene_expr_vals,xlab="Signature Gene IDs",ylab="Proportion with expression above threshold",main=paste0("Signature gene expression\n",names_datasets[i],' ',names_sigs[k]), axisnames=F,axis=F,cex.main=min(1,4*12/max_line_length), border = NA)
graphics::text(bar_expr, graphics::par("usr")[3], labels = names(gene_expr_vals), srt = 45, adj = c(1.1,1.1), xpd = TRUE, cex=max(min(0.5,(0.5*4*12)/(sqrt(2) * length(gene_expr_vals))),0.06))
graphics::axis(2)
}
}
#plot saving commands
if(showResults){
grDevices::dev.copy(grDevices::pdf,file.path(out_dir,'sig_expr_barcharts.pdf'),width=4*(length(names_datasets)),height=4*(length(names_sigs)))#width=10,height=10)
}
if(grDevices::dev.cur()!=1){
g <- grDevices::dev.off() # to reset the graphics pars to defaults
}
#create new plot
if (showResults){
grDevices::dev.new()
}else{
grDevices::pdf(file.path(out_dir,'sig_expr_density_plots.pdf'),width=4*(length(names_datasets)),height=4*(length(names_sigs)))#width=10,height=10)
}
#set up the canvas in a grid
#the following computes each of the density curves for expression
graphics::par(mfrow=c(num_rows,num_cols))
for (k in 1:length(names_sigs)){
gene_sig <- gene_sigs_list[[names_sigs[k]]] #load signature
if(is.matrix(gene_sig)){gene_sig = as.vector(gene_sig);}
for (i in 1:length(names_datasets)){
#calculate the porportion of nonzero expression data in the matrix
# genes_expr <- mRNA_expr_matrix[[names_datasets[i]]][gene_sig,]
data.matrix = mRNA_expr_matrix[[names_datasets[i]]] #load dataset
inter <- intersect(gene_sig, row.names(data.matrix)) #only look at genes that are in the dataset
genes_expr <- data.matrix[inter,]
gene_expr_vals <- 1 - (rowSums(genes_expr < thresholds[i]) / (dim(genes_expr)[2])) #take proportion of expression
gene_expr_vals[setdiff(gene_sig, row.names(data.matrix))] <- 0
#make the density plot
graphics::plot(stats::density(stats::na.omit(gene_expr_vals),adjust=0.25),main=paste0("Signature gene expression\n",names_datasets[i], ' ',names_sigs[k]),ylab="Density",cex.main=min(1,3.5*10/max_line_length))
}
}
#save the plot
if(showResults){
grDevices::dev.copy(grDevices::pdf,file.path(out_dir,'sig_expr_density_plots.pdf'),width=4*(length(names_datasets)),height=4*(length(names_sigs)))#width=10,height=10)
}
if(grDevices::dev.cur()!=1){
g <- grDevices::dev.off() # to reset the graphics pars to defaults
}
#write to log file
cat('Expression and density graphs created successfully.\n', file=file)
#here we calculate the outputs again just to put in text files
if(!dir.exists(file.path(out_dir,'expression_tables'))){
dir.create(file.path(out_dir,'expression_tables'))
}
for (k in 1:length(names_sigs)){
gene_sig <- gene_sigs_list[[names_sigs[k]]] #load signature
if(is.matrix(gene_sig)){gene_sig = as.vector(gene_sig);}
for (i in 1:length(names_datasets)){
#calculate the porportion of nonzero expression data in the matrix
# genes_expr <- mRNA_expr_matrix[[names_datasets[i]]][gene_sig,]
data.matrix = mRNA_expr_matrix[[names_datasets[i]]] #load dataset
inter <- intersect(gene_sig, row.names(data.matrix)) #only look at genes that are in the dataset
genes_expr <- data.matrix[inter,]
gene_expr_vals <- 1 - (rowSums(genes_expr < thresholds[i]) / (dim(genes_expr)[2])) #take proportion of expression
gene_expr_vals[setdiff(gene_sig, row.names(data.matrix))] <- 0 #add back in the genes that were never present anyways
gene_expr_vals_na <- (rowSums(is.na(genes_expr)) / dim(genes_expr)[2]) #count the proportion of NA values
gene_expr_vals_na[setdiff(gene_sig, row.names(data.matrix))] <- 1 #add back in the genes that were never present anyways
output_table <- cbind(gene_expr_vals,gene_expr_vals_na)
colnames(output_table) <- c('Proportion_above_threshold','NA proportion')
utils::write.table(output_table,file=file.path(out_dir,'expression_tables', paste0('expression_table_',names_sigs[k],'_',names_datasets[i],'.txt')),quote=F,sep='\t')
}
}
radar_plot_values #return the values we will use in the radarplot
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.