# setting global options figure.outpath <- paste(output.path,"/icreport_figures/",data.set,"/",sep="") opts_chunk$set(fig.path=figure.outpath,dev=file.ext, warning=FALSE, message=FALSE) options(warn=-1,width = 100) flower.palette <- c("#283545","#E09E12","#A72613","#95CDDC","#395717","#3A3B35","#D68931","#442243")
if(method == "ica"){ cat("--------------------------------------------------------------------------------------------------------------","\n") cat("Type = Independent Component Analysis Report \n") cat("--------------------------------------------------------------------------------------------------------------","\n") } else if (method == "pca") { cat("--------------------------------------------------------------------------------------------------------------","\n") cat("Type = Principal Component Analysis Report \n") cat("--------------------------------------------------------------------------------------------------------------","\n") }
if(method == "ica"){ if(!is.null(ica.result$info.df)){ covariates <- unique(ica.result$cov.corr.idx$Covariate.Name) covariate.gene.df <- data.frame(matrix(nrow = length(covariates), ncol = 3)) colnames(covariate.gene.df) <- c("Covariate","NumberOfGenes","NumberOfICs") for( c in 1: length(covariates)){ single.covariate <- subset(ica.result$cov.corr.idx, Covariate.Name == covariates[c]) ic.index <- single.covariate$IC if(length(ic.index) > 1){ peaks.for.covariate <- sapply(1:length(ic.index), function(x) ica.result$peaks[[ic.index[x]]]) peaks.for.covariate <- unlist(peaks.for.covariate) covariate.peaks <- unique(names(peaks.for.covariate)) } else { covariate.peaks <- ica.result$peaks[[ic.index]] } covariate.gene.df[c,1] <- covariates[c] covariate.gene.df[c,2] <- length(covariate.peaks) covariate.gene.df[c,3] <- length(ic.index) } covaraite.gene.df <- covariate.gene.df[order(covariate.gene.df$NumberOfICs, decreasing = TRUE),] covariate.gene.df$Covariate <- factor(covariate.gene.df$Covariate, levels = c(covariate.gene.df$Covariate)) #ica.result$cov.corr.idx <- within(ica.result$cov.corr.idx, # Covariate.Name <- factor(Covariate.Name, # levels=names(sort(table(Covariate.Name), # decreasing=TRUE)))) summary.plots <- list() if(length(covariates) < 8){ summary.plots[[1]] <- ggplot(covariate.gene.df, aes(x = Covariate, fill = Covariate, y = NumberOfICs)) + geom_bar(stat = 'identity',col = "black", width = .5) + scale_fill_manual(values=flower.palette) + ylab("Number of ICs") summary.plots[[2]] <- ggplot(covariate.gene.df, aes(x = Covariate, y = NumberOfGenes,fill = Covariate)) + geom_bar(stat = 'identity',col = "black", width = .5) + scale_fill_manual(values=flower.palette) + ylab("Number of Genes") } else { summary.plots[[1]] <- ggplot(covariate.gene.df, aes(x = Covariate, y = NumberOfICs, fill = Covariate)) + geom_bar(stat = 'identity',col = "black", width = .5) + ylab("Number of ICs") summary.plots[[2]] <- ggplot(covariate.gene.df, aes(x = Covariate, y = NumberOfGenes,fill = Covariate)) + geom_bar(stat = 'identity',col = "black", width = .5) + ylab("Number of Genes") } summary.plots[[3]] <- ggplot(ica.result$ica.stat.df, aes(x = N.peaks,group = corr.ic)) + geom_bar(aes(fill = corr.ic)) + scale_fill_manual(values=c("grey50","darkred")) + xlab("Number of Peaks") + ylab("Number of ICs") summary.plots[[4]] <- ggplot(ica.result$ica.stat.df, aes(x = n.clust,group = corr.ic)) + geom_bar(aes(fill= corr.ic)) + scale_fill_manual(values=c("grey50","darkred")) + xlab("Predicted of Clusters") + ylab("Number of ICs") summary.plots[[5]] <- ggplot(ica.result$ica.stat.df, aes(x = percent.var,group = corr.ic)) + geom_bar(aes(fill= corr.ic)) + scale_fill_manual(values=c("grey50","darkred")) + xlab("Variance(%) Explained") + ylab("Number of ICs") for( p in 1:length(summary.plots)){ summary.plots[[p]] <- ggplot_add_theme(summary.plots[[p]]) } for( p in 3:5 ){ summary.plots[[p]] <- summary.plots[[p]] + theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1)) } } }
if (method == "pca"){ if(!is.null(pca.result$info.df)){ summary.pca.df <- data.frame("PC1" = pca.result$x[,1], "PC2" = pca.result$x[,2], "VarPercent" = pca.result$var.percent, "idx" = c(1:length(pca.result$var.percent)), pca.result$info.df[rownames(pca.result$x),]) } else{ summary.pca.df <- data.frame("PC1" = pca.result$x[,1], "PC2" = pca.result$x[,2], "VarPercent" = pca.result$var.percent, "idx" = c(1:length(pca.result$var.percent))) } summary.plots <- list() # scree of variance explained by each PC summary.plots[[1]] <- ggplot(summary.pca.df, aes(x = idx, y = VarPercent)) + geom_point() + xlab("PC index") + ylab("Variance Explained") + labs(title ="Variance by Component") # check if PC 1 is correlated with any covariates and if so use the color to plot PC1 vs PC2 if(1 %in% pca.result$cov.corr.idx$PC){ most.significant.corr <- which.min(pca.result$cov.pval.mx[1,]) color.by <- names(most.significant.corr) summary.plots[[2]] <- ggplot(summary.pca.df, aes_string(x = "PC1", y = "PC2", col = color.by)) + geom_point(size = 3) + xlab("PC1") + ylab("PC2") }else { summary.plots[[2]] <- ggplot(summary.pca.df, aes(x = PC1, y = PC2)) + geom_point(size = 3) + xlab("PC1") + ylab("PC2") } # under construction if(!is.null(pca.result$cov.corr.idx)){ if(length(unique(pca.result$cov.corr.idx$Covariate.Name)) < 8){ summary.plots[[3]] <- ggplot(pca.result$cov.corr.idx, aes(x = Covariate.Name, fill = Covariate.Name)) + geom_bar(col = "black", width = .5) + scale_fill_manual(values = flower.palette) summary.plots[[4]] <- ggplot(pca.result$cov.corr.idx, aes(x = Covariate.Name, y = var, fill = Covariate.Name)) + geom_bar(stat = 'identity', col = "black", width = .5) + scale_fill_manual(values = flower.palette) } else { summary.plots[[3]] <- ggplot(pca.result$cov.corr.idx, aes(x = Covariate.Name, fill = Covariate.Name)) + geom_bar(col = "black", width = .5) summary.plots[[4]] <- ggplot(pca.result$cov.corr.idx, aes(x = Covariate.Name, y = var, fill = Covariate.Name)) + geom_bar(stat = 'identity',col = "black", width = .5) } summary.plots[[3]] <- summary.plots[[3]] + theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1)) summary.plots[[4]] <- summary.plots[[4]] + theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1)) } for( p in 1:length(summary.plots)){ summary.plots[[p]] <- ggplot_add_theme(summary.plots[[p]]) } multiplot(plotlist = list(summary.plots[[1]], summary.plots[[2]]),cols = 2) }
if(method == "ica"){ if(!is.null(ica.result$info.df)){ if(length(covariates) < 7){ multiplot(plotlist = list(summary.plots[[1]],summary.plots[[2]]), cols = 2) } } }
if (method == "pca"){ if(length(summary.plots) > 2){ multiplot(plotlist = list(summary.plots[[3]], summary.plots[[4]]),cols = 2) } if(!is.null(pca.result$ordered.geneinfo)){ # create axis tick positions for chromosomes if gene info is supplied # the same axis tick positions are used for every plot so it only needs to be calculated once x.axis <- chr_axis_creator(pca.result$ordered.geneinfo) } }
if(method == "ica"){ if(!is.null(ica.result$info.df)){ if(length(covariates) > 6){ multiplot(plotlist = list(summary.plots[[1]],summary.plots[[2]]), cols = 1) } } if(!is.null(ica.result$ordered.geneinfo)){ # create axis tick positions for chromosomes if gene info is supplied # the same axis tick positions are used for every plot so it only needs to be calculated once x.axis <- chr_axis_creator(ica.result$ordered.geneinfo) } }
if(method == "ica"){ for( p in 1:dim(ica.result$A)[1]){ ###################### Plotting without Gene information ########################## # check if there is a specified order for the ICs (default is by explained variance) if(!is.null(ica.result$order)){ i <- ica.result$order[p] } else { i <- p } # Running Mclust on IC coefficient to estimate ideal number of clusters in 1D IC coefficient space mclust.obj <- suppressMessages(mclust::Mclust(ica.result$A[i,], modelNames = "V")) mclust.plots <- list() # Plot IC loading with peaks labeled in red (If peaks are available) if(is.null(ica.result$ordered.geneinfo)){ # If gene info is missing if(!is.null(ica.result$peaks)){ # If the peaks are labeled use them mclust.plots[[1]] <- plot_component(ica.result$S[,i, drop = FALSE],peaks = TRUE, peakresult = ica.result$peaks[[i]], plot.title = paste(data.set,"_IC#_",i,sep ="")) } else { # If no peak labeling is available color them in black mclust.plots[[1]] <- plot_component(ica.result$S[,i, drop = FALSE], plot.title = paste(data.set,"_IC#_",i,sep ="")) } } else { # if gene position information is available if(!is.null(ica.result$peaks)){ # If the peaks are labeled use them mclust.plots[[1]] <- plot_component_chr(ica.result$S[,i, drop = FALSE], geneinfo.df = ica.result$ordered.geneinfo, x.axis = x.axis, plot.title = paste(data.set,"_IC#_",i,sep =""), peaks = TRUE, peakresult = ica.result$peaks[[i]]) } else { # If no peak labeling is available color them in black mclust.plots[[1]] <- plot_component(ica.result$S[,i, drop = FALSE], plot.title = paste(data.set,"_IC#_",i,sep ="")) } } # Gene weight histogram plot (Investigating the non-normality of the component ) #mclust.plots[[4]] <- plot_component_hist(ica.result$S[,i],"Gene Weight Histogram") # create a data frame for plotting if(!is.null(colnames(ica.result$A)) & !is.null(info.df)){ # Dataframe that will go into ggplot for plotting with IC coefficients in IC and idx as sample index coeff.plot.df <- data.frame(as.data.frame(info.df[colnames(ica.result$A),]), "IC" = ica.result$A[i,], "idx"= c(1:dim(ica.result$A)[2])) # Setting the colnames to covariate names colnames(coeff.plot.df)[1:dim(as.data.frame(info.df[colnames(ica.result$A),]))[2]] <- colnames(info.df) } else if (is.null(info.df)) { # If there is no sample information available coeff.plot.df <- data.frame("IC" = ica.result$A[i,],"idx"=c(1:dim(ica.result$A)[2])) } else { coeff.plot.df <- data.frame(as.data.frame(info.df),"IC" = ica.result$A[i,],"idx"=c(1:dim(ica.result$A)[2])) } # If IC is correlated to a covariate color the pot accordingly if(i %in% unique(ica.result$cov.corr.idx$IC)){ # color the graph by the covariate with the most significant association color.by <- names(which.min(ica.result$cov.pval.mx[i, as.character(ica.result$cov.corr.idx$Covariate.Name[which(ica.result$cov.corr.idx$IC == i)])])) mclust.plots[[2]] <- plot_coeff_w_legend(coeff.plot.df, k.col = color.by) + # plot.title = paste("Covariate = ",color.by,sep="")) + theme(legend.position ="none") # Extra if statement for covariates that are not a factor (cannot produce a boxplot with individual groups) # Need to figure out which plot might be most effective if(class(info.df[,color.by]) == "factor"){ number.of.levels <- length(levels(ica.result$info.df[,color.by])) mclust.plots[[3]] <- ggplot(coeff.plot.df, aes_string(x = color.by, y = "IC", color = color.by)) + geom_boxplot() + xlab(paste("Covariate = ",color.by,sep="")) + ylab("") + theme(axis.ticks = element_blank(), axis.text.x = element_blank()) if(number.of.levels > 10){ mclust.plots[[3]] <- mclust.plots[[3]] + theme(legend.position = "none") } } else{ mclust.plots[[3]] <- ggplot(coeff.plot.df, aes_string(x = color.by, y = "IC", color = color.by)) + geom_point() + xlab(paste("Covariate = ",color.by,sep="")) + ylab(" ") + theme(axis.ticks = element_blank(), axis.text.x = element_blank()) } } else { # If IC is not correlated to covariate color it using the clustering information obtained from mclust coeff.plot.df$mclust <- factor(mclust.obj$classification) mclust.plots[[2]] <- ggplot(coeff.plot.df, aes(x= idx, y = IC, color = mclust)) + geom_point(size = 3, pch = 20) + xlab("Sample idx") + ylab("IC coefficient") + theme(legend.position = "none") mclust.plots[[3]] <- ggplot(coeff.plot.df, aes(x = mclust, y = IC, color = mclust)) + geom_boxplot() + xlab(paste(mclust.obj$G,"clusters detected")) + ylab(" ") + theme(axis.ticks = element_blank(), axis.text.x = element_blank()) if(mclust.obj$G == 1){ mclust.plots[[2]] <- mclust.plots[[2]] mclust.plots[[3]] <- mclust.plots[[3]] + xlab("No clusters detected") + theme(legend.position = "none") } } layout <- matrix(c(1,1,1,1, 2,2,3,3), byrow = T,ncol = 4) for( k in 1:length(mclust.plots)){ mclust.plots[[k]] <- ggplot_add_theme(mclust.plots[[k]]) } cat("--------------------------------------------------------------------------------------------------------------","\n") if (i %in% unique(ica.result$cov.corr.idx$IC)){ #cat("IC # ",i," Correlation with covariate ", color.by, # "/ P-value = ",ica.result$cov.pval.mx[i,color.by],"/ Variance (%) = ",ica.result$percent.var[i],"\n") ic.info <- data.frame(row.names = paste("IC",i,sep = ""), "Variance_Percent" = ica.result$ica.stat.df[i,"percent.var"], "Correlated_Covariate" = color.by, "p-value" = ica.result$cov.pval.mx[i,color.by], "Number_of_Peaks" = ica.result$ica.stat.df[i,"N.peaks"]) print(ic.info) cat("---- Top 10 Gene Weights ---- \n") gene.table <- t(round(ica.result$S[order(abs(ica.result$S[,i]), decreasing = T)[1:10],i, drop = FALSE],digits = 2)) print(gene.table) rm(ic.info, gene.table) #cat(paste(names(ica.result$S[order(abs(ica.result$S[,i]), decreasing = T),i])[1:10]," ", sep =" "), "\n") #cat(paste(round(ica.result$S[order(abs(ica.result$S[,i]), decreasing = T),i][1:10],digits = 3)," ",sep =" "), "\n") } else { #cat("IC # ",i," No correlation with covariates / Variance (%) = ",ica.result$percent.var[i],"\n") ic.info <- data.frame(row.names = paste("IC",i,sep = ""), "Variance_Percent" = ica.result$ica.stat.df[i,"percent.var"], "Correlated_Covariate" = "none", "p-value" = NA, "Number_of_Peaks" = ica.result$ica.stat.df[i,"N.peaks"]) print(ic.info) cat("---- Top 10 Gene Weights ---- \n") gene.table <- t(round(ica.result$S[order(abs(ica.result$S[,i]), decreasing = T)[1:10],i, drop = FALSE],digits = 2)) print(gene.table) rm(ic.info, gene.table) #knitr::kable(gene.table, digits = 2, caption = "A table produced by printr.") #cat(paste(names(ica.result$S[order(abs(ica.result$S[,i]), decreasing = T),i])[1:10]," ", sep =" "), "\n") #cat(paste(round(ica.result$S[order(abs(ica.result$S[,i]), decreasing = T),i][1:10],digits = 3)," ", sep =" "), "\n") } cat("--------------------------------------------------------------------------------------------------------------","\n") #mclust.plots[[4]] <- NULL suppressMessages(multiplot(plotlist = mclust.plots, layout = layout)) } }
if (method == "pca"){ for( i in 1:plot.pc){ mclust.plots <- list() mclust.obj <- suppressMessages(Mclust(pca.result$x[,i], modelNames = "V")) mclust.bic.df <- data.frame("V" = as.numeric(mclust.obj$BIC), "N.clust" = attr(mclust.obj$BIC,"G")) #mclust.cer.df <- data.frame("uncertainty" = as.numeric(mclust.obj$uncertainty), # "data" = mclust.obj$data ) if(is.null(pca.result$ordered.geneinfo)){ mclust.plots[[1]] <- plot_component(pca.result$rotation[,i], paste(data.set,"_PC#_",i,sep ="")) } else { # if gene position information is available mclust.plots[[1]] <- plot_component_chr(pca.result$rotation[,i, drop = FALSE], geneinfo.df = pca.result$ordered.geneinfo, x.axis = x.axis, plot.title = paste(data.set,"_PC#_",i,sep =""), peaks = TRUE, peakresult = pca.result$peaks[[i]]) mclust.plots[[1]] <- mclust.plots[[1]] + theme(axis.text.x = element_text(angle = 45, hjust = 1)) } #mclust.plots[[4]] <- plot_component_hist(pca.result$rotation[,i], # "Gene Weight Histogram") # create a data frame for plotting if(!is.null(colnames(pca.result$x)) & !is.null(pca.result$info.df)){ # create a data frame for plotting coeff.plot.df <- data.frame( as.data.frame( pca.result$info.df[rownames(pca.result$x),]) , "IC" = pca.result$x[,i],"idx"=c(1:dim(pca.result$x)[1]) ) # Setting the colnames to covariate names colnames(coeff.plot.df)[1:dim(as.data.frame(pca.result$info.df[rownames(pca.result$x),]))[2]] <- colnames(pca.result$info.df) } else if (is.null(pca.result$info.df)) { # If there is no sample information available coeff.plot.df <- data.frame("IC" = pca.result$x[,i], "idx"=c(1:dim(pca.result$x)[1])) } else { coeff.plot.df <- data.frame(as.data.frame(pca.result$info.df), "IC" = pca.result$x[,i], "idx"=c(1:dim(pca.result$x)[1])) } # If IC is correlated to a covariate color the pot accordingly if(i %in% unique(pca.result$cov.corr.idx$PC)){ color.by <- names(which.min(pca.result$cov.pval.mx[i, as.character(pca.result$cov.corr.idx$Covariate.Name[which(pca.result$cov.corr.idx$PC == i)])])) mclust.plots[[2]] <- plot_coeff_w_legend(coeff.plot.df, k.col = color.by) + theme(legend.position ="none") + ylab("PC coefficient") if(class(pca.result$info.df[,color.by]) == "factor"){ number.of.levels <- length(levels(pca.result$info.df[,color.by])) mclust.plots[[3]] <- ggplot(coeff.plot.df, aes_string(x = color.by, y = "IC", color = color.by)) + geom_boxplot() + xlab(paste("Covariate = ",color.by,sep="")) + ylab("") + theme(axis.ticks = element_blank(), axis.text.x = element_blank()) if(number.of.levels > 10){ mclust.plots[[3]] <- mclust.plots[[3]] + theme(legend.position = "none") } } else{ mclust.plots[[3]] <- ggplot(coeff.plot.df, aes_string(x = color.by, y = "IC", color = color.by)) + geom_point() + xlab(paste("Covariate = ",color.by,sep="")) + ylab("") + theme(axis.ticks = element_blank(), axis.text.x = element_blank()) } } else { # If IC is not correlated to covariate color it using the clustering information obtained from mclust coeff.plot.df$mclust <- factor(mclust.obj$classification) mclust.plots[[2]] <- ggplot(coeff.plot.df, aes(x= idx, y = IC, color = mclust)) + geom_point(size = 3, pch = 20) + theme(legend.position = "none") + xlab("Sample idx") + ylab("PC coefficient") mclust.plots[[3]] <- ggplot(coeff.plot.df, aes(x = mclust, y = IC, color = mclust)) + geom_boxplot() + xlab(paste(mclust.obj$G,"clusters detected")) + ylab(" ") + theme(axis.ticks = element_blank(), axis.text.x = element_blank()) if(mclust.obj$G == 1){ mclust.plots[[3]] <- mclust.plots[[3]] + xlab("No Clusters detected") } } layout <- matrix(c(1,1,1,1, 2,2,3,3), byrow = T,ncol = 4) for( k in 1:length(mclust.plots)){ mclust.plots[[k]] <- ggplot_add_theme(mclust.plots[[k]]) } cat("-----------------------------------------------------------------------------------------------------------------","\n") if (i %in% unique(pca.result$cov.corr.idx$PC)){ pc.info <- data.frame(row.names = paste("PC",i,sep = ""), "Variance_Percent" = pca.result$var.percent[i], "Correlated_Covariate" = color.by, "p-value" = pca.result$cov.pval.mx[i,color.by]) print(pc.info) cat("---- Top 10 Gene Weights ---- \n") gene.table <- t(round(pca.result$rotation[order(abs(pca.result$rotation[,i]), decreasing = T)[1:10],i, drop = FALSE],digits = 2)) print(gene.table) rm(pc.info, gene.table) } else { pc.info <- data.frame(row.names = paste("PC",i,sep = ""), "Variance_Percent" = pca.result$var.percent[i], "Correlated_Covariate" = "none", "p-value" = NA) print(pc.info) cat("---- Top 10 Gene Weights ---- \n") gene.table <- t(round(pca.result$rotation[order(abs(pca.result$rotation[,i]), decreasing = T)[1:10],i, drop = FALSE],digits = 2)) print(gene.table) rm(pc.info, gene.table) } cat("-----------------------------------------------------------------------------------------------------------------","\n") suppressMessages(multiplot(plotlist = mclust.plots, layout = layout)) } }
if(method == "ica"){ if(!is.null(ica.result$hclust)){ n.ics <- dim(ica.result$A)[1] plot(ica.result$hclust$plot, labels = FALSE, main = paste("Hierarchical Clustering of Multiple ICA Runs /", n.ics,"significant ICs")) abline(h=ica.result$hclust$cutoff, col="red", lty=2) } }
if(method == "ica"){ if(!is.null(ica.result$info.df)){ multiplot(plotlist = list(summary.plots[[3]], summary.plots[[4]], summary.plots[[5]]), cols = 3) } }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.