# This file is part of POMA. # POMA is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # POMA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with POMA. If not, see <https://www.gnu.org/licenses/>. library(patchwork) library(knitr) library(dplyr) library(tibble) library(ggplot2) library(reshape2) library(Biobase) library(POMA) e <- t(Biobase::exprs(data)) target <- Biobase::pData(data) %>% rownames_to_column("ID") %>% rename(Group = 2) %>% select(ID, Group)
\pagebreak
if(clean_outliers){ imputed <- PomaImpute(data, method = imputation) pre_processed <- PomaNorm(imputed, method = normalization) %>% PomaOutliers(coef = coeff_outliers) } else { imputed <- PomaImpute(data, method = imputation) pre_processed <- PomaNorm(imputed, method = normalization) }
# zeros zeros <- data.frame(number = colSums(e == 0, na.rm = TRUE)) %>% rownames_to_column("names") %>% filter(number != 0) all_zero <- zeros %>% filter(number == nrow(e)) # missing values nas <- data.frame(number = colSums(is.na(e))) %>% rownames_to_column("names") %>% filter(number != 0) # zero variance var_zero <- e %>% as.data.frame() %>% summarise_all(~ var(., na.rm = TRUE)) %>% t() %>% as.data.frame() %>% rownames_to_column("names") %>% filter(V1 == 0)
Your data have r nrow(e)
samples, r ncol(e)
features and r length(table(target$Group))
groups, that are r noquote(paste(shQuote(levels(as.factor(target$Group))), collapse=", "))
. r ifelse(ncol(Biobase::pData(data)) > 1, paste0("Furthermore, **", ncol(Biobase::pData(data)) - 1,"** covariates have been found in your data. These covariates are **",noquote(paste(shQuote(paste0(colnames(Biobase::pData(data))[2:ncol(Biobase::pData(data))])), collapse=", ")),"**."), "")
A r round((sum(is.na(e))/(nrow(e)*ncol(e)))*100, 2)
% of values in your data are NAs (missing values). r ifelse(nrow(nas) >= 1, paste0("Variables that have NA values are **",noquote(paste(shQuote(paste0(nas$names," (",nas$number,")")), collapse=", ")),"**."), "")
A r round((sum(zeros$number)/(nrow(e)*ncol(e)))*100, 2)
% of values in your data are zeros. r ifelse(nrow(zeros) >= 1, paste0("Variables that have zeros are **",noquote(paste(shQuote(paste0(zeros$names," (",zeros$number,")")), collapse=", ")),"**."), "")
Removed from the exploratory data analysis r nrow(all_zero)
features that only have zeros. r ifelse(nrow(all_zero) >= 1, paste0("These variables are **",noquote(paste(shQuote(all_zero$names), collapse=", ")),"**."), "")
Removed from the exploratory data analysis r nrow(var_zero)
features that have zero variance. r ifelse(nrow(var_zero) >= 1, paste0("These variables are **",noquote(paste(shQuote(var_zero$names), collapse=", ")),"**."), "")
summary_table1 <- data.frame(Samples = nrow(e), Features = ncol(e), Covariates = ncol(Biobase::pData(data)) - 1) summary_table2 <- data.frame(Counts_Zero = sum(zeros$number), Percent_Zero = paste(round((sum(zeros$number)/(nrow(e)*ncol(e)))*100, 2), "%")) summary_table3 <- data.frame(Counts_NA = sum(is.na(e)), Percent_NA = paste(round((sum(is.na(e))/(nrow(e)*ncol(e)))*100, 2), "%")) knitr::kable(summary_table1) knitr::kable(summary_table2) knitr::kable(summary_table3)
if (nrow(nas) >= 1){ ggplot(nas, aes(reorder(names, number), number, fill = number)) + geom_col() + ylab("Missing values") + xlab("") + ggtitle("Missing Value Plot") + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") }
if (nrow(zeros) >= 1){ ggplot(zeros, aes(reorder(names, number), number, fill = number)) + geom_col() + ylab("Zeros") + xlab("") + ggtitle("Zeros Plot") + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") }
counts <- data.frame(table(target$Group)) colnames(counts) <- c("Group", "Counts") ggplot(counts, aes(reorder(Group, Counts), Counts, fill = Group)) + geom_col() + ylab("Counts") + xlab("") + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
indNum <- nrow(Biobase::pData(pre_processed)) jttr <- ifelse(indNum <= 10, TRUE, FALSE) p1 <- PomaBoxplots(imputed, jitter = jttr) + xlab("Samples") + ylab("Value") + ggtitle("Not Normalized") + theme(legend.position = "bottom") p2 <- PomaBoxplots(pre_processed, jitter = jttr) + xlab("Samples") + ylab("Value") + ggtitle(paste0("Normalized (", normalization, ")")) + theme(legend.position = "bottom") p1 + p2
p3 <- PomaDensity(imputed) + ggtitle("Not Normalized") p4 <- PomaDensity(pre_processed) + ggtitle(paste0("Normalized (", normalization, ")")) p3/p4
outliers <- data %>% PomaImpute(method = imputation) %>% PomaNorm(method = normalization) %>% PomaOutliers(do = "analyze", coef = coeff_outliers) outliers$polygon_plot
r nrow(outliers$outliers)
possible outliers detected in your data. r ifelse(nrow(outliers$outliers) >= 1, paste0("These outliers are **",noquote(paste(shQuote(paste0(outliers$outliers$sample)), collapse=", ")),"**."), "")
if(nrow(outliers$outliers) >= 1){ knitr::kable(outliers$outliers) }
correlations <- PomaCorr(pre_processed) high_correlations <- correlations$correlations %>% filter(abs(corr) > 0.97)
There are r nrow(high_correlations)
high correlated feature pairs in your data. r ifelse(nrow(high_correlations) >= 1, paste0("These features are **",noquote(paste(shQuote(paste0(high_correlations$Var1, " - " , high_correlations$Var2)), collapse=", ")),"**."), "")
PomaHeatmap(pre_processed)
PomaMultivariate(pre_processed, method = "pca")$scoresplot
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.