Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
# fig.align = "center",
comment = ">"
)
## ---- eval = FALSE------------------------------------------------------------
# # install.packages("BiocManager")
# BiocManager::install("POMA")
## ---- warning = FALSE, message = FALSE, comment = FALSE-----------------------
library(POMA)
## ---- eval = FALSE------------------------------------------------------------
# data("st000336")
# PomaEDA(st000336)
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
library(patchwork)
library(knitr)
library(dplyr)
library(tibble)
library(ggplot2)
library(reshape2)
library(Biobase)
e <- t(Biobase::exprs(st000336))
target <- Biobase::pData(st000336) %>% rownames_to_column("ID") %>% rename(Group = 2) %>% select(ID, Group)
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
imputed <- PomaImpute(st000336, method = "knn")
pre_processed <- PomaNorm(imputed, method = "log_pareto") %>%
PomaOutliers(coef = 1.5)
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
# zeros
zeros <- data.frame(number = colSums(e == 0, na.rm = TRUE)) %>%
rownames_to_column("names") %>%
filter(number != 0)
all_zero <- zeros %>%
filter(number == nrow(e))
# missing values
nas <- data.frame(number = colSums(is.na(e))) %>%
rownames_to_column("names") %>%
filter(number != 0)
# zero variance
var_zero <- e %>%
as.data.frame() %>%
summarise_all(~ var(., na.rm = TRUE)) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("names") %>%
filter(V1 == 0)
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
summary_table1 <- data.frame(Samples = nrow(e),
Features = ncol(e),
Covariates = ncol(Biobase::pData(st000336)) - 1)
summary_table2 <- data.frame(Counts_Zero = sum(zeros$number),
Percent_Zero = paste(round((sum(zeros$number)/(nrow(e)*ncol(e)))*100, 2), "%"))
summary_table3 <- data.frame(Counts_NA = sum(is.na(e)),
Percent_NA = paste(round((sum(is.na(e))/(nrow(e)*ncol(e)))*100, 2), "%"))
knitr::kable(summary_table1)
knitr::kable(summary_table2)
knitr::kable(summary_table3)
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
if (nrow(nas) >= 1){
ggplot(nas, aes(reorder(names, number), number, fill = number)) +
geom_col() +
ylab("Missing values") +
xlab("") +
ggtitle("Missing Value Plot") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none")
}
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
if (nrow(zeros) >= 1){
ggplot(zeros, aes(reorder(names, number), number, fill = number)) +
geom_col() +
ylab("Zeros") +
xlab("") +
ggtitle("Zeros Plot") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none")
}
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
counts <- data.frame(table(target$Group))
colnames(counts) <- c("Group", "Counts")
ggplot(counts, aes(reorder(Group, Counts), Counts, fill = Group)) +
geom_col() +
ylab("Counts") +
xlab("") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none")
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
indNum <- nrow(Biobase::pData(pre_processed))
jttr <- ifelse(indNum <= 10, TRUE, FALSE)
p1 <- PomaBoxplots(imputed, jitter = jttr) +
xlab("Samples") +
ylab("Value") +
ggtitle("Not Normalized") +
theme(legend.position = "bottom")
p2 <- PomaBoxplots(pre_processed, jitter = jttr) +
xlab("Samples") +
ylab("Value") +
ggtitle("Normalized ('log_pareto')") +
theme(legend.position = "bottom")
p1 + p2
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
p3 <- PomaDensity(imputed) +
ggtitle("Not Normalized")
p4 <- PomaDensity(pre_processed) +
ggtitle("Normalized ('log_pareto')")
p3/p4
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
outliers <- st000336 %>%
PomaImpute(method = "knn") %>%
PomaNorm(method = "log_pareto") %>%
PomaOutliers(do = "analyze", coef = 1.5)
outliers$polygon_plot
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
if(nrow(outliers$outliers) >= 1){
knitr::kable(outliers$outliers)
}
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
correlations <- PomaCorr(pre_processed)
high_correlations <- correlations$correlations %>% filter(abs(corr) > 0.97)
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
PomaHeatmap(pre_processed)
## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
PomaMultivariate(pre_processed, method = "pca")$scoresplot
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.