inst/doc/POMA-eda.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  # fig.align = "center",
  comment = ">"
)

## ---- eval = FALSE------------------------------------------------------------
#  # install.packages("BiocManager")
#  BiocManager::install("POMA")

## ---- warning = FALSE, message = FALSE, comment = FALSE-----------------------
library(POMA)

## ---- eval = FALSE------------------------------------------------------------
#  data("st000336")
#  PomaEDA(st000336)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
library(patchwork)
library(knitr)
library(dplyr)
library(tibble)
library(ggplot2)
library(reshape2)
library(Biobase)

e <- t(Biobase::exprs(st000336))
target <- Biobase::pData(st000336) %>% rownames_to_column("ID") %>% rename(Group = 2) %>% select(ID, Group)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
imputed <- PomaImpute(st000336, method = "knn")
pre_processed <- PomaNorm(imputed, method = "log_pareto") %>%
  PomaOutliers(coef = 1.5)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
# zeros
zeros <- data.frame(number = colSums(e == 0, na.rm = TRUE)) %>%
  rownames_to_column("names") %>%
  filter(number != 0)

all_zero <- zeros %>% 
  filter(number == nrow(e))

# missing values
nas <- data.frame(number = colSums(is.na(e))) %>%
  rownames_to_column("names") %>%
  filter(number != 0)

# zero variance
var_zero <- e %>%
  as.data.frame() %>%
  summarise_all(~ var(., na.rm = TRUE)) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("names") %>%
  filter(V1 == 0)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
summary_table1 <- data.frame(Samples = nrow(e),
                             Features = ncol(e),
                             Covariates = ncol(Biobase::pData(st000336)) - 1)
summary_table2 <- data.frame(Counts_Zero = sum(zeros$number),
                             Percent_Zero = paste(round((sum(zeros$number)/(nrow(e)*ncol(e)))*100, 2), "%"))
summary_table3 <- data.frame(Counts_NA = sum(is.na(e)),
                             Percent_NA = paste(round((sum(is.na(e))/(nrow(e)*ncol(e)))*100, 2), "%")) 
knitr::kable(summary_table1)
knitr::kable(summary_table2)
knitr::kable(summary_table3)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
if (nrow(nas) >= 1){
  ggplot(nas, aes(reorder(names, number), number, fill = number)) +
    geom_col() +
    ylab("Missing values") +
    xlab("") +
    ggtitle("Missing Value Plot") +
    theme_bw() + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "none")
}

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
if (nrow(zeros) >= 1){
  ggplot(zeros, aes(reorder(names, number), number, fill = number)) +
    geom_col() +
    ylab("Zeros") +
    xlab("") +
    ggtitle("Zeros Plot") +
    theme_bw() + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "none")
}

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
counts <- data.frame(table(target$Group))
colnames(counts) <- c("Group", "Counts")

ggplot(counts, aes(reorder(Group, Counts), Counts, fill = Group)) +
  geom_col() +
  ylab("Counts") +
  xlab("") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none")

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
indNum <- nrow(Biobase::pData(pre_processed))
jttr <- ifelse(indNum <= 10, TRUE, FALSE)

p1 <- PomaBoxplots(imputed, jitter = jttr) +
  xlab("Samples") +
  ylab("Value") +
  ggtitle("Not Normalized") +
  theme(legend.position = "bottom")

p2 <- PomaBoxplots(pre_processed, jitter = jttr) +
  xlab("Samples") +
  ylab("Value") +
  ggtitle("Normalized ('log_pareto')") +
  theme(legend.position = "bottom")

p1 + p2

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
p3 <- PomaDensity(imputed) +
  ggtitle("Not Normalized")

p4 <- PomaDensity(pre_processed) +
    ggtitle("Normalized ('log_pareto')")

p3/p4

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
outliers <- st000336 %>% 
  PomaImpute(method = "knn") %>%
  PomaNorm(method = "log_pareto") %>%
  PomaOutliers(do = "analyze", coef = 1.5)
outliers$polygon_plot

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
if(nrow(outliers$outliers) >= 1){
  knitr::kable(outliers$outliers)
  }

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
correlations <- PomaCorr(pre_processed)
high_correlations <- correlations$correlations %>% filter(abs(corr) > 0.97)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
PomaHeatmap(pre_processed)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
PomaMultivariate(pre_processed, method = "pca")$scoresplot

Try the POMA package in your browser

Any scripts or data that you put into this service are public.

POMA documentation built on Nov. 8, 2020, 6:26 p.m.