#' IGC Input and Rarefaction Curves/Basic Comparative
#'
#' @param mre adsf
#'
#' @return NULL asdf
#' @export
#'
#' @import dplyr
#' @import ggplot2
#' @import stringr
#' @import purrr
#' @import tidyr
#' @importFrom grDevices boxplot.stats cairo_pdf
#' @importFrom stats lm na.omit quantile reorder setNames
#' @importFrom ggforce facet_row
#'
#' @examples
igc_refraction <- function(mre) {
dataTable <- mre$dataTable
metadata_df <- mre$metadata_df
categorical_vals <- mre$categorical_vals
numeric_vals <- mre$numeric_vals
longitudinal_vals <- mre$longitudinal_vals
# Logg info
IGCperc <- 0.02
message("Starting IGC refraction")
message("IGC percentage to use as a threshold: ", IGCperc)
## ===========================================================
## CATEGORIC PLOTS
## PREPORCESSING DATA
prepro_1 <- dataTable %>%
as_tibble() %>%
select(SampleID, InitialReads, NumberMappedReads) %>%
distinct_all() %>%
arrange(InitialReads) %>%
mutate(SampleID = factor(SampleID, levels = SampleID),
InitialReads = InitialReads - NumberMappedReads) %>%
gather("Step", "value", -SampleID) %>%
inner_join(metadata_df, by = "SampleID")
prepro_2 <- dataTable %>%
as_tibble() %>%
select(SampleID, InitialReads, NumberMappedReads) %>%
distinct_all() %>%
mutate(SampleID = factor(SampleID, levels = SampleID),
NumberMappedReads = NumberMappedReads / InitialReads * 100,
InitialReads = 100 - NumberMappedReads) %>%
arrange(NumberMappedReads) %>%
mutate(idx = 1:nrow(.)) %>%
gather("Step", "value", -SampleID, -idx) %>%
inner_join(metadata_df, by = "SampleID")
## STACKED BARPLOTS
plot_list <- list()
plot_list[['StackedBarplot1']] <- prepro_1 %>%
select(SampleID, Step, value) %>%
drop_na() %>%
ggplot(aes(x = SampleID, y = value, fill = Step)) +
geom_bar(stat = "Identity", size = 0.5, alpha = 0.75) +
labs(x = "", y = "Million PE reads", title = "IGC Mapped Reads") +
theme_minimal() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1),
axis.text=element_text(size = 6)) +
ggsave(filename = str_c("CatalogMapping/IGC/IGCMapping_StackedBarplot1.pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
plot_list[['StackedBarplot1_by_vars']] <- categorical_vals %>%
pull(CategoricalVariable) %>%
purrr::set_names() %>%
purrr::map(function(cat_var) {
prepro_1 %>%
select(SampleID, Step, value, cat_var) %>%
na.omit() %>%
ggplot(aes(x = SampleID, y = value, fill = Step)) +
geom_bar(stat="Identity", size = 0.5, alpha = 0.75) +
facet_row(ensym(cat_var), scales = 'free_x', space = 'free') +
labs(x = "", y = "Million PE reads", title = "IGC Mapped Reads") +
theme_minimal() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1),
axis.text=element_text(size = 6)) +
ggsave(filename = str_c("CatalogMapping/IGC/IGCMapping_StackedBarplot2_by_",cat_var,".pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
})
plot_list[['StackedBarplot2']] <- prepro_2 %>%
select(SampleID, Step, value, idx) %>%
drop_na() %>%
ggplot(aes(x = reorder(SampleID, idx), y = value, fill = Step)) +
geom_bar(stat = "Identity", size = 0.5, alpha = 0.75) +
labs(x = "", y = "PE reads (%)", title = "% of IGC Mapped Reads") +
theme_minimal() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1),
axis.text=element_text(size = 6)) +
ggsave(filename = str_c("CatalogMapping/IGC/IGCMapping_StackedBarplot3.pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
plot_list[['StackedBarplot2_by_vars']] <- categorical_vals %>%
pull(CategoricalVariable) %>%
purrr::set_names() %>%
purrr::map(function(cat_var) {
prepro_2 %>%
select(SampleID, Step, value, cat_var, idx) %>%
na.omit() %>%
ggplot(aes(x = reorder(SampleID, idx), y = value, fill = Step)) +
geom_bar(stat="Identity", size = 0.5, alpha = 0.75) +
facet_row(ensym(cat_var), scales = 'free_x', space = 'free') +
labs(x = "", y = "PE reads (%)", title = "% of IGC Mapped Reads") +
theme_minimal() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1),
axis.text=element_text(size = 6)) +
ggsave(filename = str_c("CatalogMapping/IGC/IGCMapping_StackedBarplot4_by_",cat_var,".pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
})
## REFRACTION PLOTS
plot_list[['Refraction']] <- dataTable %>%
ggplot(aes(ReadCountReal, GeneNumber, colour = SampleID)) +
geom_line(size = 0.5) +
geom_point(size = 1) +
ggsave(filename = str_c("CatalogMapping/IGC/IGCMapping_RarefactionPlot.pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
plot_list[['Refraction_by_vars']] <- categorical_vals %>%
pull(CategoricalVariable) %>%
purrr::set_names() %>%
purrr::map(function(cat_var) {
dataTable %>%
as_tibble() %>%
select(SampleID, ReadCountReal, GeneNumber) %>%
inner_join(metadata_df %>% select(SampleID, Arm), by = 'SampleID') %>%
setNames(c("SampleID","ReadCountReal","GeneNumber","Category")) %>%
ggplot(aes(ReadCountReal,GeneNumber, color = Category)) +
labs(title = str_c("IGC Rarefaction Plots by", cat_var)) +
geom_line(size = 0.5,aes(color = Category,group = SampleID)) +
geom_point(size = 1) +
ggsave(filename = str_c("CatalogMapping/IGC/IGCMapping_RarefactionPlotBy_",cat_var,".pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
})
plot_list[['Refraction_boxplot']] <- c("InitialReads","NumberMappedReads","ReadCountReal","GeneNumber") %>%
purrr::set_names() %>%
map(function(col) {
categorical_vals %>%
pull(CategoricalVariable) %>%
purrr::set_names() %>%
purrr::map(function(cat_var) {
dataTable %>%
select(SampleID, !!col) %>%
inner_join(metadata_df %>% select(SampleID, !!cat_var), by = 'SampleID') %>%
select(!!cat_var, !!col) %>%
boxplot_single_var(., outdir = "CatalogMapping/IGC", palette_names = 'Set1',
filename = str_c("IGCMapping_Boxplot_",col,"_By_",cat_var,".pdf"))
})
})
plot_list[['Refraction_boxplot_thres']] <- categorical_vals %>%
pull(CategoricalVariable) %>%
purrr::set_names() %>%
purrr::map(function(cat_var) {
thres <- quantile(dataTable$NumberMappedReads, probs = 0.05)
dataTable %>%
as_tibble() %>%
dplyr::filter(ReadCountReal >= thres) %>%
dplyr::filter(!duplicated(SampleID)) %>%
inner_join(metadata_df %>% select(SampleID, !!cat_var), by = 'SampleID') %>%
select(!!cat_var, "GeneNumber") %>%
boxplot_single_var(., outdir = "CatalogMapping/IGC", palette_names = 'Set1',
title = str_c("Gene Count by ", cat_var, ' at ',thres, ' depth'),
filename = str_c("IGCGeneNumber_Boxplot_By_", cat_var, ".pdf"))
})
## ===========================================================
## LONGITUDINAL PLOTS
if(!is.null(longitudinal_vals)) {
path <- "CatalogMapping/IGC/Longitudinal/"
dir.create(path, showWarnings = F, recursive = T)
plot_list[['Loitudinal_plots_1']] <- longitudinal_vals %>%
pull(LongitudinalVariable) %>%
purrr::set_names() %>%
purrr::map(function(long_var) {
thres <- quantile(dataTable$NumberMappedReads, probs = 0.05)
link_var <- longitudinal_vals %>%
dplyr::filter(LongitudinalVariable == long_var) %>%
pull(LinkVariable)
pp_1 <- dataTable %>%
as_tibble() %>%
dplyr::filter(ReadCountReal >= thres) %>%
dplyr::filter(!duplicated(SampleID)) %>%
inner_join(metadata_df, by = 'SampleID') %>%
select(SampleID, !!link_var, !!long_var, GeneNumber) %>%
setNames(c("SampleID","LinkVariable","TimeVariable","GeneRichness")) %>%
mutate(TimeVariable = as.numeric(TimeVariable)) %>%
ggplot(aes(TimeVariable, GeneRichness, group = LinkVariable, colour = LinkVariable)) +
geom_line() +
geom_point(size = 2) +
geom_smooth(stat = "smooth", aes(group = NULL), method = 'loess') +
labs(title = str_c("Gene Richness (IGC, ",thres," reads) vs time")) +
ggsave(str_c(path, "/Longitudinal_IGC_",link_var,".pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
pp_2 <- categorical_vals %>%
pull(CategoricalVariable) %>%
purrr::set_names() %>%
purrr::map(function(cat_var) {
dataTable %>%
as_tibble() %>%
dplyr::filter(ReadCountReal >= thres) %>%
dplyr::filter(!duplicated(SampleID)) %>%
inner_join(metadata_df, by = 'SampleID') %>%
select(SampleID, !!link_var, !!long_var, !!cat_var, GeneNumber) %>%
setNames(c("SampleID","LinkVariable","TimeVariable","GroupVariable","GeneRichness")) %>%
ggplot(aes(TimeVariable, GeneRichness, group = LinkVariable)) +
geom_line(aes(color = GroupVariable)) +
geom_point(size = 2) +
geom_smooth(aes(group = GroupVariable, color = GroupVariable), method = 'loess') +
labs(title = str_c("Gene Richness (IGC,",thres," reads) vs time for by ",cat_var)) +
ggsave(str_c(path, "/Longitudinal_IGC_by_",cat_var, "_and_", link_var,".pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
})
return(list(pp_1, pp_2))
})
plot_list[['Loitudinal_plots_2']] <- categorical_vals %>%
pull(CategoricalVariable) %>%
purrr::set_names() %>%
purrr::map(function(cat_var) {
metadata_df %>%
pull(!!cat_var) %>%
levels(.) %>%
purrr::set_names() %>%
purrr::map(function(lev) {
longitudinal_vals %>%
pull(LongitudinalVariable) %>%
purrr::set_names() %>%
purrr::map(function(long_var) {
link_var <- longitudinal_vals %>%
dplyr::filter(LongitudinalVariable == long_var) %>%
pull(LinkVariable)
thres <- quantile(dataTable$NumberMappedReads, probs = 0.05)
pre <- dataTable %>%
as_tibble() %>%
dplyr::filter(ReadCountReal >= thres) %>%
dplyr::filter(!duplicated(SampleID)) %>%
inner_join(metadata_df, by = 'SampleID') %>%
dplyr::filter(!!sym(cat_var) == lev)
if (nrow(pre) < 5) { return(NULL) }
pp_1 <- pre %>%
select(SampleID, !!link_var, !!long_var, GeneNumber) %>%
setNames(c("SampleID","LinkVariable","TimeVariable","GeneRichness")) %>%
ggplot(aes(TimeVariable, GeneRichness, group = LinkVariable)) +
geom_line() +
geom_point(size = 2) +
geom_smooth(stat = "smooth", aes(group = NULL)) +
labs(title = str_c("Gene Richness (IGC,",thres," reads) vs time")) +
ggsave(str_c("CatalogMapping/IGC/Longitudinal/Longitudinal_IGC_",cat_var,"-",lev,"_",long_var,".pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
if (nrow(categorical_vals) >= 2) {
pp_2 <- categorical_vals %>%
pull(CategoricalVariable) %>%
purrr::set_names() %>%
purrr::map(function(cat_var2) {
if (cat_var == cat_var2) { return(NULL) }
pre_plot <- pre %>%
select(SampleID, !!link_var, !!long_var, !!cat_var2, GeneNumber) %>%
setNames(c("SampleID","LinkVariable","TimeVariable","GroupVariable","GeneRichness")) %>%
ggplot(aes(TimeVariable, GeneRichness, group = LinkVariable)) +
geom_line(aes(color = GroupVariable)) +
geom_point(size = 2) +
labs(title = str_c("Gene Richness (IGC,",thres," reads) vs time for by ", cat_var2)) +
ggsave(str_c("CatalogMapping/IGC/Longitudinal/Longitudinal_IGC_",cat_var,"-",
lev,"_by_", cat_var2, "_and_", long_var,".pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
})
}
if (nrow(categorical_vals) >= 2) {
return(list(pp_1, pp_2))
} else {
return(pp_1)
}
})
})
})
}
## ===========================================================
## NUMERICAL PLOTS
if(!is.null(numeric_vals)) {
path <- "CatalogMapping/IGC/Correlations"
dir.create(path, recursive = T, showWarnings = F)
plot_list[['Correlation_plots_1']] <- numeric_vals %>%
pull(NumericalVariable) %>%
purrr::set_names() %>%
purrr::map(function(num_var) {
thres <- quantile(dataTable$NumberMappedReads, probs = 0.05)
prepro <- dataTable %>%
as_tibble() %>%
dplyr::filter(ReadCountReal >= thres) %>%
dplyr::filter(!duplicated(SampleID)) %>%
inner_join(metadata_df, by = "SampleID") %>%
select(!!num_var, "GeneNumber") %>%
setNames(c("Response","GeneNumber"))
fit1 <- lm(Response ~ GeneNumber, data = prepro)
ggplotRegression(fit1) +
ggsave(str_c(path, "/IGC_vs_",num_var,"_corr.pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
})
}
plot_list[['Correlation_plots_2']] <- metadata_df %>%
select(where(is.numeric)) %>%
colnames() %>%
purrr::set_names() %>%
purrr::map(function(num_col) {
thres <- quantile(dataTable$NumberMappedReads, probs = 0.05)
prepro <- dataTable %>%
as_tibble() %>%
dplyr::filter(ReadCountReal >= thres) %>%
dplyr::filter(!duplicated(SampleID)) %>%
inner_join(metadata_df, by = "SampleID") %>%
select(!!num_col, "GeneNumber") %>%
setNames(c("Response","GeneNumber"))
fit1 <- lm(Response ~ GeneNumber, data = prepro)
ggplotRegression(fit1) +
ggsave(str_c(path, "/IGC_vs_",num_col,"_corr.pdf"),
device = cairo_pdf, width = 16, height = 10, dpi = 600)
})
## ===========================================================
## SAVE DATA AND WORKSPACE
dataTable %>%
as_tibble() %>%
dplyr::filter(ReadCountReal >= quantile(dataTable$NumberMappedReads, probs = 0.05)) %>%
readr::write_csv(file = str_c("CatalogMapping/IGC_at_",
quantile(dataTable$NumberMappedReads, probs = 0.05),".tsv"))
save.image(file = "CatalogMapping/IGC/IGC_RAnalysis.RData", compress = 'xz')
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.