#' ETL
#' @description Run a pipeline to prepare all datasets required by the app
#' @param .project character, the project id. Only data belonging
#' to this project will be prepared for loading into the app
#' @param .project_data list, a list of datasets
#' @details In 'production' mode, the app will load required datasets after the user
#' chooses a project id string. For testing individual functions,
#' we can supply a data object directly to the function.
#'
#' @return A list of three lists:
#' - `$meta`, a list of one data frame with the metadata corresponding to `.project`
#' - `$data`, a list of six data frames with the raw data corresponding to `.project`
#' - `$app`, a list of four tables merging or transforming objects from `$data`
#' to be used by app modules
#'
#' @export
etl <- function(.project) {
.project_data <- load_project_data(.project = .project)
qc_table <- prep_qc_table(.project_data = .project_data, .project = NULL)
qc_barplot <- prep_qc_barplot_table(.project_data = .project_data, .project = NULL)
indels <- prep_indel_table(.project_data = .project_data, .project = NULL)
align <- prep_alignment_table(.project_data = .project_data, .project = NULL)
allele <- prep_allele_table(.project_data = .project_data, .project = NULL)
app_tables <-
list(
AppQCTable = qc_table
, AppQCBarplot = qc_barplot
, AppIndels = indels
, AppAlign = align
, AppAllele = allele
)
append(.project_data, list(app = app_tables))
}
#' @describeIn etl Merge CRISPRSummary and QCTable to calculate the percent
#' of usable reads to display in the [read qc module](mod_read_qc_ui)
#' @export
prep_qc_table <-
function(.project_data = NULL,
.project = NULL) {
if (all(is.null(.project_data), is.null(.project))) {
stop("Please provide either a project id string, or project data object")
}
if (is.null(.project_data)) {
.project_data <- load_project_data(.project)
}
tmp_df <-
dplyr::left_join(.project_data$data$CRISPRSummary,
.project_data$data$QCTable,
by = "SampleName")
if ("ExperimentId" %in% names(tmp_df)) {
join_by <- c("ExperimentId", "SampleName")
} else {
join_by <- "SampleName"
}
dplyr::left_join(tmp_df, .project_data$data$SampleMeta, by = join_by) %>%
dplyr::select(dplyr::all_of(names(.project_data$data$SampleMeta)),
TargetId,
SampleName,
dplyr::everything()) %>%
dplyr::mutate(PercentUsableTotalReads = as.numeric(UsableReadsTotal) / as.numeric(NumberOfReads) * 100) %>%
dplyr::mutate(PercentUsableTotalReads = round(PercentUsableTotalReads, 2))
}
#' @describeIn etl Merge the SampleMeta and EditingFreq datasets to generate
#' a barplot of the percent modified/unmodified reads per sample to display in
#' the [read qc barplot module](mod_read_qc_barplot_ui)
#' @export
prep_qc_barplot_table <- function(.project_data = NULL, .project = NULL) {
if (all(is.null(.project_data), is.null(.project))) {
stop("Please provide either a project id string, or project data object")
}
if (is.null(.project_data)) {
.project_data <- load_project_data(.project)
}
.project_data$data$SampleMeta %>%
dplyr::mutate_all(as.character) %>%
dplyr::left_join(
x = .project_data$data$EditingFreq,
y = .,
by = c("ExperimentId", "SampleName")) %>%
dplyr::mutate(TargetId = as.character(TargetId))
}
#' @describeIn etl Merge the SampleMeta and IndelFreq datasets to generate
#' a scatter plot of the indel species distribution, later displayed in the
#' the [indel distributon module](mod_indel_distribution_ui)
#' @export
prep_indel_table <- function(.project_data = NULL, .project = NULL) {
if (all(is.null(.project_data), is.null(.project))) {
stop("Please provide either a project id string, or project data object")
}
if (is.null(.project_data)) {
.project_data <- load_project_data(.project)
}
dplyr::left_join(
x = .project_data$data$IndelFreq,
y = .project_data$data$SampleMeta,
by = c("ExperimentId", "SampleName")
)
}
#' @describeIn etl Transform the AlleleAlignFreq table for loading into the app,
#' to display in the [sequence alignment module](mod_seq_alignment_ui)
#'
#' @export
prep_alignment_table <- function(.project_data = NULL, .project = NULL) {
if (all(is.null(.project_data), is.null(.project))) {
stop("Please provide either a project id string, or project data object")
}
if (is.null(.project_data)) {
.project_data <- load_project_data(.project)
}
.project_data$data$AlleleAlignFreq %>%
dplyr::left_join(., .project_data$data$SampleMeta, by = c("ExperimentId", "SampleName")) %>%
dplyr::mutate(PctReads = round(as.numeric(PctReads), 2)) %>%
dplyr::mutate(NumberOfReads = round(as.numeric(NumberOfReads), 2))
}
#' @describeIn etl Transform the AlleleAlignFreq table for loading into the app,
#' to display in the [allele frequency module](mod_allele_frequency_ui)
#'
#' @export
prep_allele_table <- function(.project_data = NULL, .project = NULL) {
if (all(is.null(.project_data), is.null(.project))) {
stop("Please provide either a project id string, or project data object")
}
if (is.null(.project_data)) {
.project_data <- load_project_data(.project)
}
group_by_list <- c(
names(.project_data$data$SampleMeta),
"TargetId",
"SampleName",
"GdnaId",
"MutationType"
) %>% unique()
.project_data$data$AlleleAlignFreq %>%
dplyr::left_join(.,
.project_data$data$SampleMeta,
by = c("ExperimentId", "SampleName")) %>%
dplyr::mutate(MutationType = ifelse(MutationType == "NA", NA_character_, MutationType)) %>%
dplyr::group_by(!!!rlang::syms(group_by_list)) %>%
dplyr::mutate(PctReads = as.numeric(PctReads)) %>%
dplyr::summarise_at("PctReads", mean, na.rm = TRUE) %>%
tidyr::drop_na(MutationType) %>%
tidyr::pivot_wider(names_from = MutationType, values_from = PctReads) %>%
dplyr::mutate(GrandTotal = sum(Indel, `Indel+Subs`, Subs, na.rm = TRUE)) %>%
dplyr::ungroup() %>%
dplyr::mutate_if(is.numeric, round, 2) %>%
dplyr::mutate_if(is.factor, as.character)
}
#' load_project_data
#' @importFrom parallel detectCores
#' @noRd
load_project_data <- function(.project) {
if (golem::app_prod()) {
print("prod")
meta_data <- readRDS("inst/META/AllExperimentMetaData.RDS")
data_data <- qs::qread(paste0("inst/DATA/", .project, ".qs"))
# meta_data <-
# system.file("AllExperimentMetaData.RDS",
# package = "gene.editing.dash",
# mustWork = TRUE) %>% readRDS()
#
# data_data <-
# system.file(paste0(.project, ".RDS"),
# package = "gene.editing.dash",
# mustWork = TRUE) %>% readRDS()
} else {
meta_data <- readRDS("inst/META/AllExperimentMetaData.RDS")
data_data <- qs::qread(file = paste0("inst/DATA/", .project, ".qs"), nthreads = parallel::detectCores()-1)
}
list(
meta = meta_data,
data = data_data
)
}
#' find possible meta columns
#' @description Try to coerce to numeric, and grab columns that fail as
#' possible categorical variables that can be used to build menus for
#' faceting or color-coding
#' @param .data the table used in a module where we wish to generate menus of column
#' name options
#' @export
find_possible_meta_columns <- function(.data) {
suppressWarnings(
.data %>%
dplyr::mutate_if(is.factor, as.character) %>%
purrr::map(as.numeric) %>%
purrr::keep( ~ all(is.na(.))) %>%
names()
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.