Nothing
#' qc_rcc
#'
#' @inheritParams load_rcc
#' @param nacho_df [[data.frame]] A `data.frame` with all columns from the sample sheet `ssheet_csv`
#' and all computed columns, *i.e.*, quality-control metrics and counts, with one sample per row.
#'
#' @keywords internal
#' @usage NULL
#' @noRd
#'
#' @return [[list]]
qc_rcc <- function(
data_directory,
nacho_df,
id_colname,
housekeeping_genes,
housekeeping_predict,
housekeeping_norm,
normalisation_method,
n_comp
) {
Name <- CodeClass <- NULL # no visible binding for global variable
has_hkg <- grepl("Housekeeping", nacho_df[["CodeClass"]])
if (is.null(housekeeping_genes) & any(has_hkg)) {
housekeeping_genes <- nacho_df[["Name"]][has_hkg]
housekeeping_genes <- unique(housekeeping_genes)
}
control_genes_df <- format_counts(
data = nacho_df[Name %in% housekeeping_genes | !grepl("Endogenous", CodeClass)],
id_colname = id_colname,
count_column = "Count"
)
probes_to_exclude <- probe_exclusion(control_genes_df = control_genes_df)
if (housekeeping_predict) {
message("[NACHO] Searching for the best housekeeping genes.")
message(
'[NACHO] Computing normalisation factors using "',
normalisation_method,
'" method for housekeeping genes prediction.'
)
temp_facs <- factor_calculation(
nacho_df = nacho_df,
id_colname = id_colname,
housekeeping_genes = housekeeping_genes,
housekeeping_predict = housekeeping_predict,
normalisation_method = normalisation_method,
exclude_probes = probes_to_exclude
)
tmp_counts <- merge(
x = nacho_df[
j = .SD,
.SDcols = c(id_colname, setdiff(colnames(nacho_df), colnames(temp_facs)))
],
y = temp_facs,
by = id_colname,
all = TRUE
)
tmp_counts[["count_norm"]] <- normalise_counts(data = tmp_counts, housekeeping_norm = FALSE)
predicted_housekeeping <- find_housekeeping(
data = data.table::setDT(tmp_counts),
id_colname = id_colname,
count_column = "count_norm"
)
if (is.null(predicted_housekeeping) | length(predicted_housekeeping) == 0) {
message("[NACHO] Could not find suitable houskeeping genes, default will be used.")
} else {
message(
"[NACHO] The following predicted housekeeping genes will be used for normalisation:\n",
paste0(" - ", predicted_housekeeping, collapse = "\n")
)
housekeeping_genes <- predicted_housekeeping
control_genes_df <- format_counts(
data = nacho_df[Name %in% housekeeping_genes],
id_colname = id_colname,
count_column = "Count"
)
rownames(control_genes_df) <- control_genes_df[["Name"]]
}
}
message('[NACHO] Computing normalisation factors using "', normalisation_method, '" method.')
qc_values <- qc_features(data = nacho_df, id_colname = id_colname)
norm_factor <- factor_calculation(
nacho_df = nacho_df,
id_colname = id_colname,
housekeeping_genes = housekeeping_genes,
housekeeping_predict = FALSE,
normalisation_method = normalisation_method,
exclude_probes = probes_to_exclude
)
counts_df <- format_counts(
data = nacho_df,
id_colname = id_colname,
count_column = "Count"
)
counts_df <- counts_df[j = .SD, .SDcols = is.numeric]
if (n_comp > (ncol(counts_df) - 1)) {
message(paste('"n_comp" has been set to "n-1:"', (ncol(counts_df) - 1)))
n_comp <- (ncol(counts_df) - 1)
}
if (anyNA(counts_df)) {
message("[NACHO] Missing values have been replaced with zeros for PCA.")
counts_df_tmp <- as.matrix(counts_df)
counts_df_tmp[is.na(counts_df_tmp)] <- 0
} else {
counts_df_tmp <- counts_df
}
pcas <- qc_pca(counts = counts_df_tmp, n_comp = n_comp)
pcsum <- as.data.frame(t(pcas[["pcsum"]]), stringsAsFactors = FALSE)
rownames(pcsum) <- pcsum[["PC"]] <- sprintf("PC%02d", as.numeric(sub("PC", "", rownames(pcsum))))
pcas_pc <- as.data.frame(pcas[["pc"]], stringsAsFactors = FALSE)
colnames(pcas_pc) <- sprintf("PC%02d", as.numeric(sub("PC", "", colnames(pcas_pc))))
pcas_pc[[id_colname]] <- rownames(pcas_pc)
facs_pc_qc <- merge(
x = merge(
x = qc_values,
y = pcas_pc,
by = id_colname,
all = TRUE
),
y = norm_factor,
by = id_colname,
all = TRUE
)
nacho_out <- merge(
x = nacho_df[
j = .SD,
.SDcols = c(id_colname, setdiff(colnames(nacho_df), colnames(facs_pc_qc)))
],
y = facs_pc_qc,
by = id_colname,
all = TRUE
)
list(
access = id_colname,
housekeeping_genes = housekeeping_genes,
housekeeping_predict = housekeeping_predict,
housekeeping_norm = housekeeping_norm,
normalisation_method = normalisation_method,
remove_outliers = FALSE,
n_comp = n_comp,
data_directory = data_directory,
pc_sum = pcsum,
nacho = nacho_out
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.