#' Import data
#' @description Imports .csv-files generated by QuantaSoft.
#' @param paths Character vector. Specifies which files and/or directories to
#' load. For directories all files ending in \code{.csv} within are loaded.
#' @param Ch1_is_mutation Logical. Controls whether Ch1 is used to measure the
#' presence of mutant DNA (and Ch2 for wild type DNA). If this is FALSE Ch2 is
#' used instead (and Ch1 for wild type).
#' @param annotations Named list or \code{data.frame}. Annotations to be added
#' to all samples.
#' @param sample_annotations \code{data.frame}. Similar to \code{annotations},
#' but annotations are specific to each sample. This needs to include a column
#' "Sample", which will be used for joining.
#' @param merge_wells String. Controls if wells from the same sample
#' ("Sample") should be merged within a dataset (file). There are 4 options:
#' \itemize{
#' \item \code{merge_wells="yes"}: Merge wells. Discards QS merged wells (e.g. "M01").
#' \item \code{merge_wells="no"}: Do **not** merge wells. Discards QS merged wells (e.g. "M01").
#' \item \code{merge_wells="qs"}: The merged wells from QuantaSoft (e.g. "M01") are used if these are present.
#' The decision is made per file and sample.
#' \item \code{merge_wells="none"}: No merging is done.
#' }
#' Default is "none".
#' @param merge_files Logical. If this is TRUE and \code{merge_wells="yes"},
#' samples across files are also merged. Default is FALSE.
#'
#' @return A \code{data.frame} with the columns:
#' \item{FileName:}{Name of the file imported.}
#' \item{Well:}{Well identifier.}
#' \item{Sample:}{Sample name from QuantaSoft.}
#' \item{Ch1TargetType:}{The target type for channel 1.}
#' \item{Ch2TargetType:}{The target type for channel 2.}
#' \item{Target:}{Target name from QuantaSoft.}
#' \item{MutantOnlyDroplets:}{The count of mutant only droplets.}
#' \item{WildtypeOnlyDroplets:}{The count of wild type only droplets.}
#' \item{DoubleNegativeDroplets:}{The count of double negative droplets.}
#' \item{DoublePositiveDroplets:}{The count of double positive droplets.}
#' \item{TotalDroplets:}{The total number of droplets.}
#' \item{MergedWells:}{If wells were merged (e.g. \code{merge_wells="yes"})
#' this is a string of the wells merged. Otherwise this will be NA.}
#' \item{NumberOfMergedWells:}{The number of wells merged.}
#' Columns from \code{annotations} and \code{sample_annotations} are appended
#' after these.
#'
#' @seealso
#' \itemize{
#' \item \code{\link{train_simple_ddpcr_model}}, \code{\link{test_tumor_sample_simple}}
#' \item \code{\link{train_integrated_ddpcr_model}}, \code{\link{test_tumor_sample_integrated}}
#' }
#'
#' @export
#'
#' @importFrom readr read_csv cols
#' @importFrom utils file_test
#' @import dplyr stringr
import_QS_files <- function(paths,
                            Ch1_is_mutation = TRUE,
                            annotations = NULL,
                            sample_annotations = NULL,
                            merge_wells = "none",
                            merge_files = FALSE) {
  # Check existence of files/folders
  file_exists <- file.exists(paths)
  if (any(!file_exists)) {
    stop(paste0("The path(s) ", paste0("'", paths[!file_exists], "'", collapse = ", "), " does/do not exist."))
  }

  # Check if sample annotations has Sample column
  if (!is.null(sample_annotations) &&
    !"Sample" %in% colnames(sample_annotations)) {
    stop("'sample_annotations' does not include a column 'Sample'.")
  }

  # Validate the merge mode up front so a typo fails before any file is read
  if (!is.character(merge_wells) || length(merge_wells) != 1L ||
    !merge_wells %in% c("none", "yes", "no", "qs")) {
    stop("merge_wells should be 'yes', 'no', 'qs' or 'none'")
  }

  # Split input into files and directories
  # Unique files given directly
  file_paths <- unique(paths[file_test("-f", paths)])
  # Unique .csv files found in the given directories, minus those already
  # listed in file_paths. The pattern is an anchored regex: a bare ".csv"
  # would also match names such as "a.csv.bak" or "xcsv.txt".
  dir_file_paths <-
    paths[file_test("-d", paths)] %>%
    list.files(pattern = "\\.csv$", full.names = TRUE) %>%
    unique() %>%
    setdiff(file_paths)

  if (length(file_paths) + length(dir_file_paths) == 0L) {
    stop("No .csv files found in 'paths'.")
  }

  # Reads a set of QuantaSoft exports into one table, recording the source
  # path of every row in "FilePath". Returns NULL for an empty set so that
  # bind_rows() simply skips it.
  # NOTE(review): all readr warnings are silenced, as in the original code;
  # confirm this is still wanted.
  read_qs_csv <- function(csv_paths) {
    if (length(csv_paths) == 0L) {
      return(NULL)
    }
    suppressWarnings(
      read_csv(
        csv_paths,
        id = "FilePath", show_col_types = FALSE,
        col_types = cols(.default = "?", MergedWells = "c")
      )
    )
  }

  # Files and directory contents are read separately and then bound
  df <- bind_rows(read_qs_csv(file_paths), read_qs_csv(dir_file_paths))

  # Channel 1 data (keeps all measurement columns)
  ch1_df <- df %>%
    filter(grepl("Ch1", .data$TargetType)) %>%
    mutate(
      Ch1TargetType = str_remove(.data$TargetType, pattern = "Ch1")
    ) %>%
    select(-c("TargetType"))

  # Channel 2 data (only the join keys and the target type are kept)
  join_keys <- c("FilePath", "Well", "ExptType", "Experiment", "Sample")
  ch2_df <- df %>%
    filter(grepl("Ch2", .data$TargetType)) %>%
    mutate(
      Ch2TargetType = str_remove(.data$TargetType, pattern = "Ch2")
    ) %>%
    select(all_of(c(join_keys, "Ch2TargetType")))

  # Join Ch1 and Ch2 data
  df <- full_join(ch1_df, ch2_df, by = join_keys)

  # Which raw droplet column holds mutant-only / wild-type-only counts
  # depends on the channel used for the mutation assay
  mutant_col <- if (Ch1_is_mutation) "Ch1+Ch2-" else "Ch1-Ch2+"
  wildtype_col <- if (Ch1_is_mutation) "Ch1-Ch2+" else "Ch1+Ch2-"
  column_map <- c(
    MutantOnlyDroplets = mutant_col,
    WildtypeOnlyDroplets = wildtype_col,
    DoubleNegativeDroplets = "Ch1-Ch2-",
    DoublePositiveDroplets = "Ch1+Ch2+",
    TotalDroplets = "AcceptedDroplets"
  )

  # Clean up data
  df <- df %>%
    mutate(
      FileName = basename(.data$FilePath)
    ) %>%
    select(
      "FileName",
      "Well", "Sample", "Ch1TargetType", "Ch2TargetType", "Target",
      "Ch1+Ch2-", "Ch1-Ch2+", "Ch1-Ch2-", "Ch1+Ch2+",
      "AcceptedDroplets", "MergedWells"
    ) %>%
    rename(all_of(column_map)) %>%
    mutate(
      # Number of comma-separated wells in MergedWells; 1 when it is NA
      NumberOfMergedWells = str_count(
        ifelse(is.na(.data$MergedWells), "", .data$MergedWells),
        pattern = ","
      ) + 1
    )

  # Merging wells ("none" leaves the data as imported)
  if (merge_wells == "yes") {
    # Samples are merged per file unless merge_files is set
    group_cols <- if (merge_files) "Sample" else c("Sample", "FileName")
    # Remove QS merged wells
    no_qs_df <- df %>%
      filter(!grepl("M", .data$Well))
    # Single wells: nothing to merge, kept unchanged
    single_df <- no_qs_df %>%
      group_by(across(all_of(group_cols))) %>%
      filter(n() == 1) %>%
      ungroup()
    # Wells to be merged: droplet counts are summed, labels are collapsed
    merged_df <- no_qs_df %>%
      group_by(across(all_of(group_cols))) %>%
      filter(n() > 1) %>%
      summarise(
        FileName = paste0(unique(.data$FileName), collapse = ","),
        Target = paste0(unique(.data$Target), collapse = ","),
        Ch1TargetType = paste0(unique(.data$Ch1TargetType), collapse = ","),
        Ch2TargetType = paste0(unique(.data$Ch2TargetType), collapse = ","),
        WildtypeOnlyDroplets = sum(.data$WildtypeOnlyDroplets),
        MutantOnlyDroplets = sum(.data$MutantOnlyDroplets),
        DoubleNegativeDroplets = sum(.data$DoubleNegativeDroplets),
        DoublePositiveDroplets = sum(.data$DoublePositiveDroplets),
        TotalDroplets = sum(.data$TotalDroplets),
        NumberOfMergedWells = n(),
        MergedWells = paste0(c("(", paste0(c(.data$Well), collapse = ","), ")"), collapse = ""),
        .groups = "drop"
      ) %>%
      mutate(
        # New well identifiers for the merged rows: M01, M02, ...
        Well = sprintf("M%02d", row_number())
      )
    # Bind data
    df <- bind_rows(merged_df, single_df)
  } else if (merge_wells == "no") {
    # Remove QS merged wells
    df <- df %>%
      filter(!grepl("M", .data$Well))
  } else if (merge_wells == "qs") {
    # Decide per file and sample:
    #   If the sample has a QS merged well in that file: keep the merged well
    #   If it has none: keep the original well(s)
    # Deciding on Sample alone would drop a sample's wells from every file
    # as soon as any other file holds a merged well for the same sample.
    df <- df %>%
      group_by(across(all_of(c("FileName", "Sample")))) %>%
      filter(grepl("M", .data$Well) | !any(grepl("M", .data$Well))) %>%
      ungroup()
  }

  # Annotations shared by all samples
  if (!is.null(annotations)) {
    df <- df %>% bind_cols(annotations)
  }
  # Sample-specific annotations
  if (!is.null(sample_annotations)) {
    df <- df %>% left_join(sample_annotations, by = "Sample")
  }

  # Arrange columns: the standard columns first, annotation columns after
  df %>%
    relocate(
      "FileName", "Sample",
      "Well", "Ch1TargetType", "Ch2TargetType", "Target",
      "DoubleNegativeDroplets", "WildtypeOnlyDroplets", "MutantOnlyDroplets", "DoublePositiveDroplets",
      "TotalDroplets", "NumberOfMergedWells", "MergedWells"
    )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.