#' Perform quality control and remove doublets
#'
#' This function performs a quality control based on a double-step procedure in all FCS files contained within a \code{fcs.SCE} object or a folder specified by user:\enumerate{
#' \item It executes a quality control based in \href{http://bioconductor.org/packages/release/bioc/vignettes/flowAI/inst/doc/flowAI.html}{\code{flowAI::flow_auto_qc()}},
#' \item Remove doublets according criteria described \href{https://github.com/LennonLab/flow-cytometry/blob/fcf09fc7b4943a386864de9b8ee43ce4e1c8d34d/bin/support_functions.R}{here}.}
#' @param fcs.SCE A \code{fcs.SCE} object generated through \code{\link[FlowCT:fcs.SCE]{FlowCT::fcs.SCE()}}. If \code{NULL} (default), this function will search for FCS files within current working directory.
#' @param filelist A vector with full path of FCS files to be read, commonly generated through \code{\link[base:list.files]{base::list.files()}}. If \code{NULL}, this file list will be generated as indicated below.
#' @param directory If \code{filelist = NULL}, those files stored in this location will be read. Default = \code{getwd()} (current directory).
#' @param pattern Pattern for reading files within \code{directory}. Default = \code{"fcs"}.
#' @param physical.markers Vector with physical markers (i.e., FCS-A, SSC-H, etc.). Mandatory for perfoming quality control.
#' @param output.folder Folder name for storing final FCS files. Default, the same of \code{directory}.
#' @param output.suffix Suffix added to new generated FCS files (in case not being working with a \code{fcs.SCE} object). Default = \code{qc}.
#' @param return.idx Logical indicating whether output is a new \code{fcs.SCE} object without low-quality events or an index with this positions (without removing them from original \code{fcs.SCE}). This option is only available if the input is a \code{fcs.SCE} object. Default = \code{FALSE} (\code{fcs.SCE} output).
#' @param return.fcs Logical indicating whether new FCS file should be generated without low quality events. They will stored in specified \code{output.folder} with the corresponding \code{output.suffix}.
#' @param keep.QCfile Final QC text report should be kept? Attention, this file does not contain removed doublets but only those events removed through {\code{flowAI::flow_auto_qc()}}, all events are showed trough terminal output. Default = \code{FALSE}.
#' @keywords quality control
#' @keywords doublets removal
#' @keywords remove low quality events
#' @export
#' @importFrom flowCore exprs write.FCS
#' @importFrom stats median sd
#' @importFrom utils read.table capture.output tail
#' @return Final output is a new \code{fcs.SCE} object (if \code{fcs.SCE} input) without these events that do not pass the quality control or new FCS files with the suffix \code{"qc"} with these low quality events removed. If user specifies \code{return.idx = TRUE}, output would be a vector with all low-quality events positions.
#' @return A table with percentaje of removed events for each FCS will be also shown in the terminal (those files with more than 30% of events removed will have a '!' signal).
#' @examples
#' \dontrun{
#' # option 1: output a new fcs.SCE object with low-quality events removed.
#' fcs.SCE_qc <- qc.and.removeDoublets(fcs.SCE = fcs,
#' physical.markers = c("FSC_A", "FSC_H", "SSC_A", "SSC_H"))
#'
#' # option 2: output an index with low quality events
#' idx_qc <- qc.and.removeDoublets(fcs.SCE = fcs,
#' physical.markers = c("FSC_A", "FSC_H", "SSC_A", "SSC_H"), return.idx = T)
#'
#' # option 3: working with all FCS files within a folder
#' idx_qc <- qc.and.removeDoublets(directory = "../data/", output.folder = "HQ_files",
#' physical.markers = c("FSC_A", "FSC_H", "SSC_A", "SSC_H"))
#' }
qc.and.removeDoublets <- function(fcs.SCE = NULL, filelist = NULL, directory = getwd(), pattern = "fcs", physical.markers, output.folder = directory, output.suffix = "qc", return.idx = FALSE, return.fcs = FALSE, keep.QCfile = F){
if (!requireNamespace("flowAI", quietly = TRUE)) stop("Package \"flowAI\" needed for this function to work. Please install it.", call. = FALSE)
if(!is.null(fcs.SCE)){
fcs <- as.flowSet.SE(fcs.SCE, assay.i = "raw")
filenames <- fcs@phenoData@data$name
losses <- c()
idx <- list()
for(file in filenames){
fcs[[file]]@description$FILENAME <- file #requirements for flow_auto_qc
invisible(capture.output(idx1 <- flowAI::flow_auto_qc(fcs[[file]], ChExcludeFS = physical.markers, fcs_QC = FALSE, output = 3, html_report = F, folder_results = F, mini_report = "miniQC")[[1]]))
# doublet removal
FSCA <- grep("FS.*.A", physical.markers, value = T)
FSCH <- grep("FS.*.H", physical.markers, value = T)
exprstmp <- exprs(fcs[[file]])
ratio <- exprstmp[,grep(FSCA, colnames(exprstmp))]/(1+ exprstmp[,grep(FSCH, colnames(exprstmp))]) #calculate the ratios
r <- median(ratio)
w <- 2*sd(ratio)
idx2 <- as.vector(which(ratio > r+w)) #define the region that will be removed
idx[[file]] <- unique(c(idx1, idx2))
losses <- append(losses, round(length(idx[[file]])/nrow(fcs[[file]])*100, digits = 2))
names(losses)[length(losses)] <- file
# generate new high QC FCS
if(return.fcs){
extension <- tolower(tail(strsplit(filenames[1], split="\\.")[[1]], n = 1))
exp_matrix <- exprstmp[-idx[[file]],]
colnames(exp_matrix) <- sapply(fcs.SCE@metadata$raw_channel_names, function(x) strsplit(x, split = ":")[[1]][1])
file_return <- as_flowFrame.me(exp_matrix)
file_return@parameters@data$desc <- sapply(fcs.SCE@metadata$raw_channel_names, function(x) strsplit(x, split = ":")[[1]][2])
write.FCS(file_return, paste0(output.folder, "/", gsub(extension, "", file), output.suffix, ".", extension))
}
}
cat("New generated FCSs without low quality events are stored in:", output.folder, "\n")
df <- data.frame(files = names(losses), losses); colnames(df) <- c("filename", "removed events (%)")
asciify(df)
fcs.SCE@metadata$lowQC_events <- idx
if(!keep.QCfile) invisible(file.remove(list.files(pattern = "miniQC")))
## extract cell IDs
idx_cellID <- lapply(names(idx), function(file) fcs.SCE$cell_id[fcs.SCE$filename == file][idx[[file]]])
names(idx_cellID) <- filenames
if(return.idx) return(idx_cellID) else return(fcs.SCE[,-match(unlist(idx_cellID), colnames(fcs.SCE))])
}else{
if(is.null(filelist)) filelist <- list.files(path = directory, pattern = pattern, full.names = T)
for(file in filelist){
print(file)
invisible(capture.output(file_q <- flowAI::flow_auto_qc(file, ChExcludeFS = physical.markers, fcs_QC = FALSE, output = 1, html_report = F, folder_results = F, mini_report = "miniQC")))
# doublet removal
FSCA <- grep("FS.*.A", physical.markers, value = T)
FSCH <- grep("FS.*.H", physical.markers, value = T)
exprstmp <- exprs(fcs[[file]])
ratio <- exprstmp[,grep(FSCA, colnames(exprstmp))]/(1+ exprstmp[,grep(FSCH, colnames(exprstmp))]) #calculate the ratios
r <- median(ratio)
w <- 2*sd(ratio)
file_d <- file_q[which(ratio < r+w),] #define the region that is accepted
extension <- tolower(tail(strsplit(basename(file), "\\.")[[1]], n = 1))
filename <- strsplit(basename(file), "\\.")[[1]][1]
write.FCS(file_d, paste0(output.folder, "/", filename, ".", output.suffix, ".", extension))
}
# notify removed evens
qct <- read.table("miniQC.txt", header = T, sep = "\t", colClasses = c("character", rep("numeric", 2), rep("NULL", 4)))
qct$warning <- ifelse(qct$X..anomalies >= 30, "(!)", "")
colnames(qct) <- c("filenames", "# initial events", "% deleted events", "Warning!")
asciify(qct)
if(!keep.QCfile) invisible(file.remove(list.files(pattern = "miniQC")))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.