fcs_find_channels <- function(fcs_data, channels) {
fcs_data_channels <- colnames(fcs_data)
unlist(
lapply(channels, function(channel) {
channel <- fcs_data_channels[grepl(channel, fcs_data_channels)]
if (length(channel) != 1) {
stop(paste0("unable to find one match for ", channel))
}
channel
})
)
}
fcs_find_gating_channels <- function(fcs_data, channels) {
fcs_data_channels <- colnames(fcs_data)
unlist(
lapply(channels, function(channel) {
channel <- fcs_data_channels[grepl(channel, fcs_data_channels)]
if (identical(channel, character(0))) {
channel <- NA
}
channel
})
)
}
fcs_import_file <- function(filename,
return_original = FALSE,
transformation = TRUE) {
fcs_data <- flowCore::read.FCS(filename, transformation)
data <- dplyr::as_data_frame(as.data.frame(fcs_data@exprs))
colnames(data) <- paste0(fcs_data@parameters@data$name, "_",
fcs_data@parameters@data$desc)
# rename columns to be R-friendly
colnames(data) <- gsub("-| |#", "_", colnames(data))
if (return_original) {
list(
data = data,
obj = fcs_data
)
} else {
dplyr::as_data_frame(data)
}
}
fcs_import_file_error_handler <- function(filename) {
tryCatch(fcs_import_file(filename,
return_original = TRUE),
error = function(e) { NULL }
)
}
flag_abnormal_gating_in_exported_qc_report <- function(abnormal_gating_filename, cytof_qc_report_dir) {
filepath <- paste0(cytof_qc_report_dir, "/", abnormal_gating_filename, "_cytof_qc_report.csv")
qc_report <- read.csv(filepath)
qc_report$Abnormal.Gating <- 'True'
write.csv(qc_report, filepath, row.names = FALSE)
# We implicitly return "Successful" so that abnormal_gating_flag_transfer_status
# is not undefined if we reach the end of the function without raising an error.
"Successful"
}
flag_abnormal_gating_in_exported_qc_report_error_handler <- function(abnormal_gating_filename,
cytof_qc_report_dir) {
tryCatch(flag_abnormal_gating_in_exported_qc_report(abnormal_gating_filename, cytof_qc_report_dir),
error = function(e) { "Unsuccessful" }
)
}
generate_sample_background_report <- function(background_fcs_data,
background_fcs_data_filename,
background_fcs_data_pre_processing,
cofactor) {
sample_background_report_timestamp <- Sys.time()
gating_data <- background_fcs_data$data[background_fcs_data_pre_processing$gating_data$index, ]
# We use a set template for channel names, filling in NA whenever the channel
# does not exist in an FCS file.
gating_data_channel_names <- c("Y89", "Nb93", "Pd102", "Rh103", "Pd104",
"Pd105", "Pd106", "Pd108", "Pd110", "In113",
"In115", "Sn120", "I127" , "Xe131", "Cs133",
"Ba138", "La139", "Ce140", "Pr141", "Nd142",
"Ce142", "Nd143", "Nd144", "Nd145", "Nd146",
"Sm147", "Nd148", "Sm149", "Nd150", "Eu151",
"Sm152", "Eu153", "Sm154", "Gd155", "Gd156",
"Gd158", "Tb159", "Gd160", "Dy161", "Dy162",
"Dy163", "Dy164", "Ho165", "Er166", "Er167",
"Er168", "Tm169", "Er170", "Yb171", "Yb172",
"Yb173", "Yb174", "Lu175", "Lu176", "Yb176",
"Ta181", "Os189", "Ir191", "Os192", "Ir193",
"Pt194", "Pt195", "Pt196", "Pt198", "Pb208",
"Bi209")
num_gating_channels <- 66
# We use Ce140 below since we know it is a standard bead channel and should
# always be present.
num_gating_channels_data_matrix_rows <- length(gating_data[[fcs_find_channels(gating_data,
"Ce140")]])
gating_channels_data_matrix <- matrix(nrow = num_gating_channels_data_matrix_rows,
ncol = num_gating_channels)
colnames(gating_channels_data_matrix) <- gating_data_channel_names
# Vector of data for each channel.
for (i in 1:num_gating_channels) {
current_channel_gating_data <- gating_data[[fcs_find_gating_channels(gating_data, gating_data_channel_names[i])]]
if (is.null(current_channel_gating_data)) {
gating_channels_data_matrix[, i] <- NA
} else {
gating_channels_data_matrix[, i] <- current_channel_gating_data
}
}
# colMedians works with NA values.
gating_data_channel_medians <- matrixStats::colMedians(gating_channels_data_matrix)
sample_background_report <-
dplyr::data_frame(
`Sample Background Report Timestamp` = sample_background_report_timestamp,
Filename = background_fcs_data_filename,
`Start Time` =
paste(background_fcs_data$obj@description$`$DATE`,
background_fcs_data$obj@description$`$BTIM`),
`End Time` =
paste(background_fcs_data$obj@description$`$DATE`,
background_fcs_data$obj@description$`$ETIM`),
`Total Events` = nrow(background_fcs_data$data),
`Total Cells` = nrow(gating_data),
`Y89 Median` = median(gating_channels_data_matrix[, 'Y89']),
`Nb93 Median` = median(gating_channels_data_matrix[, 'Nb93']),
`Pd102 Median` = median(gating_channels_data_matrix[, 'Pd102']),
`Rh103 Median` = median(gating_channels_data_matrix[, 'Rh103']),
`Pd104 Median` = median(gating_channels_data_matrix[, 'Pd104']),
`Pd105 Median` = median(gating_channels_data_matrix[, 'Pd105']),
`Pd106 Median` = median(gating_channels_data_matrix[, 'Pd106']),
`Pd108 Median` = median(gating_channels_data_matrix[, 'Pd108']),
`Pd110 Median` = median(gating_channels_data_matrix[, 'Pd110']),
`In113 Median` = median(gating_channels_data_matrix[, 'In113']),
`In115 Median` = median(gating_channels_data_matrix[, 'In115']),
`Sn120 Median` = median(gating_channels_data_matrix[, 'Sn120']),
`I127 Median` = median(gating_channels_data_matrix[, 'I127']),
`Xe131 Median` = median(gating_channels_data_matrix[, 'Xe131']),
`Cs133 Median` = median(gating_channels_data_matrix[, 'Cs133']),
`Ba138 Median` = median(gating_channels_data_matrix[, 'Ba138']),
`La139 Median` = median(gating_channels_data_matrix[, 'La139']),
`Ce140 Median` = median(gating_channels_data_matrix[, 'Ce140']),
`Pr141 Median` = median(gating_channels_data_matrix[, 'Pr141']),
`Nd142 Median` = median(gating_channels_data_matrix[, 'Nd142']),
`Ce142 Median` = median(gating_channels_data_matrix[, 'Ce142']),
`Nd143 Median` = median(gating_channels_data_matrix[, 'Nd143']),
`Nd144 Median` = median(gating_channels_data_matrix[, 'Nd144']),
`Nd145 Median` = median(gating_channels_data_matrix[, 'Nd145']),
`Nd146 Median` = median(gating_channels_data_matrix[, 'Nd146']),
`Sm147 Median` = median(gating_channels_data_matrix[, 'Sm147']),
`Nd148 Median` = median(gating_channels_data_matrix[, 'Nd148']),
`Sm149 Median` = median(gating_channels_data_matrix[, 'Sm149']),
`Nd150 Median` = median(gating_channels_data_matrix[, 'Nd150']),
`Eu151 Median` = median(gating_channels_data_matrix[, 'Eu151']),
`Sm152 Median` = median(gating_channels_data_matrix[, 'Sm152']),
`Eu153 Median` = median(gating_channels_data_matrix[, 'Eu153']),
`Sm154 Median` = median(gating_channels_data_matrix[, 'Sm154']),
`Gd155 Median` = median(gating_channels_data_matrix[, 'Gd155']),
`Gd156 Median` = median(gating_channels_data_matrix[, 'Gd156']),
`Gd158 Median` = median(gating_channels_data_matrix[, 'Gd158']),
`Tb159 Median` = median(gating_channels_data_matrix[, 'Tb159']),
`Gd160 Median` = median(gating_channels_data_matrix[, 'Gd160']),
`Dy161 Median` = median(gating_channels_data_matrix[, 'Dy161']),
`Dy162 Median` = median(gating_channels_data_matrix[, 'Dy162']),
`Dy163 Median` = median(gating_channels_data_matrix[, 'Dy163']),
`Dy164 Median` = median(gating_channels_data_matrix[, 'Dy164']),
`Ho165 Median` = median(gating_channels_data_matrix[, 'Ho165']),
`Er166 Median` = median(gating_channels_data_matrix[, 'Er166']),
`Er167 Median` = median(gating_channels_data_matrix[, 'Er167']),
`Er168 Median` = median(gating_channels_data_matrix[, 'Er168']),
`Tm169 Median` = median(gating_channels_data_matrix[, 'Tm169']),
`Er170 Median` = median(gating_channels_data_matrix[, 'Er170']),
`Yb171 Median` = median(gating_channels_data_matrix[, 'Yb171']),
`Yb172 Median` = median(gating_channels_data_matrix[, 'Yb172']),
`Yb173 Median` = median(gating_channels_data_matrix[, 'Yb173']),
`Yb174 Median` = median(gating_channels_data_matrix[, 'Yb174']),
`Lu175 Median` = median(gating_channels_data_matrix[, 'Lu175']),
`Lu176 Median` = median(gating_channels_data_matrix[, 'Lu176']),
`Yb176 Median` = median(gating_channels_data_matrix[, 'Yb176']),
`Ta181 Median` = median(gating_channels_data_matrix[, 'Ta181']),
`Os189 Median` = median(gating_channels_data_matrix[, 'Os189']),
`Ir191 Median` = median(gating_channels_data_matrix[, 'Ir191']),
`Os192 Median` = median(gating_channels_data_matrix[, 'Os192']),
`Ir193 Median` = median(gating_channels_data_matrix[, 'Ir193']),
`Pt194 Median` = median(gating_channels_data_matrix[, 'Pt194']),
`Pt195 Median` = median(gating_channels_data_matrix[, 'Pt195']),
`Pt196 Median` = median(gating_channels_data_matrix[, 'Pt196']),
`Pt198 Median` = median(gating_channels_data_matrix[, 'Pt198']),
`Pb208 Median` = median(gating_channels_data_matrix[, 'Pb208']),
`Bi209 Median` = median(gating_channels_data_matrix[, 'Bi209']),
`Sum of Medians` = sum(gating_data_channel_medians, na.rm = TRUE)
)
}
generate_sample_background_report_error_handler <- function(background_fcs_data,
background_fcs_data_filename,
background_fcs_data_pre_processing,
cofactor) {
tryCatch(generate_sample_background_report(background_fcs_data,
background_fcs_data_filename,
background_fcs_data_pre_processing,
cofactor),
error = function(e) { NULL }
)
}
generate_qc_report <- function(fcs_data, fcs_filename, fcs_data_pre_processing, cofactor, qc_reporter_version, ab_gate = "False") {
qc_report_timestamp <- Sys.time()
gating_data <- fcs_data$data[fcs_data_pre_processing$gating_data$index, ]
bead_data <- fcs_data$data[fcs_data_pre_processing$bead_data$index, ]
gating_time <- gating_data[[fcs_find_channels(gating_data, "Time")]]
gating_ir193 <- gating_data[[fcs_find_channels(gating_data, "Ir193")]]
bead_time <- bead_data[[fcs_find_channels(bead_data, "Time")]]
bead_eu153 <- bead_data[[fcs_find_channels(bead_data, "Eu153")]]
bead_ce140 <- bead_data[[fcs_find_channels(bead_data, "Ce140")]]
bead_gd156 <- bead_data[[fcs_find_channels(bead_data, "Gd156")]]
bead_lu175 <- bead_data[[fcs_find_channels(bead_data, "Lu175")]]
bead_176 <- tryCatch(bead_data[[fcs_find_channels(bead_data, "Lu176")]],
error = function(e) { bead_data[[fcs_find_channels(bead_data, "Yb176")]] }
)
qc_report <-
dplyr::data_frame(
`QC Reporter Version` = qc_reporter_version,
`QC Report Timestamp` = qc_report_timestamp,
Filename = fcs_filename,
`Start Time` =
paste0(fcs_data$obj@description$`$DATE`, " ",
fcs_data$obj@description$`$BTIM`),
`End Time` =
paste0(fcs_data$obj@description$`$DATE`, " ",
fcs_data$obj@description$`$ETIM`),
`Total Events` = nrow(fcs_data$data),
`Total Cells` = nrow(gating_data),
`Ir193 Median` = median(gating_ir193),
`Ir193 vs Time` = cor(gating_time, asinh(gating_ir193 / cofactor)),
`Total Beads` = nrow(bead_data),
`Eu153 Median` = median(bead_eu153),
`Eu153 rCV` = rcv(bead_eu153),
`Ce140 Median` = median(bead_ce140),
`Gd156 Median` = median(bead_gd156),
`Oxide%` = `Gd156 Median` / `Ce140 Median`,
`Eu153 vs Time` = cor(bead_time, asinh(bead_eu153 / cofactor)),
`Lu175 Median` = median(bead_lu175),
`Lu/Yb176 Median` = median(bead_176),
`Ratio (Lu175)/(Lu/Yb176)` = `Lu175 Median` / `Lu/Yb176 Median`,
`Abnormal Gating` = ab_gate
)
}
generate_qc_report_error_handler <- function(fcs_data,
fcs_filename,
fcs_data_pre_processing,
cofactor,
qc_reporter_version,
ab_gate = "False") {
tryCatch(generate_qc_report(fcs_data,
fcs_filename,
fcs_data_pre_processing,
cofactor,
qc_reporter_version,
ab_gate),
error = function(e) { NULL }
)
}
mass_cytometry_pre_processing <- function(
fcs_data, cofactor, event_channel, dna_channel, bead_gates,
debris_num_gmms = 10, debris_gmm_subsample_n = 1000, debris_vote_thresh = 0.5,
verbose = TRUE) {
start_t <- proc.time()
# find the explicit channel name for each mass
fcs_event_channel <- fcs_find_channels(fcs_data, event_channel)
fcs_dna_channel <- fcs_find_channels(fcs_data, dna_channel)
fcs_bead_channels <- fcs_find_channels(fcs_data, bead_gates$channel)
# transform the data and add an index for later tracking
gating_data <- fcs_data[, c(fcs_event_channel,
fcs_dna_channel,
fcs_bead_channels)]
gating_data[, c(fcs_dna_channel, fcs_bead_channels)] <-
asinh(gating_data[, c(fcs_dna_channel, fcs_bead_channels)] / cofactor)
gating_data$index <- 1:nrow(gating_data)
# step 1: identify beads and bead-cell doublets using threshold
if (verbose) cat("identifying beads\n")
bead_data <- gating_data
for (bead_gate_idx in 1:nrow(bead_gates)) {
bead_gate <- bead_gates[bead_gate_idx, ]
bead_gate_channel <- fcs_find_channels(gating_data, bead_gate$channel)
bead_gate_data <- bead_data[[bead_gate_channel]]
bead_gate_indices <- which(bead_gate$min < bead_gate_data &
bead_gate_data < bead_gate$max)
bead_data <- bead_data[bead_gate_indices, ]
}
bead_indices <- bead_data$index
gating_data <- dplyr::filter(gating_data, !(index %in% bead_indices))
# step 2: separate cells from debris
if (verbose) cat("identifying debris ... ")
t <- proc.time()
cell_indices <- gating_data$index
dna <- gating_data[[fcs_dna_channel]]
min_dna_index <- head(which(dna == min(dna)), 1)
debris_votes <- lapply(1:debris_num_gmms, function(iter) {
# fit a GMM with three components to a subset of the data and use it to
# assign all of the data. lowest cluster is classified as debris
indices <- sample(1:length(dna), min(debris_gmm_subsample_n, length(dna)))
gmm <- Mclust(dna[indices], G = 3)
assignment <- predict(gmm, dna)$classification
min_cluster <- assignment[min_dna_index]
# sometimes the GMM finds a very "wide" cluster that covers debris and some
# doublets. in order to avoid removing doublets, cells with DNA intensity
# higher than most populated cluster are not removed
assignment_counts <- table(assignment)
most_populated_mean <- gmm$parameters$mean[
which(assignment_counts == max(assignment_counts))]
# return debris identification for each cell
dplyr::data_frame(
iter = iter,
index = cell_indices,
debris = (assignment == min_cluster) & (dna < most_populated_mean)
)
}) %>% dplyr::bind_rows()
debris_indices <- debris_votes %>%
dplyr::group_by(index) %>%
dplyr::summarize(per_debris = mean(debris)) %>%
dplyr::filter(per_debris >= debris_vote_thresh) %>%
`$`(index)
debris_data <- dplyr::filter(gating_data, index %in% debris_indices)
gating_data <- dplyr::filter(gating_data, !(index %in% debris_indices))
cat(paste0(round((proc.time() - t)[3]), " seconds\n"))
# step 3: separate bead + cell doublets
bead_singlet_indices <- which(bead_data[[fcs_dna_channel]] <
min(gating_data[[fcs_dna_channel]]))
bead_cell_doublet_indices <- which(bead_data[[fcs_dna_channel]] >
min(gating_data[[fcs_dna_channel]]))
bead_cell_doublets_data <- bead_data[bead_cell_doublet_indices, ]
bead_data <- bead_data[bead_singlet_indices, ]
list(
bead_data = bead_data,
debris_data = debris_data,
gating_data = gating_data,
bead_cell_doublets_data = bead_cell_doublets_data,
cell_indices = gating_data$index
)
}
mass_cytometry_pre_processing_error_handler <- function(fcs_data, cofactor,
event_channel, dna_channel,
bead_gates) {
tryCatch(mass_cytometry_pre_processing(fcs_data, cofactor,
event_channel, dna_channel,
bead_gates),
error = function(e) { NULL }
)
}
prepare_for_gating_inspection <- function(fcs_filename, fcs_data_pre_processing, cytof_qc_gating_inspection) {
pre_processed_data <- cytof_qc_gating_inspection$pre_processed_data
bead_data <- fcs_data_pre_processing$bead_data
bead_data$category = "bead"
debris_data <- fcs_data_pre_processing$debris_data
debris_data$category = "debris"
# We flag bead-cell doublets as debris to ensure that our data visualization
# more closely matches FlowJo visualization.
bead_cell_doublets_data <- fcs_data_pre_processing$bead_cell_doublets_data
bead_cell_doublets_data$category = "debris"
gating_data <- fcs_data_pre_processing$gating_data
gating_data$category = "cell"
fcs_data_for_visualization <- rbind(bead_data, debris_data, bead_cell_doublets_data, gating_data)
fcs_data_for_visualization$filename = fcs_filename
for (i in 1:length(pre_processed_data)) {
if (is.null(pre_processed_data[[i]])) {
target_index <- i
break
}
}
cytof_qc_gating_inspection$pre_processed_data[[target_index]] <- fcs_data_for_visualization
}
qc_report_export_error_handler <- function(cytof_qc_report_dir_path,
qc_report,
fcs_filename) {
filepath <- paste0(cytof_qc_report_dir_path, "/", fcs_filename, "_cytof_qc_report.csv")
tryCatch(write.csv(qc_report,
filepath, row.names = FALSE),
error = function(e) { "Unsuccessful" })
}
# Computes the robust" CV, as defined by FlowJo
# See http://www.flowjo.com/v76/en/statdefinitions.html
rcv <- function(x) {
as.numeric(100 * 0.5 *
(quantile(x, 0.8413) - quantile(x, 0.1587)) / median(x))
}
reformat_manually_gated_pre_processed_data <- function(pre_processed_manually_gated_data) {
bead_data_rows <- which(pre_processed_manually_gated_data$category == "bead")
# We do not remove the category and/or filename columns added after initially
# running mass_cytometry_pre_processing because they do not affect QC report
# generation.
bead_data <- pre_processed_manually_gated_data[bead_data_rows, ]
debris_data_rows <- which(pre_processed_manually_gated_data$category == "debris")
debris_data <- pre_processed_manually_gated_data[debris_data_rows, ]
gating_data_rows <- which(pre_processed_manually_gated_data$category == "cell")
gating_data <- pre_processed_manually_gated_data[gating_data_rows, ]
list(
bead_data = bead_data,
debris_data = debris_data,
gating_data = gating_data,
cell_indices = gating_data$index
)
}
sample_background_report_export_error_handler <- function(sample_background_report_dir_path,
sample_background_report,
fcs_filename) {
filepath <- paste0(sample_background_report_dir_path, "/", fcs_filename, "_sample_background_report.csv")
tryCatch(write.csv(sample_background_report,
filepath, row.names = FALSE),
error = function(e) { "Unsuccessful" })
}
# http://jeromyanglim.tumblr.com/post/33418725712/how-to-source-all-r-files-in-a-directory
source_dir <- function(path, trace = TRUE, ...) {
for (nm in list.files(path, pattern = "\\.[RrSsQq]$")) {
if(trace) cat(nm,":")
source(file.path(path, nm), ...)
if(trace) cat("\n")
}
}
unflag_abnormal_gating_in_previously_exported_qc_report_error_handler <- function(normal_gating_filename,
cytof_qc_report_dir) {
tryCatch(unflag_abnormal_gating_in_previously_exported_qc_report(normal_gating_filename, cytof_qc_report_dir),
error = function(e) { "Unsuccessful" }
)
}
unflag_abnormal_gating_in_previously_exported_qc_report <- function(normal_gating_filename, cytof_qc_report_dir) {
filepath <- paste0(cytof_qc_report_dir, "/", normal_gating_filename, "_cytof_qc_report.csv")
qc_report <- read.csv(filepath)
qc_report$Abnormal.Gating <- 'False'
write.csv(qc_report, filepath, row.names = FALSE)
# We implicitly return "Successful" so that abnormal_gating_flag_transfer_status
# is not undefined if we reach the end of the function without raising an error.
"Successful"
}
## Darwin added functions
plot_channel_time <- function(fcs_data, fcs_filename, cytof_qc_report_dir) {
df <- fcs_data
cols <- colnames(df)
colnames(df)[grep("Time", cols)] <- "Time"
channels <- cols[3:(length(cols)-4)]
# omit <- c("Center", "Time", "Event_length", "Offset", "Width", "Residual", "bc_separation_dist", "mahalanobis_dist")
# print(channels)
# print(head(df))
plotChannelsOverTime <- function(yvar){
plot_path <- paste0(cytof_qc_report_dir, "/", fcs_filename, "_plots")
if(!dir.exists(plot_path)) {
dir.create(plot_path)
}
ggplot(df, aes_string(x="Time", y=as.name(yvar))) +
geom_hex(bins = 100, aes(fill=stat(log10(count)))) +
scale_y_log10(oob=scales::squish_infinite) +
scale_fill_viridis() +
theme(legend.position = "None") +
ggtitle(fcs_filename)
ggsave(paste0(plot_path, "/Time_vs_", yvar, "_plot.png"))
}
lapply(channels, plotChannelsOverTime)
}
plot_channel_time_error_handler <- function(fcs_data, fcs_filename, cytof_qc_report_dir) {
tryCatch(
plot_channel_time(fcs_data, fcs_filename, cytof_qc_report_dir),
error = function(e) { print(e) }
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.