Nothing
#### filter 1: mismatched peaks ###
filter_pactr$set(
"public", "check_mismatched_peaks",
function(ringwin,
isowin,
trwin,
max_iso_shift,
merge_peaks,
merge_method = NULL) {
l <- nrow(self$mpactr_data$get_peak_table())
cli::cli_alert_info("Checking {l} peaks for mispicked peaks.")
self$mpactr_data$set_peak_table(self$mpactr_data$get_peak_table()[
order(self$mpactr_data$get_peak_table()$mz,
decreasing = FALSE
),
])
ion_filter_list <- list()
results <- FilterMispickedIons(self$mpactr_data$get_peak_table(), ringwin,
isowin, trwin, max_iso_shift)
ion_filter_list[["cut_ions"]] <- results$cut_ions
ion_filter_list[["merge_groups"]] <- results$merge_groups
self$logger[["check_mismatched_peaks"]] <- ion_filter_list
if (isTRUE(merge_peaks)) {
cli::cli_alert_info(c("Argument merge_peaks is: {merge_peaks}. ",
"Merging mispicked peaks with method ",
"{merge_method}."))
private$merge_ions(ion_filter_list, merge_method)
} else {
cli::cli_alert_warning(c("Argument merge_peaks is: {merge_peaks}. ",
"Mispicked peaks will not be merged."))
}
self$logger$list_of_summaries$mispicked <- summary$new(
filter = "mispicked", failed_ions = results$cut_ions,
passed_ions = self$mpactr_data$get_peak_table()$Compound
)
self$logger$list_of_summaries$mispicked$summarize()
}
)
filter_pactr$set("private", "merge_ions", function(ion_filter_list, method) {
if (is.null(method)) {
cli::cli_abort(c("No method has been supplied for merging peaks. ",
"{.var method} must be one of: sum"))
}
if (method == "sum") {
dat <- melt(self$mpactr_data$get_peak_table(),
id.vars = c("Compound", "mz", "rt", "kmd"),
variable.name = "sample",
value.name = "intensity",
variable.factor = FALSE
)
for (ion in names(ion_filter_list$merge_groups)) {
dat <- dat[Compound %in% c(ion, ion_filter_list$merge_groups[[ion]]),
intensity := sum(intensity),
by = .(sample)
]
}
self$mpactr_data$set_peak_table(dcast(dat,
Compound + mz + kmd + rt ~ sample,
value.var = "intensity"
)[
(!Compound %in% ion_filter_list$cut_ions),
])
}
})
#### filter 2: group filter ###
# Calculates statisics for each feature (rsd, n)
# accross biological groups and technical replicates
filter_pactr$set("public", "filter_blank", function() {
b <- data.table::melt(self$mpactr_data$get_peak_table(),
id.vars = c("Compound", "mz", "rt", "kmd"), variable.name =
"sample", value.name = "intensity", variable.factor = FALSE
)[
data.table(self$mpactr_data$get_meta_data()),
on = .(sample = Injection)
][
, .(
average = mean(intensity),
BiolRSD = rsd(intensity),
Bioln = length(intensity)
),
by = .(Compound, Biological_Group)
]
t <- data.table::melt(self$mpactr_data$get_peak_table(),
id.vars = c("Compound", "mz", "rt", "kmd"),
variable.name = "sample",
value.name = "intensity",
variable.factor = FALSE
)[
data.table(self$mpactr_data$get_meta_data()),
on = .(sample = Injection)
][
, .(sd = rsd(intensity), n = length(intensity)),
by = .(Compound, Biological_Group, Sample_Code)
][
, .(techRSD = mean(sd), techn = mean(n)),
by = .(Compound, Biological_Group)
]
group_stats <- b[t, on = .(Compound, Biological_Group)]
setorder(group_stats, Compound, Biological_Group)
self$logger[["group_filter-group_stats"]] <- group_stats
})
# this function determines group ions above the group threshold
# given group statistics (see filter_blank).
# The result is a list of ions by group whose relative abundance
# is greater than the threshold.
filter_pactr$set(
"public", "parse_ions_by_group",
function(group_threshold = 0.01) {
group_avgs <- dcast(self$logger[["group_filter-group_stats"]],
Compound ~ Biological_Group,
value.var = "average"
)
max <- self$logger[["group_filter-group_stats"]][
, .(Compound, Biological_Group, average)
][
, .(max = max(average)),
by = Compound
]
group_max <- group_avgs[max, on = .(Compound = Compound)][
, lapply(.SD, function(x) {
x / max
}),
.SDcols = unique(
self$logger[["group_filter-group_stats"]]$Biological_Group
)
]
biol_groups <- as.list(group_max)
biol_groups <- lapply(biol_groups, setNames, group_avgs[, Compound])
group_filter_list <- lapply(biol_groups, function(x) {
names(x)[which(x > group_threshold)]
})
self$logger[["group_filter-failing_list"]] <- group_filter_list
}
)
# Given a group name, removes flagged ions from the peak table.
filter_pactr$set(
"public", "apply_group_filter",
function(group, remove_ions = TRUE) {
groups <- unique(self$mpactr_data$get_meta_data()$Biological_Group)
if (isFALSE(group %in% groups)) {
cli::cli_abort(c("{.var group} {group} is not in ",
"{.var Biological_Group}.",
"Options are: {groups}"))
}
l <- nrow(self$mpactr_data$get_peak_table())
cli::cli_alert_info("Parsing {l} peaks based on the sample group: {group}.")
if (isFALSE(remove_ions)) {
cli::cli_alert_warning(c("Argument remove_ions is {remove_ions}. ",
"Peaks from {group} will not be removed."))
return()
}
cli::cli_alert_info(c("Argument remove_ions is: {remove_ions}.",
"Removing peaks from {group}."))
ions <- self$logger[["group_filter-failing_list"]][[group]]
self$mpactr_data$set_peak_table(self$mpactr_data$get_peak_table()[
!(self$mpactr_data$get_peak_table()$Compound
%in% ions),
])
self$logger$list_of_summaries[[paste0("group-", group)]] <- summary$new(
filter = group,
failed_ions = ions,
passed_ions = self$mpactr_data$get_peak_table()$Compound
)
self$logger$list_of_summaries[[paste0("group-", group)]]$summarize()
}
)
#### filter 3: cv filter ###
filter_pactr$set(
"public", "cv_filter",
function(cv_threshold = NULL, cv_params) {
if (is.null(cv_threshold)) {
cli::cli_abort("{.var cv_threshold} must be supplied.")
}
## abort if an incorrect cv_params argument is supplied.
if (!(cv_params %in% c("mean", "median"))) {
cli::cli_abort("{.var cv_params} must be one of mean or median.")
}
## abort if there are no technical replicates.
if (isFALSE(self$mpactr_data$isMultipleTechReps())) {
cli_abort(c("There are no technical replicates in the dataset provided. ",
"In order to run the replicability filter, technical ",
"replicates are required."))
}
input_ions <- self$mpactr_data$get_peak_table()$Compound
cli <- cli::cli_alert_info
n <- length(input_ions)
cli("Parsing {n} peaks for replicability across technical replicates.")
cv <- data.table::melt(self$mpactr_data$get_peak_table(),
id.vars = c("Compound", "mz", "rt", "kmd"), variable.name =
"sample", value.name = "intensity", variable.factor = FALSE
)[
self$mpactr_data$get_meta_data(),
on = .(sample = Injection)
][
, .(cv = rsd(intensity)),
by = .(Compound, Biological_Group, Sample_Code)
][
, .(
mean_cv = mean(cv, na.rm = TRUE),
median_cv = median(cv, na.rm = TRUE)
),
by = .(Compound)
]
self$logger[["cv_values"]] <- cv
if (cv_params == "mean") {
failed_ions <- cv[mean_cv > cv_threshold, Compound]
} else {
failed_ions <- cv[median_cv > cv_threshold, Compound]
}
self$mpactr_data$set_peak_table(self$mpactr_data$get_peak_table()[
Compound %in% setdiff(
input_ions,
failed_ions
),
])
self$logger$list_of_summaries$replicability <- summary$new(
filter = "cv_filter",
failed_ions = failed_ions,
passed_ions = self$mpactr_data$get_peak_table()$Compound
)
self$logger$list_of_summaries$replicability$summarize()
}
)
#### filter 4: insource ions ###
filter_pactr$set(
"public", "filter_insource_ions",
function(cluster_threshold = 0.95) {
input_ions <- self$mpactr_data$get_peak_table()$Compound
cli::cli_alert_info("Parsing {length(input_ions)} peaks for insource ions.")
self$mpactr_data$set_peak_table(self$mpactr_data$get_peak_table()[
, cor := private$deconvolute_correlation(.SD, cluster_threshold),
by = .(rt)
][cor == TRUE, ])
self$logger$list_of_summaries$insource <- summary$new(
filter = "insource",
failed_ions = setdiff(
input_ions,
self$mpactr_data$get_peak_table()$Compound
),
passed_ions = self$mpactr_data$get_peak_table()$Compound
)
self$logger$list_of_summaries$insource$summarize()
}
)
filter_pactr$set(
"private", "deconvolute_correlation",
function(group_1, cluster_threshold = 0.95) {
if (nrow(group_1) <= 1) {
return(TRUE)
}
dat <- melt(group_1,
id.vars = c("Compound", "mz", "kmd"),
variable.name = "sample",
value.name = "intensity",
variable.factor = FALSE
)[
, c("Compound", "sample", "intensity")
]
data <- dcast(dat, sample ~ Compound, value.var = "intensity")
data[, c("sample") := NULL]
corr <- stats::cor(data, method = c("pearson"))
dist <- stats::dist(corr, method = "euclidian")
cluster <- stats::hclust(dist, method = "complete")
cut_tree <- stats::cutree(cluster, h = 1 - cluster_threshold)
x <- as.data.table(cut_tree, keep.rownames = "Compound")[
group_1,
on = .(Compound = Compound)
][
, keep := private$cluster_max(mz),
by = .(cut_tree)
]
return(x$keep)
}
)
# Given a list of mz values, this function will determine
# which value has the largest mz and modify its "KEEP" Column.
filter_pactr$set("private", "cluster_max", function(mz) {
keep <- rep(FALSE, length(mz))
keep[which.max(mz)] <- TRUE
return(keep)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.