test_fnames <- system.file(c('cdf/WT/wt15.CDF', 'cdf/WT/wt16.CDF'), package = "faahKO")
test_fname <- test_fnames[1]
test_basenames <- sapply(test_fnames, function(fname) {
strsplit(basename(fname), split = "[.]")[[1]][1]
}, USE.NAMES = F)
test_basename <- test_basenames[1]
data_dir <- "~/Documents/Projects/massFlowR/packageDEV/unittest"
# data_dir <- file.path(system.file(package = "massFlowR"), "testdata/")
####---- single datafile preparation
cwt <- xcms::CentWaveParam(ppm = 25,
snthresh = 10,
noise = 1000,
prefilter = c(3, 100),
peakwidth = c(30, 80),
verboseColumns = TRUE,
mzdiff = 0)
test_raw <- MSnbase::readMSData(files = test_fname, mode = "onDisk")
test_res <- xcms::findChromPeaks(object = test_raw, param = cwt)
test_pks <- data.frame(xcms::chromPeaks(test_res))
## arrange by peak intensity and give a peak number
test_pks_rd <- test_pks[order(test_pks$into, decreasing = T), ]
test_pks_rd$peakid <- 1:nrow(test_pks_rd)
## remove artefactural, duplicating peaks
pks_unique <- unique(test_pks_rd[,c("mz", "rt")])
test_pks_rd <- lapply(1:nrow(pks_unique),
FUN = cleanPEAKS,
dt_unique = pks_unique,
dt = test_pks_rd)
test_pks_rd <- do.call("rbindCLEAN", test_pks_rd)
test_pks_rd$peakid <- 1:nrow(test_pks_rd)
test_eic_rd <- xcms::chromatogram(
test_raw,
rt = data.frame(rt_lower = test_pks_rd$rtmin,
rt_upper = test_pks_rd$rtmax),
mz = data.frame(mz_lower = test_pks_rd$mzmin,
mz_upper = test_pks_rd$mzmax)
)
test_eic_rd <- lapply(1:nrow(test_eic_rd), function(ch) {
MSnbase::clean(test_eic_rd[ch, ], na.rm = T)})
## prepare metadata
metadata <- data.frame(filename = test_basenames,
run_order = 1:2,
raw_filepath = test_fnames,
proc_filepath = paste0(file.path(data_dir, test_basenames), "_peakgrs.csv"),
stringsAsFactors = F
)
write.csv(metadata, file.path(data_dir, "metadata.csv"), quote = F, row.names = FALSE)
meta_fname <- file.path(data_dir, "metadata.csv")
####---- multiple datafile prepation with the pipeline
groupPEAKS(file = meta_fname, out_dir = data_dir, cwt = cwt)
grouped_fnames <- paste0(file.path(data_dir, test_basenames), "_peakgrs.csv")
# Duplicated tables -----------------------------------------------------------------------------------------------
## write two duplicated csv for sample wt15
single_table <- read.csv(grouped_fnames[1], header = T, stringsAsFactors = F)
dup_basenames <- c("test_file1", "test_file2")
dup_fnames <- paste0(file.path(data_dir, dup_basenames), "_peakgrs.csv")
write.csv(single_table, dup_fnames[1], quote = F, row.names = FALSE)
write.csv(single_table, dup_fnames[2], quote = F, row.names = FALSE)
metadata_dup <- data.frame(filename = dup_basenames,
run_order = 1:2,
raw_filepath = rep(test_fnames[1], 2),
proc_filepath = dup_fnames
)
write.csv(metadata_dup, file.path(data_dir, "metadata_dup.csv"), quote = F, row.names = FALSE)
dup_meta_fname <- file.path(data_dir, "metadata_dup.csv")
# Noisy tables -----------------------------------------------------------------------------------------------------
## write a sligthly modified peak table for sample wt15
## take the biggest peakgroup and add it again with modified peaks
biggest_pkg <- order(table(single_table$peakgr), decreasing = T)[1]
## take only half of the peaks in the biggest peakgr
biggest <- single_table[which(single_table$peakgr == biggest_pkg), ]
biggest <- biggest[1:(nrow(biggest)/2), ]
## make noisy table, which contains all original peakgrs, but the biggest
noisy <- rbind(single_table[single_table$peakgr != biggest_pkg, ],
## add (half the peaks - 1) of the biggests peakgr
biggest[-nrow(biggest), ])
noisy$noisy[which(noisy$peakgr == biggest_pkg)] <- 1
## modify mz/rt/into values of the biggest peakgr and give new peakgr id
noisy_pkg <- max(single_table$peakgr) + 1
rep_biggest <- biggest
rep_biggest$peakid <- NA # will assign new once added to the table
rep_biggest$into <- rep_biggest$into * 1.5 * 1:nrow(rep_biggest)
rt_new <- min(rep_biggest$rt) - 0.1
rep_biggest$rtmin <- rep_biggest$rtmin - (rep_biggest$rt - rt_new)
rep_biggest$rtmax <- rep_biggest$rtmax - (rep_biggest$rt - rt_new)
rep_biggest$rt <- rt_new
rep_biggest$peakgr <- noisy_pkg
rep_biggest$noisy <- 2
## add modified biggest peakgr as a new peakgr
noisy <- rbind(noisy,
rep_biggest)
## arrange by peak intensity and give a peak number
noisy <- noisy[order(noisy$into, decreasing = T), ]
noisy$peakid <- 1:nrow(noisy)
noisy_basenames <- c("test_file1", "test_file3")
noisy_fnames <- paste0(file.path(data_dir, noisy_basenames), "_peakgrs.csv")
write.csv(noisy, noisy_fnames[2], quote = F, row.names = FALSE)
noisy_metadata <-
data.frame(
filename = noisy_basenames,
run_order = 1:2,
raw_filepath = rep(test_fnames[1], 2),
proc_filepath = noisy_fnames
)
write.csv(noisy_metadata, file.path(data_dir, "metadata_noisy.csv"), quote = F, row.names = FALSE)
noisy_meta_fname <- file.path(data_dir, "metadata_noisy.csv")
# large study tables -----------------------------------------------------------------------------------------------------
large_fnames <-
list.files(file.path(system.file(package = "faahKO"), "cdf/WT/"), full.names = T)
large_basenames <- sapply(large_fnames, function(fname) {
strsplit(basename(fname), split = "[.]")[[1]][1]
}, USE.NAMES = F)
## prepare metadata
large_metadata <- data.frame(filename = large_basenames,
run_order = 1:length(large_basenames),
raw_filepath = large_fnames,
proc_filepath = paste0(file.path(data_dir, large_basenames), "_peakgrs.csv"),
stringsAsFactors = F)
write.csv(large_metadata, file.path(data_dir, "metadata_large.csv"), quote = F, row.names = FALSE)
large_meta_fname <- file.path(data_dir, "metadata_large.csv")
groupPEAKS(file = large_meta_fname, out_dir = data_dir, cwt = cwt)
# other vars -----------------------------------------------------------------------------------------------------
## list db template
## template includes three first peak-groups of sample wt15
db_fname <- file.path(data_dir, "DBtemplate.csv")
## alignPEAKS parameters
rt_err <- 10
mz_err <- 0.01
bins <- 0.01
cutoff <- 0.5
min_samples_prop <- 0.3
## prep for the default 2-core implementation for low-level un-exported functions
ncores <- 2
doParallel::registerDoParallel(cores = ncores)
# massFlowAnno ------------------------------------------------------------
# anno_dir <- file.path(system.file(package = "massFlowR"), "testdata/")
# db_file <- file.path(anno_dir, "database.csv")
# ds_file <- file.path(anno_dir, "devset_filled_intensity_data.csv")
# meta_file <- file.path(anno_dir, "metadata.csv")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.