context("helper functions for alignPEAKS method")
# checkFILE---------------------------------------------------------------------------------------------------
test_that("Checking grouped peaks csv tables via checkFILE is correct", {
## correct file
checked_file <- massFlowR:::checkFILE(file = grouped_fnames[[1]])
correct_file <-
read.csv(file = grouped_fnames[[1]], stringsAsFactors = F)
expect_equal(checked_file, correct_file)
## wrong filepath
wrong_filepath <- gsub(".csv", "", grouped_fnames[[1]])
expect_error(
massFlowR:::checkFILE(file = wrong_filepath),
paste("incorrect filepath for:", wrong_filepath)
)
## wrong column names
wrong_file <-
setNames(correct_file, nm = c("peakID", "MZ", names(correct_file)[3:ncol(correct_file)]))
wrong_file_filepath <-
gsub(".csv", "wrong.csv", grouped_fnames[[1]])
write.csv(wrong_file, file = wrong_file_filepath, row.names = F)
expected_error <-
paste("incorrect file:",
wrong_file_filepath,
"\n",
"missing columns: peakid, mz")
expect_error(massFlowR:::checkFILE(file = wrong_file_filepath),
expected_error)
unlink(x = wrong_file_filepath)
})
# do_alignPEAKS -----------------------------------------------------------
test_that("do_alignPEAKS() returns correct output ", {
do_alignPEAKS_out <- do_alignPEAKS(ds = single_table, tmp = single_table, ds_var_name = "peakgr", tmp_var_name = "peakgr", mz_err = mz_err, rt_err = rt_err, bins = bins, ncores = ncores)
expect_equal(length(do_alignPEAKS_out), length(unique(single_table$peakgr)))
expect_true(all(sapply(lapply(do_alignPEAKS_out, names), function(m) all(m == c("ds", "tmp", "mat","cos")))))
expect_true(all(unlist(lapply(do_alignPEAKS_out, "[[", "cos")) == 1))
})
# getRTbins ---------------------------------------------------------------------------------------------------------
test_that("getRTbins() correctly splits two dataframes to rt bins", {
## make dummy table:
## 20 peaks, 4 peakgroups: peakgroups number 2 and 3 are very close
## split dummy table into two rt bins
## as number 2 and 3 are very close, they will be included in both bin 1 and 2
dummy <- data.frame(peakid = 1:20,
peakgr = rep(1:4, each = 5),
rt = rep(c(100, 299, 301, 400), each = 5))
ncores <- 2
## use the same table as both ds and tmp
out <-
getRTbins(
ds = dummy,
tmp = dummy,
ds_var_name = "peakgr",
tmp_var_name = "peakgr",
mz_err = mz_err,
rt_err = rt_err,
ncores = ncores
)
## number of created rt bins equals ncores
expect_true(all(length(out$ds) == ncores, length(out$tmp) == ncores))
## all peakids are exported
expect_true(all(dummy$peakid %in% c(out$tmp[[1]]$peakid, out$tmp[[2]]$peakid)))
expect_true(all(dummy$peakid %in% c(out$ds[[1]]$peakid, out$ds[[2]]$peakid)))
## since ds and tmp are identical, all peaks in the ds bin must also be present in the corresponding tmp bin
expect_true(all(
out$ds[[1]]$peakid %in% out$tmp[[1]]$peakid,
out$ds[[2]]$peakid %in% out$tmp[[2]]$peakid
))
## obtained tmp bins must overlap for common rt values
rt_bin_1 <- c(min(out$tmp[[1]]$rt - rt_err),
max(out$tmp[[1]]$rt + rt_err))
rt_bin_2 <- c(min(out$tmp[[2]]$rt - rt_err),
max(out$tmp[[2]]$rt + rt_err))
peakgrs_in_common <-
dummy$peakgr[which((dummy$rt - rt_err) >= rt_bin_2[1] &
(dummy$rt + rt_err) <= rt_bin_1[2])]
peaks_in_common <- dummy$peakid[which(dummy$peakgr %in% peakgrs_in_common)]
expect_identical(peaks_in_common,
base::intersect(out$tmp[[1]]$peakid, out$tmp[[2]]$peakid))
})
# orderBYrt ---------------------------------------------------------------------------------------------------------
test_that("orderBYrt() correctly orders dataset using rt", {
## create sample dataset
dt <- data.frame(
peakid = c(3, 9, 21, 36, 46, 78, 81, 93, 124, 149, 4, 17, 52, 62, 67, 119, 123, 6, 19, 20, 60, 101, 186, 13, 106, 117, 118, 1, 10, 35, 2, 12, 50, 5, 18, 63, 8, 22, 68, 11, 33, 87, 7, 24),
mz = c(343, 365, 344, 366, 381, 345, 388.100006103516, 362.100006103516, 382, 444.100006103516, 524.200012207031, 525.200012207031, 526.200012207031, 249.100006103516, 255.199996948242, 282.200012207031, 309.200012207031, 522.200012207031, 531.200012207031, 523.200012207031, 389.200012207031, 344.200012207031, 557.100036621094, 360, 431.100006103516, 370.100006103516, 416.100006103516, 508.200012207031, 509.200012207031, 367.200012207031, 496.200012207031, 497.200012207031, 498.200012207031, 526.100036621094, 527.100036621094, 385.100006103516, 522.200012207031, 523.200012207031, 524.200012207031, 496.200012207031, 497.200012207031, 498.200012207031, 502.100006103516, 503.100006103516),
rt = c(2678.218, 2679.783, 2678.218, 2679.783, 2679.783, 2678.218, 2678.218, 2678.218, 2676.653, 2679.783, 3662.573, 3662.573, 3662.573, 3662.573, 3661.009, 3662.573, 3661.009, 3344.888, 3344.888, 3344.888, 3343.323, 3344.888, 3343.323, 2684.478, 2684.478, 2682.913, 2684.478, 3515.468, 3517.033, 3517.033, 3384.012, 3384.012, 3384.012, 3168.048, 3168.048, 3168.048, 3409.051, 3409.051, 3409.051, 3316.719, 3316.719, 3316.719, 3157.093, 3157.093),
into = c(26239739.4908571, 15389161.888963, 5872251.05168, 3427455.12957693, 2193772.47438462, 945843.066319147, 866478.105453334, 733169.002746268, 463242.195000001, 277061.806153847, 26223613.1127728, 7829455.07127275, 1933029.53740001, 1307022.7299762, 1168879.00468966, 515198.332384615, 492462.693627908, 23482537.5170256, 7268277.8061628, 6909134.43804878, 1374905.10836585, 672994.049432435, 122603.611878788, 10062819.6947222, 637630.751882354, 525470.723055555, 523740.547333333, 53910492.9448513, 15123847.3582767, 3493079.33254168, 38200390.2189089, 10219380.1002272, 1972152.95488889, 25760336.3028836, 7615134.77321737, 1269534.71757576, 19327938.1211429, 5775851.12990477, 1156396.91788889, 13639592.8685714, 3764370.88609524, 780206.794606061, 20113368.9417143, 5122895.84110345),
peakgr = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 1, 1, 1, 2, 2, 2, 5, 5, 5, 8, 8, 8, 9, 9, 9, 7, 7)
)
## get peak-group order by their median rt
dt_vars <- unique(dt$peakgr)
dt_rt_med <- sapply(dt_vars, function(x) {
median(dt[dt$peakgr == x, "rt"])
})
expect_true(all(sapply(2:length(dt_rt_med), function(x) {
dt_rt_med[order(dt_rt_med)][x] > dt_rt_med[order(dt_rt_med)][x-1]
})))
dt_var_levels <- factor(dt[, "peakgr"], levels = dt_vars[order(dt_rt_med)])
dt_expected <- dt[order(dt_var_levels),]
## run function
dt_ordered <- orderBYrt(dt = dt, var_name = "peakgr")
## compare
expect_identical(dt_expected, dt_ordered)
dt_rt_ordered_med <- sapply(unique(dt_ordered$peakgr), function(x) {
median(dt_ordered[dt_ordered$peakgr == x, "rt"])
})
expect_true(all(sapply(2:length(dt_rt_ordered_med), function(x) {
dt_rt_ordered_med[x] > dt_rt_ordered_med[x - 1]
})))
expect_true(all(sapply(unique(dt_ordered$peakgr), function(x) {
all(diff(order(dt_ordered[dt_ordered$peakgr == x, "peakid"])) == 1)
})))
})
# addERRS ---------------------------------------------------------------------------------------------------------
test_that("addERR adds mz/rt windows correctly", {
dt <- test_pks_rd
dt[, c("mz_l",
"mz_h",
"rt_l",
"rt_h")] <- c(dt$"mz" - mz_err,
dt$"mz" + mz_err,
dt$"rt" - rt_err,
dt$"rt" + rt_err)
addERRS_out <-
addERRS(dt = test_pks_rd,
mz_err = mz_err,
rt_err = rt_err)
expect_identical(dt, addERRS_out)
expect_true(addERRS_out[1, "mz_l"] == (test_pks_rd[1, "mz"] - mz_err))
})
# getCOSmat ---------------------------------------------------------------------------------------------------------
test_that("getCOSmat() returns correct cosine matrix", {
####---- compare the noisy peakgr and its original peakgr between two ALMOST identical tables ----####
tmp <- read.csv(noisy_fnames[1],
header = T,
stringsAsFactors = F)
doi <- read.csv(noisy_fnames[2],
header = T,
stringsAsFactors = F)
rt_bins <- getRTbins(ds = doi,
tmp = tmp,
ds_var_name = "peakgr",
tmp_var_name = "peakgr",
mz_err = mz_err,
rt_err = rt_err,
ncores = 2)
doi_bins <- rt_bins$ds
tmp_bins <- rt_bins$tmp
doi_vars_by_bins <- lapply(doi_bins, function(x) unique(x$peakgr))
bin <- which(sapply(doi_vars_by_bins, function(x) noisy_pkg %in% x))
ds_bin <- doi_bins[[bin]]
tmp_bin <- tmp_bins[[bin]]
cos_matches <- getCOSmat(
ds_bin = doi_bins[[bin]],
ds_var = "peakgr",
tmp_bin = tmp_bins[[bin]],
tmp_var = "peakgr",
mz_err = mz_err,
rt_err = rt_err,
bins = bins
)
cos_mat <- cos_matches[[1]]
####---- 30 peak-group pairs have cos of 1
expect_true(table(cos_mat[which(cos_mat > 0)])["1"] == 30)
####---- those that have cos < 1 are the noisy peakgroups
r_names <- names(Filter(length, apply(cos_mat, 1, function(x) {
which(x > 0 & x < 1)
})))
c_names <- names(Filter(length, apply(cos_mat, 2, function(x) {
which(x > 0 & x < 1)
})))
## in tmp, the peakgroup from which two noisy peakgrs were generated in the later file is biggest_pkg (ie 19)
## in doi, the noisy peakgroups generated from tmp peakgr are noisy_pkg (ie 63) and biggest_pkg( ie 19)
## cos mat rownames correspond to tmp peakgroups and colnames correspond to doi peakgroups
expect_true(r_names == biggest_pkg)
expect_true(all(c_names == c(noisy_pkg, biggest_pkg)))
####---- identical tables ----####
doi <- read.csv(grouped_fnames[1],
header = T,
stringsAsFactors = F)
tmp <- read.csv(grouped_fnames[1],
header = T,
stringsAsFactors = F)
tmp <- tmp[, c("peakid", "mz", "rt", "into", "peakgr")]
rt_bins <- getRTbins(ds = doi,
tmp = tmp,
ds_var_name = "peakgr",
tmp_var_name = "peakgr",
mz_err = mz_err,
rt_err = rt_err,
ncores = 1)
doi_bins <- rt_bins$ds
tmp_bins <- rt_bins$tmp
doi_vars_by_bins <- lapply(doi_bins, function(x) unique(x$peakgr))
cos_matches <- getCOSmat(
ds_bin = doi_bins[[bin]],
ds_var = "peakgr",
tmp_bin = tmp_bins[[bin]],
tmp_var = "peakgr",
mz_err = mz_err,
rt_err = rt_err,
bins = bins
)
cos_mat <- cos_matches[[1]]
## since doi and tmp are identical, all cos for corresponding peakgroups should be 1
expect_true(table(cos_mat[which(cos_mat > 0)])["1"] == length(unique(doi$peakgr)))
r_names <- names(Filter(length, apply(cos_mat, 1, function(x) {
which(x > 0)
})))
c_names <- names(Filter(length, apply(cos_mat, 2, function(x) {
which(x > 0)
})))
expect_true(all(r_names == c_names))
####---- different tables ----####
tmp <- read.csv(grouped_fnames[1],
header = T,
stringsAsFactors = F)
doi <- read.csv(grouped_fnames[2],
header = T,
stringsAsFactors = F)
tmp <- tmp[, c("peakid", "mz", "rt", "into", "peakgr")]
ncores <- 1
rt_bins <- getRTbins(ds = doi,
tmp = tmp,
ds_var_name = "peakgr",
tmp_var_name = "peakgr",
mz_err = mz_err,
rt_err = rt_err,
ncores = ncores)
doi_bins <- rt_bins$ds
tmp_bins <- rt_bins$tmp
doi_vars_by_bins <- lapply(doi_bins, function(x) unique(x$peakgr))
cos_matches <- list(
getCOSmat(
ds_bin = doi_bins[[ncores]],
ds_var = "peakgr",
tmp_bin = tmp_bins[[ncores]],
tmp_var = "peakgr",
mz_err = mz_err,
rt_err = rt_err,
bins = bins)
)
expect_equal(length(cos_matches[[ncores]][[2]]), length(unique(doi$peakgr)))
expect_equal(nrow(cos_matches[[ncores]][[1]]), length(unique(tmp$peakgr)))
expect_equal(ncol(cos_matches[[ncores]][[1]]), length(unique(doi$peakgr)))
# expect_equal(max(cos_matches[[ncores]][[1]]), 0.99996)
expect_true(cos_matches[[ncores]][[1]]["22","27"] == max(cos_matches[[ncores]][[1]]))
})
# getMATCHES ---------------------------------------------------------------------------------------------------------
test_that("getMATCHES() ", {
tmp <- addERRS(single_table, mz_err = mz_err, rt_err = rt_err)
## target peaks are:
## 1) a peak from the same table
## 2) a modified version of the same peak, within matching range
## 3) a modified version of the same peak, outside of matching range
target_peak1 <- tmp[1, ]
target_peak2 <- tmp[1, ]
target_peak2$mz <- target_peak2$mz - (mz_err/2)
target_peak2$rt <- target_peak2$rt + (rt_err/2)
target_peak3 <- tmp[1,]
target_peak3$mz <- target_peak3$mz - (mz_err*3)
target_peak3$rt <- target_peak3$rt + (rt_err*3)
target_peaks <- rbind(target_peak1, target_peak2, target_peak3)
target_peaks <- addERRS(target_peaks, mz_err = mz_err, rt_err = rt_err)
matches <- lapply(1:nrow(target_peaks), getMATCHES, target = target_peaks, tmp = tmp, tmp_var = "peakgr", target_var = "peakgr")
expect_true(length(matches) == nrow(target_peaks))
expect_equal(sapply(matches, nrow), list(1, 1, NULL))
expect_equal(sapply(matches, "[[", "peakid"), list(tmp[1, "peakid"], tmp[1, "peakid"], NULL))
expect_equal(sapply(matches, colnames),
c(rep(list(
c(
"peakid",
"mz",
"rt",
"into",
"tmp_var",
"target_peakid",
"target_var"
)
), 2), list(NULL)))
})
# buildVECTOR ---------------------------------------------------------------------------------------------------------
test_that("buildVECTOR()", {
spec <- data.frame(mz = seq(from = 385.1000061,
to = 527.1000366,
by = bins))
spec$bin <- 1:nrow(spec)
peaks <- data.frame(
mz = c(526.1000366,
527.1000366,
386.1000061),
into = c(27029849.76,
7613856.402,
361034.734)
)
sample_vec <- buildVECTOR(spec = spec, peaks = peaks)
expect_true(all(which(sample_vec > 0) == c(101, 14101, 14201)))
sum(sample_vec[which(sample_vec > 0)])
## check for correct scaling and normalisation
spec_magn <- sqrt(sum(sqrt(peaks$into) * sqrt(peaks$into)))
expect_equal(c((sample_vec[which(sample_vec > 0)] * spec_magn) ^2),
peaks$into[order(peaks$mz)])
})
# normSPEC ----------------------------------------------------------------
test_that("normSPEC() ", {
## (A) create vector of unit length
## all intensities sum up to 1, thus normalisation doesn't affect
spec <- data.frame(into = c(
rep(0, 10),
0.0128555075994933,
rep(0, 10),
0.271109618049552,
rep(0, 10),
0.9624626283266
), mz = 1:33)
spec_norm <- normSPEC(into = spec$into)
expect_identical(length(spec_norm), nrow(spec))
expect_equal(spec_norm, spec$into)
## (B) create vector with intensities that sum to > 1
## one peak is very dominant
spec <- data.frame(into = c(
rep(0, 10),
1,
rep(0, 10),
2,
rep(0, 10),
10
), mz = 1:33)
spec_norm <- normSPEC(into = spec$into)
expect_identical(length(spec_norm), nrow(spec))
expect_true(sum(spec_norm) > 1)
spec_magn <- sqrt(sum(spec$into * spec$into))
expect_equal(c(spec_norm * spec_magn), spec$into)
})
# assignCOS -------------------------------------------------------------------------------------------------
test_that("assignCOS() correctly assigns pairs", {
## version 1: DS peak-groups are columns, columns no2 and no4 get assigned to the same tmp peak-group
cos_list <- c(0, 0, 0, 0,
0, 0.9, 0.8, 0.5,
0.5, 0, 0, 0.4,
0, 0.8, 0.7, 0.6)
cos <- matrix(cos_list, nrow = 4, ncol = 4)
cos_assigned <- massFlowR:::assignCOS(cos)
expected <- matrix(c(rep(NA, 4),
NA, TRUE, NA, NA,
TRUE, rep(NA, 3),
NA, TRUE, rep(NA, 2)),
nrow = 4, ncol = 4)
expect_equal(cos_assigned, expected)
## version2, columns 2,3,4,5,7 get assigned to peak-groups:
## columns 3 and 4 are assigned to the same
## column5 is assigned twice because cos values are identical
cos_list <- c(0, 0, 0, 0,
0, 0.9, 0.7, 0,
0, 0.1, 0, 0.9,
0, 0.1, 0.6, 0.8,
0.5, 0, 0.5, 0,
0.1, 0.2, 0, 0.4,
0, 0.85, 0.8, 0)
cos <- matrix(cos_list, nrow = 4, ncol = 7)
cos_assigned <- massFlowR:::assignCOS(cos)
expected <- matrix(c(rep(NA, 4),
NA, TRUE, rep(NA, 2),
rep(NA, 3), TRUE,
rep(NA, 3), TRUE,
TRUE, NA, TRUE, NA,
rep(NA, 4),
NA, TRUE, rep(NA, 2)),
nrow = 4, ncol = 7)
expect_equal(cos_assigned, expected)
## version3
cos_list <- c(0, 0, 0, 0, 0,
0, 0.9, 0.7, 0, 0,
0, 0.1, 0, 0.9, 0,
0, 0.1, 0.6, 0.8, 0.5,
0.5, 0, 0.5, 0, 0,
0.1, 0.2, 0, 0.4, 0,
0, 0.85, 0.8, 0, 0.9)
cos <- matrix(cos_list, nrow = 5, ncol = 7)
cos_assigned <- massFlowR:::assignCOS(cos)
expected <- matrix(c(rep(NA, 5),
NA, TRUE, NA, NA, NA,
rep(NA, 3), TRUE, NA,
rep(NA, 3), TRUE, NA,
TRUE, NA, TRUE, rep(NA, 2),
rep(NA, 5),
rep(NA, 4), TRUE),
nrow = 5, ncol = 7)
expect_equal(cos_assigned, expected)
## version4
cos_list <- c(0, 0, 0.9, 0,
0.6, 0.9, 0.7, 0,
0.7, 0, 0.8 , 0.6,
0.8, 0.85, 0, 0)
cos <- matrix(cos_list, nrow = 4, ncol = 4)
cos_assigned <- massFlowR:::assignCOS(cos)
expected <- matrix(c(NA, NA, TRUE, NA,
NA, TRUE, NA, NA,
NA, NA, TRUE, NA,
NA, TRUE, NA, NA),
nrow = 4, ncol = 4)
expect_equal(cos_assigned, expected)
})
# rankCOS ---------------------------------------------------------------------------------------------------------
test_that("rankCOS() correctly ranks cosines, giving 1 to the highest cosine", {
cos_list <- c(0, 0, 0.9, 0,
0.6, 0.9, 0.7, 0,
0.7, 0, 0.8 , 0.6,
0.8, 0.85, 0, 0)
cos <- matrix(cos_list, nrow = 4, ncol = 4)
## rank for columns
cos_ranked <- apply(cos, 2, FUN = massFlowR:::rankCOS)
expected <- matrix(c(0, 0, 1, 0,
3, 1, 2, 0,
2, 0, 1, 3,
2, 1, 0, 0),
nrow = 4, ncol = 4)
expect_equal(cos_ranked, expected)
## rank for rows
cos_ranked <- apply(cos, 1, FUN = massFlowR:::rankCOS)
expected <- matrix(c(0, 3, 2, 1,
0, 1, 0, 2,
1, 3, 2, 0,
0, 0, 1, 0),
nrow = 4, ncol = 4)
expect_equal(cos_ranked, expected)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.