#' find.Overlapping.mzs
#'
#' This function matches features between two datasets using the following user
#' defined criteria: 1) Maximum m/z difference (+/-) ppm 2) Maximum retention
#' time difference in seconds
#'
#' The find.Overlapping.mzs function operates on two sets of feature lists with
#' m/z and retention times for each feature, denoted by L1 and L2, and iterates
#' over all m/z values in L1 to find those that are within a user defined m/z
#' (ppm) and retention time (sec) threshold in L2. Optionally, the user can
#' match features based on only the m/z values by setting time.thresh=NA. The
#' find.Unique.mzs function uses a similar algorithm to find unique features
#' that are not within a user defined mass and retention time threshold level.
#'
#' @param dataA apLCMS or XCMS feature table for dataset A
#' @param dataB apLCMS or XCMS feature table for dataset B
#' @param mz.thresh Maximum m/z difference (+/-) ppm. eg: 10
#' @param time.thresh Maximum retention time difference (+/-) secs. eg: 300
#' @param alignment.tool Name of the feature alignment tool eg: "apLCMS" or
#' "XCMS" or "NA" Use "NA" if the input matrix includes only m/z or both m/z
#' and retnetion time values.
#' @return Matrix of overlapping features with columns: index.data.A: index of
#' overlapping m/z in dataset A mz.data.A: m/z in dataset A time.data.A:
#' retention time in dataset A index.data.B: index of overlapping m/z in
#' dataset B mz.data.B: m/z in dataset B time.data.B: retention time in dataset
#' B
#' @author Karan Uppal <kuppal2@@emory.edu>
#' @seealso apLCMS.align, XCMS.align, find.Unique.mzs, getVenn
#' @keywords ~overlap ~common
find.Overlapping.mzs <- function(dataA, dataB, mz.thresh = 10,
time.thresh = NA, alignment.tool = NA) {
data_a <- as.data.frame(dataA)
data_b <- as.data.frame(dataB)
# data_a<-unique(data_a)
rm(dataA)
rm(dataB)
# data_b<-unique(data_b)
com_mz_num = 1
unique_mz = {
}
ppm_v = {
}
rt_v = {
}
commat = {
}
col.names.dataA = colnames(data_a)
col.names.dataB = colnames(data_b)
if (is.na(alignment.tool) == FALSE) {
if (alignment.tool == "apLCMS") {
sample.col.start = 5
} else {
if (alignment.tool == "XCMS") {
sample.col.start = 9
col.names.dataA[1] = "mz"
col.names.dataA[2] = "time"
col.names.dataB[1] = "mz"
col.names.dataB[2] = "time"
colnames(data_a) = col.names.dataA
colnames(data_b) = col.names.dataB
}
}
} else {
# stop(paste('Invalid value for alignment.tool.
# Please use either \'apLCMS\' or \'XCMS\'',
# sep=''))
col.names.dataA[1] = "mz"
col.names.dataB[1] = "mz"
if (is.na(time.thresh) == FALSE) {
col.names.dataA[2] = "time"
col.names.dataB[2] = "time"
print("Using the 1st columns as \"mz\" and 2nd columsn as \"retention time\"")
} else {
print("Using the 1st columns as \"mz\"")
}
colnames(data_a) = col.names.dataA
colnames(data_b) = col.names.dataB
}
# data_a<-data_a[order(data_a$mz),]
# data_b<-data_b[order(data_b$mz),]
data_a <- as.data.frame(data_a)
data_b <- as.data.frame(data_b)
colnames(data_a) = col.names.dataA
colnames(data_b) = col.names.dataB
# create header for the matrix with common
# features
if (is.na(time.thresh) == FALSE) {
mznames = c("index.A", "mz.data.A", "time.data.A",
"index.B", "mz.data.B", "time.data.B",
"time.difference")
} else {
mznames = c("index.A", "mz.data.A", "index.B",
"mz.data.B")
}
# Step 1 Group features by m/zdim(data_a)[1]
mz_groups <- lapply(1:dim(data_a)[1], function(j) {
commat = {
}
commzA = new("list")
commzB = new("list")
ppmb = (mz.thresh) * (data_a$mz[j]/1e+06)
getbind_same <- which(abs(data_b$mz - data_a$mz[j]) <=
ppmb)
if (is.na(time.thresh) == FALSE) {
if (length(getbind_same) > 0) {
nearest_time_diff = 10000
bestmatch = {
}
rnames = {
}
temp = {
}
commat = {
}
for (comindex in 1:length(getbind_same)) {
tempA = cbind(j, data_a[j, c(1,
2)])
tempB = cbind(getbind_same[comindex],
data_b[getbind_same[comindex],
c(1, 2)])
temp = cbind(tempA, tempB)
timediff = abs(data_a[j, 2] - data_b[getbind_same[comindex],
2])
temp <- cbind(temp, timediff)
if (timediff < time.thresh && timediff <=
nearest_time_diff) {
bestmatch = as.data.frame(temp)
nearest_time_diff = timediff
temp <- as.data.frame(temp)
commat <- rbind(commat, temp)
rnamestemp <- paste("mz", j, "_",
comindex, sep = "")
rnames <- c(rnames, rnamestemp)
}
}
# commat=as.data.frame(bestmatch)
if (length(commat) >= 4) {
rownames(commat) = rnames
}
}
} else {
if (length(getbind_same) > 0) {
temp1 <- {
}
for (comindex in 1:length(getbind_same)) {
tempA = cbind(j, data_a[j, c(1)])
tempB = cbind(getbind_same[comindex],
data_b[getbind_same[comindex],
c(1)])
temp = cbind(tempA, tempB)
# temp=cbind(data_a[j,c(1)],data_b[getbind_same[comindex],c(1)])
temp1 <- rbind(temp1, temp)
}
commat = as.data.frame(temp1)
rnames <- paste("mz", j, "_", seq(1,
length(getbind_same)), sep = "")
rownames(commat) = rnames
}
}
return(as.data.frame(commat))
})
# Step 2 Sub-group features from Step 1 by
# Retention time find the features with RT values
# within the defined range as compared to the
# query feature
uniqueinA = {
}
uniqueinB = {
}
commat = data.frame()
if (length(mz_groups) > 0) {
for (j in 1:length(mz_groups)) {
temp_diff = {
}
if (is.list(mz_groups) == TRUE) {
tempdata = mz_groups[[j]]
} else {
tempdata = mz_groups[j]
}
if (length(tempdata) > 1) {
colnames(tempdata) = mznames
tempdata = as.data.frame(t(tempdata))
temp = tempdata
temp = as.data.frame(temp)
if (is.null(commat) == TRUE) {
commat = t(temp)
} else {
commat = rbind(commat, t(temp))
}
}
}
if (is.null(dim(commat)) == FALSE) {
commat = as.data.frame(commat)
}
}
return(commat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.