#' merge.Results
#'
#' Function that merges results from different parameter settings.
#'
#' We use a four-step process to merge features from different parameter
#' settings. In step one, features detected at settings P1 and P2 are combined
#' into one list. In step two, features are grouped by a user-defined m/z
#' tolerance (5 ppm is appropriate for high resolution MS but may not be
#' suitable for lower resolution instruments; for the LTQ-FT/MS, examination of
#' m/z tolerance shows little difference between 5 and 10 ppm). In step three,
#' features are further sub-grouped based on a user-defined retention time
#' tolerance. Users are recommended to use the find.Overlapping.mzs function
#' below to optimize the retention time tolerance threshold. In step four, a
#' paired t-test & a Spearman correlation test is used to compare the intensity
#' levels of the metabolites only for the redundant features that have m/z and
#' retention time within defined tolerance levels as described above. Features
#' with minimum median PID (or median CV; for more than two technical
#' replicates) are chosen as representatives of each sub-group, and added to
#' the final list. This scheme allows identification of unique features, and
#' selection of the most consistent feature as a representative for features
#' that overlap.
#'
#' @param dataA feature alignment output matrix from apLCMS or XCMS with
#' intensities at parameter settings P1
#' @param dataB feature alignment output matrix from apLCMS or XCMS with
#' intensities at parameter settings P2
#' @param feat.eval.A feature evaluations results from evaluate.Features for
#' results at parameter settings P1
#' @param feat.eval.B feature evaluations results from evaluate.Features for
#' results at parameter settings P2
#' @param max.mz.diff +/- mz tolerance in ppm for feature matching
#' @param max.rt.diff retention time tolerance for feature matching
#' @param merge.eval.pvalue Threshold for defining signifcance level of the
#' paired t-test or the Pearson correlation during the merge stage in
#' xMSanalyzer. The p-value is used to determine whether two features with same
#' m/z and retention time have identical intensity profiles.
#' @param mergecorthresh Correlation threshold to be used during the merge
#' stage in xMSanalyzer to determine whether two features with same m/z and
#' retention time have identical intensity profiles.
#' @param alignment.tool Name of the feature alignment tool eg: "apLCMS" or
#' "XCMS"
#' @param numnodes Number of computing nodes to use. Default: 1
#' @param mult.test.cor Should Bonferroni multiple testing correction method be
#' applied for comparing intensities of overlapping m/z? Default: FALSE
#' @param missingvalue How are missing values represented. eg: 0 or NA
#' @return Returns a matrix of columns of unique m/z values, elution times,
#' signal strengths in each spectrum after merging results
#' @author Karan Uppal <kuppal2@@emory.edu>
#' @keywords ~merge
merge.Results <- function(dataA, dataB, feat.eval.A,
feat.eval.B, max.mz.diff = 15, max.rt.diff = 300,
merge.eval.pvalue = 0.05, alignment.tool = "apLCMS",
numnodes = 1, mult.test.cor = FALSE, mergecorthresh = 0.7,
missingvalue = 0) {
dataA <- unique(dataA)
dataB <- unique(dataB)
if (alignment.tool == "apLCMS") {
sample.col.start = 5
dataA <- cbind(dataA, feat.eval.A[, c(sample.col.start:(dim(feat.eval.A)[2]))])
dataB <- cbind(dataB, feat.eval.B[, c(sample.col.start:(dim(feat.eval.B)[2]))])
curdata <- rbind(dataA, dataB)
curdata <- curdata[order(curdata$mz), ]
curdata <- as.data.frame(curdata)
npeaks <- apply(curdata[, sample.col.start:(dim(curdata)[2] -
8)], 1, countpeaks)
curdatatemp <- cbind(curdata[, c(1:4)], npeaks)
curdata <- cbind(curdatatemp, curdata[, sample.col.start:dim(curdata)[2]])
sample.col.start = 6
curdata <- as.data.frame(curdata)
} else {
if (alignment.tool == "XCMS") {
sample.col.start = 9
cnames <- colnames(dataA)
cnames[1] = "mz"
cnames[4] = "time"
colnames(dataA) = cnames
colnames(dataB) = cnames
dataA <- cbind(dataA, feat.eval.A[, c(sample.col.start:(dim(feat.eval.A)[2]))])
dataB <- cbind(dataB, feat.eval.B[, c(sample.col.start:(dim(feat.eval.B)[2]))])
curdata <- rbind(dataA, dataB)
curdata <- curdata[order(curdata$mz),
]
curdata <- as.data.frame(curdata)
# curdata<-curdata[-which(curdata$min==curdata$max),]
} else {
stop(paste("Invalid value for alignment.tool. Please use either \"apLCMS\" or \"XCMS\"",
sep = ""))
}
}
# dataA<-dataA[order(dataA$mz),]
# dataB<-dataB[order(dataB$mz),]
sub_data_a <- new("list")
sub_data_b <- new("list")
lindex <- 1
min_mz <- min(curdata[, 1])
max_mz <- max(curdata[, 1])
mz_group = 10
mzdefect <- 1 * ((curdata$mz - floor(curdata$mz)))
# curdata<-cbind(mzdefect,curdata)
curdata <- as.data.frame(curdata)
curdata <- unique(curdata)
max_rt_thresh <- max(curdata$time, na.rm = TRUE)
# d1<-density(mzdefect,bw='nrd',from=min(mzdefect),to=(0.01+max(mzdefect)))
# sub_data_a<-sapply(list(myData1=curdata),function(x)
# split(x,cut(curdata$mz,breaks=seq(min(curdata$mz)-0.001,max(curdata$mz)+d1$bw,d1$bw))))
# d1$bw=d1$bw*2
# sub_data_a<-sapply(list(myData1=curdata),function(x)
# split(x,cut(curdata$mzdefect,breaks=seq(0,1,0.03))))
# i<-2 j<-3
# 10^6*abs((850+max(sub_data_a[[i]][,1]))-(850+min(sub_data_a[[j]][,1])))/(850+min(sub_data_a[[j]][,1]))
# lindex<-length(sub_data_a)
diff_mz_num = 1
# Step 1 Group features by m/z
mz_groups <- lapply(1:dim(curdata)[1], function(j) {
commat = {
}
diffmz = new("list")
ppmb = (500) * (curdata$mz[j]/1e+06)
getbind_same <- which(abs(curdata$mz - curdata$mz[j]) <=
ppmb)
diffmz[[diff_mz_num]] = getbind_same #dataA[getbind_same,]
diff_mz_num = diff_mz_num + 1
return(diffmz)
})
options(warn = -1)
del_list <- {
}
for (m in 1:length(mz_groups)) {
if ((m %in% del_list) == FALSE) {
for (n in (m + 1):length(mz_groups)) {
if (n > length(mz_groups)) {
break
}
com1 <- intersect(mz_groups[[m]][[1]],
mz_groups[[n]][[1]])
if (length(com1) > 0) {
mz_groups[[m]][[1]] <- c(mz_groups[[m]][[1]],
mz_groups[[n]][[1]])
del_list <- c(del_list, n)
}
}
mz_groups[[m]][[1]] <- unique(mz_groups[[m]][[1]])
}
}
if (length(del_list) > 0) {
mz_groups <- mz_groups[-del_list]
}
mz_groups <- unique(mz_groups)
sub_data_a <- lapply(1:length(mz_groups), function(j) {
commat = {
}
# diffmz=new('list')
getbind_same = mz_groups[[j]][[1]]
diffmz = curdata[getbind_same, ]
return(diffmz)
})
rm(mz_groups)
lindex <- length(sub_data_a)
sub_data_a <- unique(sub_data_a)
if (lindex > 2) {
cl <- makeSOCKcluster(numnodes)
clusterEvalQ(cl, "merge.Results.child.ttest")
clusterEvalQ(cl, "compare_intensities_ttest")
merge.res <- parLapply(cl, sub_data_a, merge.Results.child.ttest,
max.mz.diff = max.mz.diff, max.rt.diff = max.rt.diff,
merge.eval.pvalue = merge.eval.pvalue,
alignment.tool = alignment.tool, mult.test.cor = mult.test.cor,
mergecorthresh = mergecorthresh, col.rm.index = NA,
missingvalue = missingvalue)
if (FALSE) {
merge.res <- new("list")
for (lsub in 1:length(sub_data_a)) {
tempres <- merge.Results.child.ttest(dataA = sub_data_a[[lsub]],
max.mz.diff = max.mz.diff, max.rt.diff = max.rt.diff,
merge.eval.pvalue = merge.eval.pvalue,
alignment.tool = alignment.tool,
mult.test.cor = mult.test.cor, mergecorthresh = mergecorthresh,
col.rm.index = NA, missingvalue = missingvalue)
merge.res[[lsub]] <- tempres
}
}
stopCluster(cl)
# save(merge.res,file='mergeres.Rda')
end_ind <- dim(curdata)[2] - 8
l1 <- lapply(1:length(merge.res), function(x) {
nrow(merge.res[[x]])
})
l2 <- unlist(l1)
if (length(which(l2 < 1)) > 0) {
merge.res <- merge.res[-which(l2 < 1)]
}
l1 <- lapply(1:length(merge.res), function(x) {
is.na(merge.res[[x]][1])[1]
})
l2 <- unlist(l1)
if (length(which(l2 == TRUE)) > 0) {
merge.res <- merge.res[-which(l2 == TRUE)]
}
final.res = {
}
# final.res<-ldply(merge.res,rbind) final.res={}
for (s in 1:length(merge.res)) {
tempd <- as.data.frame(merge.res[[s]])
final.res <- rbind(final.res, tempd)
}
final.res <- unique(final.res)
} else {
final.res <- merge.Results.child.ttest(curdata,
max.mz.diff = max.mz.diff, max.rt.diff = max.rt.diff,
merge.eval.pvalue = merge.eval.pvalue,
alignment.tool = alignment.tool, mult.test.cor = mult.test.cor,
mergecorthresh = mergecorthresh, col.rm.index = NA,
missingvalue = missingvalue)
}
final.res <- as.data.frame(final.res)
# final.res<-na.omit(final.res)
final.res <- unique(final.res)
final.res <- final.res[order(final.res$mz), ]
# if(FALSE)
{
curdata <- final.res
curdata <- as.data.frame(curdata)
# system.time(global_cor<-WGCNA::cor(t(curdata[,c(sample.col.start:end_ind)]),nThreads=numnodes,method='spearman',use
# = 'p'))
# count_cor<-lapply(1:dim(curdata)[1],function(j){length(which(global_cor[j,]>=0.9))})
# count_cor<-lapply(1:dim(curdata)[1],function(j){diff_rt<-abs(curdata[j,2]-curdata[,2]);
# length(which(global_cor[j,]>=0.9 &
# diff_rt<10))})
count_cor <- lapply(1:dim(curdata)[1], function(j) {
diff_mz <- 10^6 * abs(curdata[j, 1] -
curdata[, 1])/curdata[j, 1]
diff_rt <- abs(curdata[j, 2] - curdata[,
2])
length(which(diff_mz < max.mz.diff & diff_rt <
90))
})
count_cor <- unlist(count_cor)
# print('length of count_cor')
# print(length(count_cor))
if (length(which(count_cor <= 1)) > 0) {
non_cor_data <- curdata[which(count_cor <=
1), ]
curdata <- curdata[-which(count_cor <=
1), ]
# global_cor<-global_cor[-which(count_cor<=1),-which(count_cor<=1)]
}
if (dim(curdata)[1] > 0) {
# print('dim of curdata 2nd round check')
# print(dim(curdata))
final.res2 <- merge.Results.child.ttest(curdata,
max.mz.diff = max.mz.diff, max.rt.diff = max.rt.diff,
merge.eval.pvalue = merge.eval.pvalue,
alignment.tool = alignment.tool, mult.test.cor = mult.test.cor,
mergecorthresh = mergecorthresh, col.rm.index = NA,
missingvalue = missingvalue)
# final.res2<-na.omit(final.res2)
final.res2 <- unique(final.res2)
final.res <- rbind(final.res2, non_cor_data)
final.res <- as.data.frame(final.res)
final.res <- final.res[order(final.res$mz),
]
}
curdata <- final.res
# rm(final.res)
curdata <- replace(as.matrix(curdata), which(curdata ==
0), NA)
system.time(global_cor <- WGCNA::cor(t(curdata[,
c(sample.col.start:end_ind)]), nThreads = numnodes,
use = "p"))
count_cor <- lapply(1:dim(curdata)[1], function(j) {
diff_mz <- 10^6 * abs(curdata[j, 1] -
curdata[, 1])/curdata[j, 1]
diff_rt <- abs(curdata[j, 2] - curdata[,
2])
length(which(diff_mz < max.mz.diff & global_cor[j,
] >= 0.9))
})
count_cor <- unlist(count_cor)
# print('length of count_cor')
# print(length(count_cor))
if (length(which(count_cor <= 1)) > 0) {
non_cor_data <- curdata[which(count_cor <=
1), ]
curdata <- curdata[-which(count_cor <=
1), ]
# global_cor<-global_cor[-which(count_cor<=1),-which(count_cor<=1)]
}
if (dim(curdata)[1] > 0) {
# print('dim of curdata 2nd round check')
# print(dim(curdata))
curdata <- as.data.frame(curdata)
final.res2 <- merge.Results.child.ttest(curdata,
max.mz.diff = max.mz.diff, max.rt.diff = max_rt_thresh,
merge.eval.pvalue = merge.eval.pvalue,
alignment.tool = alignment.tool, mult.test.cor = mult.test.cor,
mergecorthresh = mergecorthresh, col.rm.index = NA,
missingvalue = NA)
rm(curdata)
# final.res2<-na.omit(final.res2)
final.res2 <- unique(final.res2)
print("here2")
final.res2 <- replace(as.matrix(final.res2),
which(is.na(final.res2) == TRUE),
0)
final.res <- rbind(final.res2, non_cor_data)
final.res <- as.data.frame(final.res)
final.res <- final.res[order(final.res$mz),
]
}
}
if (is.na(missingvalue) == FALSE) {
final.res <- replace(as.matrix(final.res),
which(is.na(final.res) == TRUE), missingvalue)
}
final.res <- as.data.frame(final.res)
print("dim of final res")
print(dim(final.res))
options(warn = 0)
return(final.res)
# return(sub_data_a)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.