##############
##
matchmzSet<-function(obj,Analyte=NULL,annotdb=NULL,ipdb=NULL,lIP=NULL,
dppm=20,mz2use="MZ",mass2use="Mass",
annotinf2add=NULL,objinf2add=NULL,groupby=NULL,chunk=200,ncl=1,collsep=";;"){
if(inherits(obj, "metaboSet")){
cat("Matching from a metaboSet\n")
dfIn=obj$Annot
if(is.null(Analyte)) Analyte=obj$Analyte
lanalytes=Analyte[Analyte%in%obj$Analyte]
}else if(inherits(obj,"data.frame")){
cat("Matching from a data.frame\n")
dfIn=obj
if(is.null(rownames(dfIn))) rownames(dfIn)=paste("Ana",1:nrow(dfIn),sep="")
if(any(duplicated(rownames(dfIn)))) rownames(dfIn)=paste("Ana",1:nrow(dfIn),sep="")
if(is.null(Analyte)) Analyte=rownames(dfIn)
lanalytes=Analyte[Analyte%in%rownames(dfIn)]
# print(names(dfIn))
}else if(inherits(obj,"numeric") & is.null(dim(obj))){
cat("Matching from a vector\n")
if(is.null(names(obj))) names(obj)=paste("Ana",1:length(obj),sep="")
dfIn=data.frame(MZ=obj,stringsAsFactors=FALSE)
rownames(dfIn)=names(obj)
if(is.null(Analyte)) Analyte=rownames(dfIn)
lanalytes=Analyte[Analyte%in%rownames(dfIn)]
}else stop("Provide metaboSet, data.frame or a vector")
if(is.null(ipdb)){
data(IPDB)
ipdb=IPDB
# rm('IPDB')
}
if(is.null(annotdb)){
data(AnnotationDB)
annotdb=AnnotationDB
# rm('AnnotationDB')
}
if(length(lanalytes)==0) stop("No analyte found in the object\n")
######################
if(!mz2use%in%names(dfIn)) stop(paste("m/z variable",mz2use,"not found in the object\n"))
cmz=as.numeric(dfIn[lanalytes,mz2use])
names(cmz)=lanalytes
cmz=cmz[!is.na(cmz)]
if(length(cmz)==0) stop(paste("nothing to match in the m/z variable",mz2use,"\n"))
######################
if(!mass2use%in%names(annotdb)) stop(paste("Mass variable",mass2use,"not found in the annotation dataframe\n"))
lentries=which(!is.na(as.numeric(annotdb[,mass2use])))
if(length(lentries)<1) stop("nothing to match in the annotation dataframe\n")
annotdb=annotdb[lentries,,drop=F]
lmz=annotdb[,mass2use]
ladd=unique(which(ipdb$Id%in%lIP | ipdb$Name%in%lIP | ipdb$Set%in%lIP))
if(length(ladd)==0)
stop(paste("IPs invalid - must be in :\n",
" *positive mode: ",paste(IPDB$Name[IPDB$Charge>0],collapse=" "),"\n",
" *negative mode: ",paste(IPDB$Name[IPDB$Charge<0],collapse=" "),"\n",sep=""))
cat("Matching on: ",ipdb$Name[ladd],"\n",sep=" ")
mmz=matrix(sapply(ladd,function(i) (ipdb[i,]$xM*lmz+ipdb[i,]$adMass)/abs(ipdb[i,]$Charge)),nrow=length(lmz))
colnames(mmz)=ipdb$Name[ladd]
######################
if(length(cmz)<=chunk) lx=list(names(cmz)) else{
ngrps=max(1,round(length(cmz)/chunk))
lst=seq(1,length(cmz),length.out=ngrps+1)
lx=suppressWarnings(split(names(cmz),1:ngrps))
}
### parallel? lx/mmz/cmz/dppm
.infctMZM<-function(cx,mmz,cmz,dppm){
allm=list()
for(i in 1:ncol(mmz)){
mdppm=sweep(outer(cmz[cx],mmz[,i],"-"),1,cmz[cx],"/")*10^6
lmatch=which(abs(mdppm)<=dppm,arr=T)
if(length(lmatch)>0)
allm[[i]]=data.frame(Analyte=cx[lmatch[,1]],IP=i,DPPM=round(mdppm[lmatch],3),Entry=lmatch[,2],stringsAsFactors=F)
}
if(length(allm)>0) return(do.call("rbind",allm))
return(NULL)
}
d0=proc.time()[3]
cat("Started at ",date(),sep="")
if(ncl==1 | length(lx)==1){
cat(" on 1 processor\n",sep="")
allr=list()
for(k in 1:length(lx)){
cx=lx[[k]]
cat(ifelse(k%%10==0,"X","."))
allr[[k]]=.infctMZM(cx,mmz,cmz,dppm)
}
}
if(ncl>1 & length(lx)>1){
require("snowfall")
ncl=max(1,min(ncl,parallel:::detectCores()))
cat(" on ",ncl," processors\n",sep="")
sfInit(parallel=TRUE, cpus=ncl, type='SOCK',slaveOutfile='loggetTIC')
sfExport( ".infctMZM", local=TRUE )
allr=sfClusterApplyLB(lx,.infctMZM,mmz,cmz,dppm)
sfStop()
}
d1=proc.time()[3]
cat("\nCompleted at ",date()," -> took ",round(d1-d0,1)," secs \n",sep="")
allr=allr[which(!sapply(allr,is.null))]
cat("\n")
if(length(allr)==0) return(NULL)
matchres=do.call("rbind",allr)
############
matchres$IP=colnames(mmz)[matchres$IP]
rownames(matchres)=NULL
if(!is.null(annotinf2add)){
annotinf2add=unique(c(annotinf2add[annotinf2add%in%names(annotdb)],groupby[groupby%in%names(annotdb)]))
if(length(annotinf2add)>0){
cat("* adding from annotdb: ",annotinf2add,"\n",sep=" ")
for(i in annotinf2add) matchres[,i]=annotdb[matchres$Entry,i]
}
}
if(!is.null(objinf2add)){
objinf2add=unique(objinf2add[objinf2add%in%names(dfIn) & !objinf2add%in%names(matchres)])
# print(objinf2add)
if(length(objinf2add)>0){
cat("* adding from obj: ",objinf2add,"\n",sep=" ")
for(i in objinf2add) matchres[,i]=dfIn[matchres$Analyte,i]
}
}
matchres$Entry=lentries[matchres$Entry]
############
iDPPM=matchres$DPPM
if(!is.null(groupby)){
groupby=groupby[groupby%in%names(matchres)]
if(length(groupby)>0){
cat("* grouping by: ",groupby,"\n",sep=" ")
grpfac=apply(matchres[,c("Analyte","IP",groupby)],1,function(x) paste(as.character(x),collapse=".;."))
lnam=names(matchres)
iDPPM=round(tapply(iDPPM,grpfac,median),3)
matchres=lapply(names(matchres),function(x) tapply(matchres[,x],grpfac,function(y) paste(unique(y),collapse=collsep)))
matchres=data.frame(do.call("cbind",matchres),stringsAsFactors=FALSE)
names(matchres)=lnam
if(any(grepl(collsep,matchres$DPPM))) matchres$DPPMmed=iDPPM
}
}
############
matchres=matchres[order(factor(matchres$Analyte,levels=lanalytes),factor(matchres$IP,levels=colnames(mmz)),abs(iDPPM)),]
rownames(matchres)=NULL
cat(" -> ",length(unique(matchres$Analyte))," unique analytes\n",sep="")
cat(" -> ",length(unique(matchres$Entry))," unique entries\n",sep="")
summ=c(median(matchres$DPPM),mad(matchres$DPPM),mean(matchres$DPPM),sd(matchres$DPPM),range(matchres$DPPM))
cat(" -> ",sprintf("Med= %.2f(%.2f) Mean=%.2f(%.2f) [%.2f;%.2f]",summ[1],summ[2],summ[3],summ[4],summ[5],summ[6])," unique entries\n",sep="")
return(matchres)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.