### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
#' @name refinePIRoi
#' @title refine PI ROI
#'
#' Refine/integra ROI around spectra after association of spectra to ROIs from potential precursors
#'
#' @param obj metaboGoS obj
#' @param parDeco deconvolution parameter list
#' @param refFct refining function
#' @param doPlot plotting at each step
#' @param lmzExcl list of m/z excluded form the blank correction
#' @param blList list of blank filles xcmsRaw object
#' @param nSlaves number of slaves for parallel
#' @param useMS1file use the MS1 scan in MS1file
#' @param keepOld keep the former RoiInfos data frame
#' @import foreach
#' @import doParallel
#' @return metaboGoS obj with RoiInfos / PeakInfos /params
#' @export
refinePIRoi<-function(obj,
parDeco, #=list(span=7,bw=0.01,minNoiseMS1=10000),
refFct=metaboGoS:::.MGrefFct0,lmzExcl=c(),
doPlot=TRUE,blList=NULL, nSlaves=1,useMS1file=TRUE,keepOld=TRUE){
## blList=NULL;nSlaves=1;useMS1file=TRUE;doPlot=TRUE;keepOld=TRUE
if(!"ddaSet"%in%class(obj)) stop("Error: not a ddaSet object")
strt=as.integer(as.POSIXct( Sys.time() ))
######## ######## ########
oparDeco=obj$parDeco
if(!is.null(oparDeco)) for(i in names(oparDeco)[!names(oparDeco)%in%parDeco]) parDeco[[i]]=oparDeco[[i]]
# blList=list()
# if(!is.null(blFiles)){
# blFiles=blFiles[file.exists(blFiles)]
# if(length(blFiles)>0) for(i in 1:length(blFiles)) blList[[i]]=xcmsRaw(blFiles[i],includeMSn = FALSE)
# }
#
cefi=normalizePath(paste0(obj$File$dirName,"/",obj$File$fileName))
## check MS1
if(useMS1file & !is.null(obj$File$MS1file)){
ncefi<-suppressWarnings(normalizePath(paste0(obj$File$dirName,"/",obj$File$MS1file)))
if(!file.exists(ncefi) & file.exists(obj$File$MS1file)) ncefi<-suppressWarnings(normalizePath(obj$File$MS1file))
if(file.exists(ncefi)) cefi=ncefi
}
xr=xcmsRaw(cefi,includeMSn = FALSE)
sc2rt=round(xr@scantime/60,5)
cat("Refining/grouping potential PIs from '",unclass(xr@filepath),"'\n",sep="")
psdrt=ifelse(is.null(parDeco$psdrt),NA,parDeco$psdrt)
if(is.na(psdrt)) psdrt=parDeco$psdrt=quantile(sc2rt/60,.1)
### set new pseudo Scan
newrtx=(1:ceiling(max(xr@scantime/60/psdrt)))*psdrt
sc2nrt=apply(abs(outer(newrtx,xr@scantime/60,"-")),2,which.min)
sc2nrt=cbind(scan=1:length(xr@scantime),nscan=sc2nrt,nrt=round(newrtx[sc2nrt],5))
###
# if(!is.null(blList)) blList=lapply(blList,function(xrb){
# sc2nrt2=apply(abs(outer(newrtx,xrb@scantime/60,"-")),2,which.min)
# sc2nrt2=cbind(scan=1:length(xrb@scantime),nscan=sc2nrt2,nrt=round(newrtx[sc2nrt2],5))
# list(xrb,sc2nrt2)
# })
minRTwin=ifelse(is.null(parDeco$minRTwin),NA,parDeco$minRTwin)
if(is.na(minRTwin)) parDeco$minRTwin=minRTwin=quantile(diff(xr@scantime/60),.9)+2.1*psdrt ## set it to near largest delta rt + 2*psdrt
linpardeco=c("ppm","dmz","psdrt","drt","rtlim","minHeightMS1","minNoiseMS1", "minZero","minRTwin","span","bw" )
lmiss=linpardeco[!linpardeco%in%names(parDeco)]
if(length(lmiss)) stop(paste0('Missing parameters: ',paste(lmiss,sep=" ")))
ROImat=obj$RoiInfos
if(keepOld) obj$OldRoiInfos=obj$RoiInfos
if(is.null(ROImat)) stop('Matrix defing ROI is missing!')
######## ######## ########
# Excluded masses from blank calculation
# ROImat=cbind(ROImat,"dobl"=1)
# if(length(lmzExcl)>0){
# l2rm=which(outer(ROImat[,"mzmin"],lmzExcl,"-")<0 & outer(ROImat[,"mzmax"],lmzExcl,"-")>0,arr=T)[,1]
# if(length(l2rm)) ROImat[l2rm,"dobl"]=0
# }
#
#
###################### Refine frames
llre=list()
ll=ROImat$RoiId#[111:120]
lperc=ll[round(seq(1,length(ll),length=12)[2:11])]
if(nSlaves>1) nSlaves=max(1, min(nSlaves,detectCores()-1))
if(nSlaves>1){
clProc<-makeCluster(nSlaves)
doParallel::registerDoParallel(clProc)
cat(" -- registering ",nSlaves," clusters\n",sep="")
}
### Parallele bit
if(nSlaves>1)
#llre=foreach(i = ll, .export = fct2exp,.packages = c("igraph","xcms","GRMeta"), .verbose =F) %dopar%{
llre=foreach(idx = ll,.packages = c("metaboGoS"), .verbose =FALSE) %dopar%{
# compbl=any(ROImat[idx,c("dobl")]>0)
lmz=range(ROImat[idx,c("mzmin","mzmax")])
lrt=range(ROImat[idx,c("rtmin","rtmax")])+c(-3,3)*parDeco$psdrt*(parDeco$span+1) ## add 3*span to make use not in the downslope
eic=GRMeta:::.GRrawMat(xr,mzrange = lmz, rtrange = lrt*60,padsc =TRUE)
if(sum(!is.na(eic[,"y"]))<2) return(list())
if(max(eic[,"y"],na.rm=T)<parDeco$minHeightMS1) return(list())
eicbl=NULL
if(length(blList)>0){
eicbl=do.call("rbind",lapply(1:length(blList),function(ibl){
i=cbind(GRMeta:::.GRrawMat(blList[[ibl]],mzrange = lmz, rtrange = lrt*60+c(-10,10),padsc =T),blid=ibl)
if(sum(!is.na(i[,"mz"]))<5) return(NULL)
i
}))
}
eic=cbind(eic,sc2nrt[eic[,"scan"],2:3])
re=refFct(eic,eicbl,parDeco,idx,doPlot = FALSE,lExcl=lmzExcl) ##
if(is.null(re)) return(list())
re
}
## Serial bit
if(nSlaves<=1) for(idx in ll){
if(idx %in% lperc) cat(idx,"(",which(ll==idx),") ",sep="")
# compbl=any(ROImat[idx,c("dobl")]>0)
lmz=range(ROImat[idx,c("mzmin","mzmax")])
lrt=range(ROImat[idx,c("rtmin","rtmax")])+c(-3,3)*parDeco$psdrt*(parDeco$span+1) ## add 3*span to make use not in the downslope
eic=GRMeta:::.GRrawMat(xr,mzrange = lmz, rtrange = lrt*60,padsc =T)
if(sum(!is.na(eic[,"y"]))<2) next
if(max(eic[,"y"],na.rm=T)<parDeco$minHeightMS1) next
eicbl=NULL
if(length(blList)>0 ){
eicbl=do.call("rbind",lapply(1:length(blList),function(ibl){
i=cbind(GRMeta:::.GRrawMat(blList[[ibl]],mzrange = lmz, rtrange = lrt*60+c(-10,10),padsc =T),blid=ibl)
if(sum(!is.na(i[,"mz"]))<5) return(NULL)
i
}))
}
eic=cbind(eic,sc2nrt[eic[,"scan"],2:3])
re=refFct(eic,eicbl,parDeco,idx,doPlot = doPlot,lExcl=lmzExcl) ## change here + update the
if(is.null(re)) next
llre=c(llre,list(re))
}
if(nSlaves>1) stopCluster(clProc)
## combine results
llre=llre[sapply(llre,length)>0]
if(length(llre)==0){
cat("Something went wrong no acceptable ROI were found!!!")
return(obj)
}
ares=do.call("rbind",llre)
ares=ares[which(ares$pk.rt>=parDeco$rtlim[2] & ares$pk.rt<=parDeco$rtlim[3]),]
## Peak matrix
lvar=unique(c("pk.id","cl.id","roi.id",names(ares)[c(grep("pk\\.",names(ares)))]))
PKmat=ares[,lvar]
names(PKmat)[1:3]=c("PkId",'ClId','RoiId')
names(PKmat)=gsub("^pk.","",names(PKmat))
ldups=names(which(table(PKmat$PkId)>1))
for(i in ldups) PKmat$PkId[PKmat$PkId==i]=paste0(PKmat$PkId[PKmat$PkId==i],"-",1:sum(PKmat$PkId==i))
## ROimatrix
lvar=unique(c("roi.id",names(ares)[grep("^roi",names(ares))]))
ROImat2=ares[,lvar]
names(ROImat2)[1]=c('RoiId')
names(ROImat2)=gsub("^roi.","",names(ROImat2))
add2roi=do.call("rbind",tapply(1:nrow(PKmat),PKmat$RoiId,
function(x) round(c(max(PKmat$int.ap[x]),max(PKmat$sbr[x]),round(range(PKmat[x,c("mzmin",'mzmax')]),5),
rt=PKmat$rtap[x[which.max(PKmat$int.ap[x])]],range(PKmat[x,c("rtmin",'rtmax')])),4)))
colnames(add2roi)=c('intensity',"sbr","mzmin","mzmax","rt","rtmin","rtmax")
ROImat2=cbind(ROImat2[match(rownames(add2roi),ROImat2$RoiId),],add2roi)
cat(" -- number of Peaks/ROIs after refinement: ",nrow(PKmat),"/",nrow(ROImat2)," [+",as.integer(as.POSIXct( Sys.time() ))-strt,"sec.]\n",sep="")
l2k=which(ROImat2[,"intensity"]>=parDeco$minHeightMS1 & abs(ROImat2[,"rtmax"]-ROImat2[,"rtmin"])>=parDeco$minRTwin &
ROImat2[,"rtmin"]<=max(parDeco$rtlim) & ROImat2[,"rtmax"]>=min(parDeco$rtlim) & ROImat2$coda>0.2 & ROImat2$sbr>=2)
ROImat2=ROImat2[l2k,,drop=F]
ROImat2=ROImat2[order(ROImat2[,'mz50'],ROImat2[,"rtmin"]),,drop=F]
rownames(ROImat2)=ROImat2$RoiId #sprintf("R%.4f@%.1f-%.1f",ROImat2[,"mz50"],ROImat2[,"rtmin"],ROImat2[,"rtmax"])
PKmat=PKmat[which(PKmat$RoiId%in%ROImat2$RoiId),]
rownames(PKmat)=PKmat$PkId
###########################
### Associate MS/MS
parDeco$maxRTwin=rtwin=ifelse(is.null(parDeco$maxRTwin),parDeco$minRTwin*2,parDeco$maxRTwin)
####### ROI
ms2p=obj$MS2toPrec
llsplit=split(1:nrow(ms2p), ceiling(seq_along(1:nrow(ms2p))/200))
Roi2sp=do.call("rbind",lapply(llsplit,function(x){
ddmz=outer(ms2p$mz[x],ROImat2[,"mzmin"],"-")>=(-parDeco$dmz/2) & outer(ms2p$mz[x],ROImat2[,"mzmax"],"-")<=(parDeco$dmz/2) ## slight padding zeros instaeds?
ddrt=outer(ms2p$rt[x],ROImat2[,"rtmin"],"-")>=(-rtwin) & outer(ms2p$rt[x],ROImat2[,"rtmax"],"-")<=rtwin
re=which(ddrt & ddmz,arr=T)
re[,1]=x[re[,1]]
re}))
# col: ROImat2, row=oMS2ROI
nMS2ROI=ms2p[Roi2sp[,"row"],]
nMS2ROI$RoiId=ROImat2$RoiId[Roi2sp[,"col"]]
############### Peak cluster
clmat=do.call("rbind",tapply(1:nrow(PKmat),PKmat$ClId,
function(x) data.frame(rtmin=min(PKmat[x,c("rtmin")]),rtmax=max(PKmat[x,c("rtmax")]),
mzmin=min(PKmat[x,c("mzmin")]),mzmax=max(PKmat[x,c("mzmax")]),check.rows = F,check.names = F)))
Cl2sp=do.call("rbind",lapply(llsplit,function(x){
ddmz=outer(ms2p$mz[x],clmat[,"mzmin"],"-")>=(-parDeco$dmz/2) & outer(ms2p$mz[x],clmat[,"mzmax"],"-")<=(parDeco$dmz/2) ## slight padding zeros instaeds?
ddrt=outer(ms2p$rt[x],clmat[,"rtmin"],"-")>=(-rtwin) & outer(ms2p$rt[x],clmat[,"rtmax"],"-")<=rtwin
re=which(ddrt & ddmz,arr=T)
re[,1]=x[re[,1]]
re}))
nMS2Cl=ms2p[Cl2sp[,"row"],]
nMS2Cl$ClId=rownames(clmat)[Cl2sp[,"col"]]
############### Peak
Pk2sp=do.call("rbind",lapply(llsplit,function(x){
ddmz=outer(ms2p$mz[x],PKmat[,"mzmin"],"-")>=(-parDeco$dmz/2) & outer(ms2p$mz[x],PKmat[,"mzmax"],"-")<=(parDeco$dmz/2) ## slight padding zeros instaeds?
ddrt=outer(ms2p$rt[x],PKmat[,"rtmin"],"-")>=(-rtwin) & outer(ms2p$rt[x],PKmat[,"rtmax"],"-")<=rtwin
re=which(ddrt & ddmz,arr=T)
re[,1]=x[re[,1]]
re}))
nMS2Pk=ms2p[Pk2sp[,"row"],]
nMS2Pk$PkId=PKmat$PkId[Pk2sp[,"col"]]
##########
rownames(nMS2Pk)=rownames(nMS2Cl)=rownames(nMS2ROI)=NULL
cat(" --> Final number of ROIs: ",nrow(ROImat2)," assoc. to ",length(unique(nMS2ROI$SpId)),"/",nrow(obj$MS2Infos)," MS/MS\n",
" --> Final number of peak clusters: ",length(unique(PKmat$ClId))," assoc. to ",length(unique(nMS2Cl$SpId)),"/",nrow(obj$MS2Infos)," MS/MS\n",
" --> Final number of peaks: ",nrow(PKmat)," assoc. to ",length(unique(nMS2Pk$SpId)),"/",nrow(obj$MS2Infos)," MS/MS",
" [+",as.integer(as.POSIXct( Sys.time() ))-strt,"sec.]\n",sep="")
obj$RoiInfos=ROImat2
obj$PeakInfos=PKmat
obj$Sp2ROI=nMS2ROI
obj$Sp2Cl=nMS2Cl
obj$Sp2Pk=nMS2Pk
obj$parDeco=parDeco
class(obj) = unique(append(class(obj), "ddaSet"))
invisible(obj)
}
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
#' @name extractPIRoi
#' @title Extract ROI around spectra
#'
#' Fast association of spectra to ROIs from potential precursors
#'
#' @param obj metaboGoS obj
#' @param parDeco deconvolution parameter list
#' @param minPIwindow window size in min. around the spectra
#' @param PIweight weighting
#' @param useMS1file use the MS1 scan in MS1file
###' @param nSlaves number of slaves for parallel
#' @import foreach
#' @import doParallel
#' @return metaboGoS obj with RoiInfos / MS2ROI /params
#' @export
extractPIRoi<-function(obj,
parDeco=list(minPurity=c(0.2,0.4),
ppm=5,dmz=0.001,psdrt=NA,
drt=1,rtlim=c(-Inf,-Inf,Inf,Inf),
minHeightMS1=100000,minZero=200),
minPIwindow=NA,PIweight=2,useMS1file=TRUE){
## minPIwindow=0.05;PIweight=2;nSlaves=10
if(!"ddaSet"%in%class(obj)) stop("Error: not a ddaSet object")
strt=as.integer(as.POSIXct( Sys.time() ))
ms2inf=obj$MS2Infos
cefi=normalizePath(paste0(obj$File$dirName,"/",obj$File$fileName))
## check MS1
if(useMS1file & !is.null(obj$File$MS1file)){
ncefi<-suppressWarnings(normalizePath(paste0(obj$File$dirName,"/",obj$File$MS1file)))
if(!file.exists(ncefi) & file.exists(obj$File$MS1file)) ncefi<-suppressWarnings(normalizePath(obj$File$MS1file))
if(file.exists(ncefi)) cefi=ncefi
}
xr=xcmsRaw(cefi,includeMSn = FALSE)
cat("Extract potential PIs from '",unclass(xr@filepath),"' comprising ",nrow(ms2inf)," MS/MS and ",length(xr@scantime)," MS1\n",sep="")
sc2rt=round(xr@scantime/60,5)
psdrt=ifelse(is.null(parDeco$psdrt),NA,parDeco$psdrt)
if(is.na(psdrt)) psdrt=parDeco$psdrt=quantile(sc2rt/60,.1)
if(is.na(minPIwindow)) minPIwindow=quantile(diff(xr@scantime/60),.9)+2.1*psdrt ## set it to near largest delta rt + 2*psdrt
parDeco$minRTwin=minRTwin=minPIwindow
if(any(is.na(ms2inf$WinSize))) stop('Please set the delta m/z around precursor ions')
rtlim=parDeco$rtlim
rtlim[which(rtlim>max(sc2rt))]=max(sc2rt)
rtlim[which(rtlim<min(sc2rt))]=min(sc2rt)
parDeco$rtlim=rtlim
##### Check
linpardeco=c("minPurity" , "ppm","dmz","psdrt","drt","rtlim","minHeightMS1", "minZero","minRTwin" )
lmiss=linpardeco[!linpardeco%in%names(parDeco)]
if(length(lmiss)) stop('Missing parameters:',paste(lmiss,sep=" "))
### Get potential precursor within minRTwin of the spectra
amzrt=list()
for(ipmz in 1:nrow(ms2inf)){
linrange=which(abs(sc2rt-ms2inf[ipmz,'RT'])<=minRTwin)
if(length(linrange)==0) next
scrange=range(linrange)
imz=ms2inf[ipmz,]$PrecMZ
mzrange=imz+c(-1,1)*(ms2inf[ipmz,]$WinSize+parDeco$dmz)
re=GRMeta:::.GRrawMat(xr,scanrange=scrange,mzrange=mzrange)
if(nrow(re)==0) next #return(NULL)# return(c(PInt=NA,MZ=NA,SBR=NA,CodaWide=NA))
we=.MGgetMZweight(re[,"mz"]-imz,PIweight)
re[,"y"]=re[,"y"]*we
pint=re[,"y"]/tapply(re[,"y"],re[,"scan"],sum)[as.character(re[,"scan"])]
if(max(pint)<parDeco$minPurity[1]) next #return(NULL)# return(c(PInt=NA,MZ=NA,SBR=NA,CodaWide=NA))
re=cbind(i=ipmz,pint=pint,re[,c("mz","rt","y"),drop=F])
amzrt[[ipmz]]=re[pint>=parDeco$minPurity[1],,drop=F]
}
amzrt=do.call("rbind",amzrt)
cat(" -- ",nrow(amzrt)," ions found: ",length(unique(amzrt[,1]))," spectra out of ",nrow(ms2inf),"\n Purity summary:\n",sep="")
print(summary(amzrt[,2]))
### keep the actual mz/pint for each spectra for later
suppressWarnings(sp2prec<-data.frame(do.call("rbind",tapply(1:nrow(amzrt),amzrt[,1],function(x){
itmp=amzrt[x,,drop=F]
drt=itmp[,"rt"]-obj$MS2Infos[itmp[1,1],]$RT
l=c()
if(any(drt<0)) l=which(drt==max(drt[drt<0]))
if(any(drt>0)) l=c(l,which(drt==min(drt[drt>0])))
itmp[l,,drop=F]
}))))
names(sp2prec)[1]="SpId"
sp2prec$rt=round(sp2prec$rt,4)
sp2prec$SpId=obj$MS2Infos$SpId[sp2prec$SpId]
### merge them
llppm<-GRMeta:::.GRsplistMZ(amzrt[,"mz"],dppm = parDeco$ppm,dmz=parDeco$dmz,typ="min") ## min distance b/w dppm/dmz
ROImat=do.call("rbind",lapply(llppm,function(x) c(median(amzrt[x,"mz"]),range(amzrt[x,"mz"]),round(range(amzrt[x,"rt"]),4))))
colnames(ROImat)=c("mz","mzmin","mzmax","rtmin","rtmax")
ROImat=ROImat[order(ROImat[,1]),,drop=F]
cat(" -- reduced to ",nrow(ROImat)," ROIs [+",as.integer(as.POSIXct( Sys.time() ))-strt,"sec.]\n",sep="")
### merge wide m/z/RT windows
ROImat1=lapply(1:nrow(ROImat),function(ix){
xroi=ROImat[ix,]
lmz=range(c(xroi[c( "mz","mz","mzmin", "mzmax")]*(1+1*c(-2,2,-1,1)*parDeco$ppm*10^-6),xroi[c("mzmin", "mzmax")]+c(-1,1)*parDeco$dmz))+c(-1.1,1.1)*parDeco$dmz ## very large window!!
lrt=range(xroi[c( "rtmin","rtmax")])+c(-1,1)*parDeco$drt
ieic=xcms::rawEIC(xr, mzrange =lmz, rtrange = lrt*60)
l=which(ieic$intensity>=parDeco$minZero)
ll=GRMeta:::.GRsplist(sc2rt[l],l,d=parDeco$drt*2+2.1*psdrt)
do.call("rbind",lapply(ll,function(x){
newx=sc2rt[range(ieic$scan[x])]+c(-1,1)*(parDeco$drt)
#x2=x[ieic$scan[x]>=rangeSc[2] & ieic$scan[x]<=rangeSc[3]]
#if(length(x2)<1) return(c(ieicoi[1],lmz[1],lmz[2],newx,0,0,0))
aprt=ieic$scan[x[which.max(ieic$intensity[x])]]
return(c(xroi[1],lmz[1],lmz[2],newx,diff(newx),max(ieic$intensity[x]),sc2rt[aprt]))
}))
})
ROImat1=do.call("rbind",ROImat1)
colnames(ROImat1)=c(colnames(ROImat),"drt","intensity","rt")
l2k=which(ROImat1[,"intensity"]>=parDeco$minHeightMS1 & ROImat1[,"drt"]>=minRTwin & ROImat1[,"rtmin"]<=rtlim[4] & ROImat1[,"rtmax"] >= rtlim[1])
ROImat1=ROImat1[l2k,,drop=FALSE]
l=which(ROImat1[,"rtmin"]<=rtlim[1])
if(length(l)) ROImat1[l,"rtmax"]=rtlim[1]
l=which(ROImat1[,"rtmax"]<=rtlim[4])
if(length(l)) ROImat1[l,"rtmax"]=rtlim[4]
### Combine dupl -> remerge retention time -> could be improved!?!
llover=GRMeta:::.GRisover(ROImat1[,"mzmin"],ROImat1[,"mzmax"],retOne = T)
ROImat2=do.call("rbind",lapply(llover,function(x){
if(length(x)==0) return(ROImat1[x,,drop=F])
c(mean(ROImat1[x,"mz"]),min(ROImat1[x,"mzmin"]),max(ROImat1[x,"mzmax"]),min(ROImat1[x,"rtmin"]),max(ROImat1[x,"rtmax"]),0,
max(ROImat1[x,"intensity"]),ROImat1[x[which.max(ROImat1[x,"intensity"])],"rt"])
}))
colnames(ROImat2)=colnames(ROImat1)
ROImat2[,"drt"]=ROImat2[,"rtmax"]-ROImat2[,"rtmin"]
l2k=which(ROImat2[,"intensity"]>=parDeco$minHeightMS1 & ROImat2[,"drt"]>=minRTwin & ROImat2[,"rtmin"]<=rtlim[4] & ROImat2[,"rtmax"]>=rtlim[1])
ROImat2=ROImat2[l2k,]
cat(" -- found: ",nrow(ROImat2)," potential ROIs [+",as.integer(as.POSIXct( Sys.time() ))-strt,"sec.]\n",sep="")
### Associate MS/MS to ROI
llsplit=split(1:nrow(amzrt), ceiling(seq_along(1:nrow(amzrt))/200))
Roi2sp=do.call("rbind",lapply(llsplit,function(x){
ddmz=outer(amzrt[x,"mz"],ROImat2[,"mzmin"],"-")>=(-parDeco$dmz/2) & outer(amzrt[x,"mz"],ROImat2[,"mzmax"],"-")<=parDeco$dmz/2
ddrt=outer(amzrt[x,"rt"],ROImat2[,"rtmin"],"-")>=0 & outer(amzrt[x,"rt"],ROImat2[,"rtmax"],"-")<=0
re=which(ddrt & ddmz,arr=T)
re[,1]=x[re[,1]]
re}))
Roi2sp=cbind(amzrt[Roi2sp[,1],],Roi2sp)
Roi2sp=cbind(Roi2sp,rtms2=obj$MS2Infos[Roi2sp[,"i"],]$RT)
lso=order(Roi2sp[,"i"],Roi2sp[,"col"],abs(Roi2sp[,"rt"]-Roi2sp[,"rtms2"]))
Roi2sp=Roi2sp[lso,]
## keep the maximum Pint around the original RT
llsp=unlist(tapply(1:nrow(Roi2sp),Roi2sp[,"i"],function(x) tapply(x,Roi2sp[x,"col"],function(y) y[1:min(3,length(y))])),recursive = F)
llsp=sapply(llsp,function(x) x[which.max(Roi2sp[x,"pint"])])
# llsp=unlist(tapply(1:nrow(Roi2sp),Roi2sp[,1],function(x) tapply(x,Roi2sp[x,6],function(y) y)),recursive = F)
Roi2sp=Roi2sp[llsp,]
Roi2sp=Roi2sp[Roi2sp[,"col"]%in%names(which(tapply(Roi2sp[,"pint"],Roi2sp[,"col"],max)>=parDeco$minPurity[2])),]
lurois=sort(unique(Roi2sp[,"col"]))
lnurois=1:length(lurois)
Roi2sp[,"col"]=lnurois[match(Roi2sp[,"col"],lurois)]
ROImat3=data.frame(RoiId=NA,ROImat2[lurois,,drop=FALSE])
ROImat3$RoiId=rownames(ROImat3)=sprintf("R%.4f@%.1f-%.1f",ROImat3[,"mz"],ROImat3[,"rtmin"],ROImat3[,"rtmax"])
SpId2ROI=data.frame(SpId=ms2inf$SpId[Roi2sp[,"i"]],
RoiId=ROImat3$RoiId[Roi2sp[,"col"]],
PInt=round(Roi2sp[,"pint"],4),
yPrec=round(Roi2sp[,"y"],6),
mzPrec=round(Roi2sp[,"mz"],6),
rtPrec=round(Roi2sp[,"rt"],6))
cat(" --> Final number of ROIs: ",nrow(ROImat3)," assoc. to ",length(unique(SpId2ROI$SpId)),"/",nrow(ms2inf),
" MS/MS [+",as.integer(as.POSIXct( Sys.time() ))-strt,"sec.]\n",sep="")
obj$RoiInfos=ROImat3
obj$MS2toMS1=SpId2ROI
obj$MS2toPrec=sp2prec
obj$parDeco=parDeco
class(obj) = unique(append(class(obj), "ddaSet"))
invisible(obj)
}
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
## Refine ROIs
#'
#' Refining ROIs
#'
#' @keywords internal
#'
#' @export
.MGrefineROIs<-function(eic,parDeco,minRTwin,drt){
ll2use=GRMeta:::.GRsplitBoth2(eic[,"mz"],eic[,"rt"],dppm=parDeco$ppm*2,dmz=parDeco$dmz*2,drt=parDeco$drt+7*drt) ## allow +/- 3 pseudo scan around window
## only keep segments
todo=which(sapply(ll2use,function(x){
l2chk=x[which(eic[x,"rt"]>=parDeco$rtlim[2] & eic[x,"rt"]<=parDeco$rtlim[3])]
diffrt=ifelse(length(l2chk)>1,diff(range(eic[l2chk,"rt"])),0)
any(eic[l2chk,"y"]>parDeco$minHeightMS1) & diffrt>=minRTwin}))
if(length(todo)==0) return(NULL)
ll2use=ll2use[todo]
winmat=NULL
## loop over segments
if(length(ll2use)) for(idx in 1:length(ll2use)){
xeic=eic[ll2use[[idx]],]
rmz=range(xeic[,"mz"])
mindmz=min(c(mean(rmz)*parDeco$ppm*10^-6,parDeco$dmz))/2
re=.MGgetConsecutive(xeic,ncons=2,ppm=parDeco$ppm,dmz=parDeco$dmz,mindmz,minRTwin=minRTwin,minNoise=parDeco$minNoiseMS1,minHeight=0)
if(is.null(re[[1]])) next
l=which(re$st[,6]>=parDeco$minHeightMS1)
if(length(l)==0) next
winmat=rbind(winmat,.MGgetWindow(re$ll[l],xeic,ppm=parDeco$ppm,dmz=parDeco$dmz,minRTwin=minRTwin,maxRTwin=parDeco$drt/2,maxIter = 6))
colnames(winmat)=c( "rtmin","rtmax","mz50","mz10","mz90","intensity" ,"mzap","mzmin","mzmax" )
}
return(winmat)
}
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
### looking for consecutive scans
#'
#' Getting consecutive m/z within ppm/dmz ROIs
#'
#' @keywords internal
#'
#' @export
.MGgetConsecutive<-function(xeic,ncons=3,ppm=5,dmz=0.001,mindmz=dmz,minRTwin=0.01,minNoise=10000,minHeight=minNoise*2){
# re=.MGgetConsecutive(xeic,ncons=2,ppm=parDeco$ppm,dmz=parDeco$dmz,mindmz,minRTwin=minRTwin,minNoise=parDeco$minNoiseMS1,minHeight=0)
# ncons=2;ppm=parDeco$ppm;dmz=parDeco$dmz;mindmz;minRTwin=minRTwin;minNoise=parDeco$minNoiseMS1;minHeight=0
llx<-.MGinquickSplit(xeic,ncons = ncons,dppm = ppm,dmz=dmz,minNoise = minNoise)
if(length(llx[[1]])==0) return(list(NULL,NULL))
ldrmz=sapply(llx[[1]],function(x) diff(range(xeic[x,"mz"])))
l=which(ldrmz<=max(quantile(ldrmz,.75),mindmz))
# cols=(1:nrow(xeic)%in%unique(unlist(llx[[1]])))*1+(1:nrow(xeic)%in%unique(unlist(llx[[1]][l])))*1+1
# if(doPlot) plot(xeic,cex=.5,pch=16,col=cols,main=imain)
mllx=lapply(GRMeta:::.GRmergellx(llx[[1]][l]),unique)
llx2=llx[[1]][-l]
add2close=do.call("cbind",lapply(llx2,function(y) sapply(mllx,function(x) any(y%in%x))))
toloop=ifelse(length(llx2)>0,any(colSums(add2close)==1),FALSE)
while(toloop){
l= which(colSums(add2close)==1)
for(i in l) mllx[[which(add2close[,i])]]=c(mllx[[which(add2close[,i])]],llx2[[i]])
llx2=llx2[-l]
if(length(llx2)==0) break
add2close=do.call("cbind",lapply(llx2,function(y) sapply(mllx,function(x) any(y%in%x))))
toloop=ifelse(length(llx2)>0,any(colSums(add2close)==1),FALSE)
}
mllx=lapply(mllx,function(x) sort(unique(x)))
# if(doPlot) for(i in 1:length(mllx)) lines(xeic[mllx[[i]],1:2])
if(length(llx2)>0){
llx2=GRMeta:::.GRmergellx(llx2)
add2close=do.call("cbind",lapply(llx2,function(y) sapply(mllx,function(x) any(y%in%x))))
for(ik in 1:2){
toloop=ifelse(length(llx2)>0,any(colSums(add2close)==ik),FALSE)
while(toloop){
l= which(colSums(add2close)==ik)
for(i in l) for(k in which(add2close[,i])) mllx[[k]]=c(mllx[[k]],llx2[[i]])
llx2=llx2[-l]
mllx=GRMeta:::.GRmergellx(mllx)
if(length(llx2)==0) break
add2close=do.call("cbind",lapply(llx2,function(y) sapply(mllx,function(x) any(y%in%x))))
toloop=ifelse(length(llx2)>0,any(colSums(add2close)==ik),FALSE)
}
}
}
if(length(llx2)>0) llx2=llx2[sapply(llx2,function(x) max(xeic[x,3]))>minHeight]
#if(doPlot) if(length(llx2)>0) for(i in 1:length(llx2)) lines(xeic[llx2[[i]],1:2],lwd=2,col=2)
if(length(llx2)>0) mllx=c(mllx,llx2)
stllx=do.call("rbind",lapply(mllx,function(x) c(range(xeic[x,"rt"]),quantile(xeic[x,"mz"],c(.5,.1,.9)),max(xeic[x,"y"]),
xeic[x[which.max(xeic[x,"y"])],"mz"],range(xeic[x,"mz"]))))
lso=order(stllx[,3])
stllx=stllx[lso,,drop=F]
mllx=mllx[lso]
for(ik in 1:3){
winsize=(ik+.1)*minRTwin
if(nrow(stllx)>1) hasovermz<-GRMeta:::.GRisover2(stllx[,4],stllx[,5],stllx[,1],stllx[,2],retOne = T,thr1=mindmz,thr2=winsize) else hasovermz=list(1)
while(any(sapply(hasovermz,length)>1)){
mllx<-lapply(hasovermz,function(x) sort(unique(unlist(mllx[x]))))
stllx=do.call("rbind",lapply(mllx,function(x) c(range(xeic[x,"rt"]),quantile(xeic[x,"mz"],c(.5,.1,.9)),max(xeic[x,"y"]),
xeic[x[which.max(xeic[x,"y"])],"mz"],range(xeic[x,"mz"]))))
if(nrow(stllx)>1) hasovermz<-GRMeta:::.GRisover2(stllx[,4],stllx[,5],stllx[,1],stllx[,2],retOne = T,thr1=mindmz,thr2=winsize) else hasovermz=list(1)
}
}
#### fill in gaps
for(i in 1:nrow(stllx)){
x=stllx[i,]
l=which(xeic[,"rt"]>=x[1] & xeic[,"rt"]<=x[2] &
((xeic[,"mz"]>=x[4] & xeic[,"mz"]<=x[5]) | abs(1-xeic[,"mz"]/x["mz"])<ppm*10^-6 | abs(1-xeic[,"mz"]/x[3])<ppm*10^-6))
mllx[[i]]=sort(unique(c(mllx[[i]],l)))
}
stllx=do.call("rbind",lapply(mllx,function(x) c(range(xeic[x,"rt"]),quantile(xeic[x,"mz"],c(.5,.1,.9)),max(xeic[x,"y"]),
xeic[x[which.max(xeic[x,"y"])],"mz"],range(xeic[x,"mz"]))))
lso=order(stllx[,3])
stllx=stllx[lso,,drop=F]
mllx=mllx[lso]
return(list(ll=mllx,st=stllx))
}
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
#### expand/reduces windows around initial guesses
#'
#' Merge consecutive set of scans
#'
#' @param mllx initial list of cons scans
#' @param xeic scan/mz/rt/y matrix
#' @param ppm tolerance in ppm
#' @param dmz tolerance in Da
#' @param minRTwin window stuff
#' @param maxRTwin window stuff
#' @param maxIter max number of iteration, usually done within 1-3
#'
#' @keywords internal
#' @export
.MGgetWindow<-function(mllx,xeic,ppm,dmz,minRTwin,maxRTwin,maxIter=6){
## Comp stats
stllx=stllx0=do.call("rbind",lapply(mllx,function(x) c(range(xeic[x,"rt"]),quantile(xeic[x,"mz"],c(.5,.1,.9)),max(xeic[x,"y"]),
xeic[x[which.max(xeic[x,"y"])],"mz"],range(xeic[x,"mz"]))))
lmiss0=lmiss=1:nrow(xeic)
lmiss=lmiss[!lmiss%in%unlist(mllx)]
if(length(lmiss)==0 & nrow(stllx)==1) return(stllx)
iround=1
winover=maxdrt=minRTwin*3.1
rmz=range(xeic[,"mz"])
mindmz=min(c(mean(rmz)*ppm*10^-6,dmz))/2
doLoop=TRUE
while(doLoop){
lmiss=lmiss[!lmiss%in%unlist(mllx)]
mdppm=abs(10^6*(1-outer(xeic[lmiss,"mz"],stllx[,7],"/")))
mdmz=abs(outer(xeic[lmiss,"mz"],stllx[,7],"-"))
mdppm50=abs(10^6*(1-outer(xeic[lmiss,"mz"],stllx[,3],"/")))
mdmz50=abs(outer(xeic[lmiss,"mz"],stllx[,3],"-"))
mdmzwi=(mdppm<=ppm | mdmz<=dmz | mdppm50<=ppm | mdmz50<=dmz)
mdmzin=outer(xeic[lmiss,"mz"],stllx[,4],"-")>=0 & outer(xeic[lmiss,"mz"],stllx[,5],"-")<=0 ## in the 10/90
hasmz=(mdmzwi | mdmzin)
mdrtle=outer(xeic[lmiss,"rt"],stllx[,1],"-") ## pos inside
mdrtri=outer(xeic[lmiss,"rt"],stllx[,2],"-") ## neg inside
## check if anything to add
toadd=which(hasmz & mdrtle>=(-maxdrt) & mdrtri<=maxdrt,arr=T)
if(length(toadd)) if(nrow(toadd)>0){
toadd[,1]=lmiss[toadd[,1]]
toadd=toadd[!duplicated(toadd[,1]),,drop=F]
if(nrow(toadd)>0){
for(k in unique(toadd[,2])) mllx[[k]]=c(mllx[[k]],toadd[toadd[,2]==k,1])
stllx=do.call("rbind",lapply(mllx,function(x) c(range(xeic[x,"rt"]),quantile(xeic[x,"mz"],c(.5,.1,.9)),max(xeic[x,"y"]),
xeic[x[which.max(xeic[x,"y"])],"mz"],range(xeic[x,"mz"]))))
}}
## merge windows -> greater window for each round
if(nrow(stllx)>1)
hasovermz<-GRMeta:::.GRisover2(stllx[,4],stllx[,5],stllx[,1],stllx[,2],retOne = T,thr1=mindmz,thr2=winover) else hasovermz=list(1)
while(any(sapply(hasovermz,length)>1)){
mllx<-lapply(hasovermz,function(x) sort(unique(unlist(mllx[x]))))
stllx=do.call("rbind",lapply(mllx,function(x) c(range(xeic[x,"rt"]),quantile(xeic[x,"mz"],c(.5,.1,.9)),max(xeic[x,"y"]),
xeic[x[which.max(xeic[x,"y"])],"mz"],range(xeic[x,"mz"]))))
if(nrow(stllx)>1) hasovermz<-GRMeta:::.GRisover2(stllx[,4],stllx[,5],stllx[,1],stllx[,2],retOne = T,thr1=mindmz,thr2=winover) else hasovermz=list(1)
}
iround=iround+1
doLoop=(nrow(stllx0)!=nrow(stllx0) | all(lmiss0%in%lmiss)) & iround<=maxIter
### first round->small rt expansion!
if(iround==2) doLoop=TRUE
maxdrt=maxRTwin+minRTwin
winover=minRTwin*5.1
###
lmiss0=lmiss
stllx0=stllx
}
return(stllx)
}
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
## same of for the in getROIsToF2.R
#'
#' Quick split/merge consecutive scan within ppm or dmz
#'
#' @param dat scan/mz/rt/y matrix
#' @param ncons number of cons scans
#' @param dppm tolerance in ppm
#' @param dmz tolerance in Da
#' @param minNoise only use points greter than minNoise
#' @keywords internal
#'
#' @export
.MGinquickSplit<-function(dat,ncons=4,dppm=5,dmz=0.005,minNoise=1000){
# dat=xeic;ncons=parDeco$ncons;dppm=parDeco$ppm;minNoise=parDeco$minNoise;dmz=parDeco$dmz
vscan=dat[,"scan"]
vint=dat[,"y"]
vmz=dat[,"mz"]
lsup=which(vint>=minNoise)
luscn=min(vscan[lsup]):max(vscan[lsup])
if((diff(range(luscn))+1)<ncons) return(NULL)
lst=luscn[1:(length(luscn)-ncons+1)]
lend=luscn[(ncons):length(luscn)]
llx=unlist(lapply(1:length(lst),function(i){
lx=lsup[which(vscan[lsup]%in%c(lst[i]:lend[i]))]
GRMeta:::.GRsplistMZ(vmz[lx],iv = lx,dppm = dppm*1.001,dmz=dmz) ##
}),recursive = F)
# cat("0: ",Sys.time()-strtx,"\n",sep="");strtx=Sys.time()
# print(length(llx))
if(length(llx)==0) return(NULL)
llxnsc=t(sapply(llx,function(x){y=unique(vscan[x]);c(length(y),range(y),length(x))}))
l2k=which(llxnsc[,1]>=ncons & llxnsc[,1]>(llxnsc[,3]-llxnsc[,2]))
llx=llx[l2k]
llxnsc=llxnsc[l2k,,drop=F]
l2chk=which(llxnsc[,4]>llxnsc[,1])
if(length(l2k)>0){
dmz2=min(1.001*10^-6*dppm*median(vmz[lsup]),dmz)
llx2=unlist(lapply(llx[l2chk],function(lx) GRMeta:::.GRsplist(vmz[lx],iv = lx,dmz2,ismass = F)),rec=F)
llxnsc2=do.call("rbind",lapply(llx2,function(x){y=unique(vscan[x]);c(length(y),range(y),length(x))}))
l2k=which(llxnsc2[,1]>=ncons & llxnsc2[,1]>(llxnsc2[,3]-llxnsc2[,2]))
llx2=llx2[l2k]
llxnsc2=llxnsc2[l2k,,drop=F]
if(length(llx2)>0){llx=c(llx,llx2)[-l2chk];llxnsc=rbind(llxnsc,llxnsc2)[-l2chk,,drop=F]}
}
list(llx,llxnsc)
}
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.