#' @name calibMS1
#'
#' @title Convert CFM file to MGF
#' some stufff
#'
#' @param filein Path to the input file or xcmsRaw object
#' @param lmzcalib Calibration masses or method name in BGIons
#' @param dppm Mass error
#' @param bw Bandwidth to group masses
#' @param minpts Minimum of points
#' @param bkpoint breakpoint mass to calculate robust regression
#' @param calInt calibration interval to compute
#' @param doPlot should plot
#' @import segmented
#' @return list of matches and calibration function/paramters
#' @export
calibMS1<-function(filein,lmzcalib,dppm=20,bw=2.5,minpts=31,bkpoint=median(calInt),calInt=c(70,500),typ=1,doPlot=T,retMat=F){
## dppm=20;bw=2.5;minpts=31;bkpoint=median(calInt);calInt=c(70,500);typ=1;doPlot=T;retMat=F
###
.infct<-function(matdf,calpars,calfct,typ=3){
fail3=FALSE
if(typ==3){
fct2<-function(x,a,b,brk) b*(x >= brk) + (b - (a/2) * (brk-x)^2) * (x < brk)
y=matdf$dppm[matdf$out]
x=matdf$calmz[matdf$out]
b=median(y[x>calpars["brk"]])
x=x-(calpars["brk"]*1.2) ## a few more points above the estimatated breakpoint
y=y-b
a=abs(coef(lm(y[x<0]~I(x[x<0]^2)))[2])*sign(mean(y[x<0]))*-2 ## make sure this is the correct sign
m <- try(nls(dppm ~ fct2(calmz,a,b,brk),
start = list(a = a, b = b,brk=calpars["brk"]),
data=matdf,subset = out),TRUE)
if(!"try-error"%in%class(m)){
calpars<-c(coef(m),low=calInt[1],high=calInt[2])
calfct<-function(x,pars,round=3){
pr= pars["b"]*(x >= pars["brk"]) + (pars["b"] - (pars["a"]/2) * ( pars["brk"]-x)^2) * (x < pars["brk"])
x2=pars[c("low","high")]
pr2= pars["b"]*(x2 >= pars["brk"]) + (pars["b"] - (pars["a"]/2) * ( pars["brk"]-x2)^2) * (x2 < pars["brk"])
pr[x<pars["low"]]=pr2[1]
pr[x>pars["high"]]=pr2[2]
round(pr,round)
}
} else fail3=TRUE
}
### Flat after breakpoint
if(typ==2 | fail3){
fct2<-function(x,a,b,brk) ifelse(x <= brk, a+b * x, a+b*brk)
m <- try(nls(dppm ~ fct2(calmz,a,b,brk),
start = list(a = calpars["a"], b = calpars["b"],brk=calpars["brk"]),
data=matdf,subset = out),TRUE)
if(!"try-error"%in%class(m)){
calpars<-c(coef(m),low=calInt[1])
calfct<-function(x,pars,round=3){
pr=ifelse(x <= pars["brk"], pars["a"]+pars["b"]*x, pars["a"]+pars["b"]*pars["brk"])
pr[x<pars["low"]]=pars["a"]+pars["b"]*pars["low"]
round(pr,round)
}
}
}
matdf$pred=calfct(matdf$calmz,calpars)
matdf$res=matdf$dppm-matdf$pred
return(list(matdf,calpars,calfct))
}
if(is.character(lmzcalib) & length(lmzcalib)==1){
data(BGIons)
if(!lmzcalib%in%names(BGIons)) stop('Method not recognised')
cat("Using predefined background ions for method ",lmzcalib," -> n=",sep="")
lmzcalib=BGIons[[lmzcalib]]
cat(length(lmzcalib),"\n")
}
# dppm=20;bw=2.5;minpts=31;bkpoint=200;calInt=c(80,350)
if("xcmsRaw"%in%class(filein)){
xRaw=filein
cat("xRaw file from ",unclass(xRaw@filepath),"\n",sep="")
}
if("character"%in%class(filein)){
cat("Opening ",filein,"\n")
xRaw=xcmsRaw(filein,includeMSn = FALSE)
}
allcal=list()
for(ical in 1:length(lmzcalib)){
imz=.GRrawMat(xRaw,mzrange=lmzcalib[ical]*(1+c(-1,1)*dppm*10^-6))
if(nrow(imz)<minpts) next
allcal[[ical]]=cbind(imz,cal=ical,cl=.GRsplitOne(imz[,"mz"],bw=bw,ismass = T)$cl)
# plot(imz[,"rt"],10^6*(1-imz[,"mz"]/ical),col=imz[,"cols"],main=ical,ylim=c(-dppm,dppm))
# abline(h=0)
}
matcal=do.call("rbind",allcal)
cal2cl=paste(matcal[,"cal"],matcal[,"cl"],sep=";")
cat(" --found ",length(unique(matcal[,"cal"])),"/",length(lmzcalib)," and ",length(unique(cal2cl))," mass groups\n",sep="")
matdf=data.frame(do.call("rbind",tapply(1:nrow(matcal),cal2cl,function(x)
c(mz=matrixStats::weightedMedian(matcal[x,"mz"],matcal[x,"y"]),n=length(x),matcal[x[1],c("cal" , "cl") ]))))
matdf$calmz=lmzcalib[matdf$cal]
matdf$dppm=10^6*(matdf$mz/matdf$calmz-1)
matdf=matdf[matdf$n>=minpts,]
matdf=matdf[order(matdf$calmz,matdf$mz),]
if(retMat) return(matdf)
### estimate outliers around breakpoint
matdf$res=0
l1=which(matdf$calmz<=bkpoint)
matdf$res[l1]=resid(MASS:::rlm(dppm~calmz,matdf[l1,]))
l2=which(matdf$calmz>bkpoint)
matdf$res[l2]=resid(MASS:::rlm(dppm~1,matdf[l2,]))
matdf$out=(1:nrow(matdf)%in%tapply(1:nrow(matdf),matdf$cal,function(x){
minres=min(abs(matdf$res[x]))
x[abs(matdf$res[x])<(2*minres)]}))
### fit segments+estimate breakpoints
mod=segmented(lm(dppm~calmz,data=matdf,subset = out),seg.Z = ~calmz)
out=car:::outlierTest(lm(resid(mod)~1), cutoff=0.05,n.max=10)
while(any(out$signif)){
matdf[names(out$bonf.p),]$out=FALSE
mod=segmented(lm(dppm~calmz,data=matdf,subset = out),seg.Z = ~calmz)
out=car:::outlierTest(lm(resid(mod)~1), cutoff=0.05,n.max=10)
}
### Regressing around breakpoint
calpars<-c(a=unname(coef(mod)[1]),b=unname(coef(mod)[2]),c=unname(coef(mod)[3]),brk=unname(mod$psi[2]),low=calInt[1],high=calInt[2])
calfct<-function(x,pars,round=3){
pr=pars["a"]+pars["b"]*x
pr[x>pars["brk"]]=(pr+pars["c"]*(x-pars["brk"]))[x>pars["brk"]]
pr[x<pars["low"]]=pars["a"]+pars["b"]*pars["low"]
pr[x>pars["high"]]=pars["a"]+pars["b"]*pars["high"]+pars["c"]*(pars["high"]-pars["brk"])
round(pr,round)
}
matdf$pred=calfct(matdf$calmz,calpars)
matdf$res=matdf$dppm-matdf$pred
oldout=rep(FALSE,nrow(matdf))
## loop over since pb with duplicated mass flagged as outlying
while(typ>1 & sum(oldout!=matdf$out)){
oldout=matdf$out
re=.infct(matdf,calpars,calfct,typ=typ)
matdf=re[[1]]
calpars=re[[2]]
calfct=re[[3]]
l2chk=unique(intersect(matdf$calmz[matdf$out],matdf$calmz[!matdf$out]))
if(length(l2chk)>0){
for(i in l2chk){
l=which(matdf$calmz==i)
matdf$out[l[which.min(abs(matdf$res[l]))]]=TRUE
matdf$out[l[-which.min(abs(matdf$res[l]))]]=FALSE
}
}
}
if(doPlot){
par(mfrow=c(1,2),mar=c(5,4.5,1,.2),cex.main=1)
xl=pretty(range(c(matdf$calmz,calInt)))
pred=calfct(min(xl):max(xl),calpars)
yl=pretty(range(c(matdf$dppm,0,pred),na.rm=T))
plot(matdf$calmz,matdf$dppm,col=matdf$out+1,ylim=range(yl),axes=F,xlim=range(xl),xlab="m/z",ylab="delta ppm",main=basename(filein))
abline(h=-1:1,lty=c(2,1,2))
abline(v=calpars["brk"],col="grey",lwd=2)
abline(v=calpars[names(calpars)%in%c("low","high")],col="grey",lwd=1,lty=2)
axis(2,at=yl,las=2,pos=min(xl));axis(1,at=xl,pos=min(yl))
lines(min(xl):max(xl),pred,col=2,lwd=2)
pred=predict(mgcv:::gam(res~s(calmz),data=matdf,subset=out),newdata = data.frame(calmz=min(xl):max(xl)))
yl=pretty(range(c(matdf$res[matdf$out],-1:1,pred),na.rm=T))
plot(matdf$calmz,matdf$res,col=matdf$out+1,ylim=range(yl),axes=F,xlim=range(xl),xlab="m/z",ylab="Residuals",main=basename(filein))
lines(min(xl):max(xl),pred,col=4,lwd=2)
abline(h=-1:1,lty=c(2,1,2))
abline(v=calpars["brk"],col="grey",lwd=2)
abline(v=calpars[names(calpars)%in%c("low","high")],col="grey",lwd=1,lty=2)
axis(2,at=yl,las=2,pos=min(xl));axis(1,at=xl,pos=min(yl))
par(mfrow=c(1,1))
}
list(mat=matdf,calfct=calfct,calpars=calpars)
# mod=scam(dppm~s(calmz,bs="mpi",m=2),family=gaussian(link="identity"),data=matdf[matdf$out,],not.exp=FALSE)
# modd=scam(dppm~s(calmz,bs="mpd",m=2),family=gaussian(link="identity"),data=matdf[matdf$out,],not.exp=FALSE)
# if(modd$gcv.ubre<mod$gcv.ubre) mod=modd
# pred=predict(mod,predType = "full-model",newdata = data.frame(calmz=1:ceiling(max(xRaw@mzrange))))
# names(pred)=1:ceiling(max(xRaw@mzrange))
# n1=max(calInt[1],ceiling(min(matdf$calmz[matdf$out])))
# n2=min(calInt[2],floor(max(matdf$calmz[matdf$out])))
# pred=pred[n1:n2]
# list(mat=matdf,pred=pred,coef=c(coef(mod),n1=n1,n1dppm=unname(pred[1]),n2=n2,n2dppm=unname(rev(pred)[1])))
}
######################################################################################
## Single point MZ calib over scan/mz
#' get the xcmsRaw lighter
#'
#' @name .MGcorrRawPPM
#' @rdname .MGcorrRawPPM-methods
#' @import xcms
#' @export
setGeneric(name=".MGcorrRawPPM",def=function(object,calfct,calpars) standardGeneric(".MGcorrRawPPM"))
#' @rdname .MGcorrRawPPM-methods
#' @aliases .MGcorrRawPPM
setMethod(".MGcorrRawPPM","xcmsRaw",
.MGcorrRawPPM<-function(object,calfct,calpars){
## based on calibration function/parameeters
if("character"%in%class(object)){
cat("Opening ",object,"\n")
object=xcmsRaw(object,includeMSn = FALSE)
}
scsten=cbind(object@scanindex+1,c(object@scanindex[-1],length(object@env$intensity)))
rownames(scsten)=1:length(object@scanindex)
newmz=object@env$mz
for(ix in rownames(scsten)){
oldy<-xcms:::getScan(object,as.numeric(ix))[,1]
ppmy<- 1-calfct(oldy,calpars) *10^-6
# ppmy<- 1-addppm*10^-6
newy=round(oldy*ppmy,6)
newmz[scsten[ix,1]:scsten[ix,2]]=newy
}
print(summary((1-newmz/object@env$mz)*10^6))
ob<-new("xcmsRaw")
ob@env <- new.env(parent = .GlobalEnv)
ob@env$mz<-as.numeric(newmz)
ob@env$intensity<-object@env$intensity
ob@scanindex<-object@scanindex
ob@scantime<-object@scantime
ob@acquisitionNum<-1:length(ob@scanindex)
ob@filepath<-object@filepath
ob@mzrange<-range(ob@env$mz)
ob@profmethod<-object@profmethod
ob@tic<-object@tic
ob@profparam<-list()
ob<-xcms:::remakeTIC(ob)
return(ob)
}
)
######################################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.