R/cp_simid.R

Defines functions icms discan getdistr metan

#  setwd(oldi)
# oldi<-getwd()
#  source(infile)
# print(infile)
#metan(infile="../filescamid/sw620",cdfdir="../filescamid/SW620/",fiout="out.csv",mode='scan')
# metan(infile="../INES/ScanLacList.csv",cdfdir="../INES/PIM/KO_Hypoxia/KO_Hypoxia_SCANLAC.AIA/",fiout="out.csv",mode='scan')  
 library(ncdf4)
metan<-function(infile="../filesimid/sw620",cdfdir="../filesimid/SW620/",fiout="out.csv",mode='sim'){
   if(mode=='uhr') {a<-icms(); return(a)}
   start.time <- Sys.time()
   pat=".CDF"
   lcdf<-dir(path = cdfdir,pattern=pat)
   outdir="files/"
   intab<-read.table(infile,header=T)

title<-ftitle()

     df0<-data.frame(); # data frame to write Ramid output in PhenoMeNal format
     res<-character(); res1<-character(); res2<-character(); phen<-""
     if(mode=='scan') for(fi in lcdf){
                                    fi<-paste(cdfdir,fi, sep="")
                                    a <-discan(fi,intab) 
                                    res<-c(res,a[[1]])
                                    res1<-c(res1,a[[2]])
                                    res2<-c(res2,a[[3]])
                                    phen<-c(phen,a[[4]])
       } else for(fi in lcdf){
                              fi<-paste(cdfdir,fi, sep="")
                              a <-getdistr(fi,intab)
                              res<-c(res,a[[1]])
                              res1<-c(res1,a[[2]])
                              res2<-c(res2,a[[3]])
                              phen<-c(phen,a[[4]])
       }
       write(title,fiout)
       write(phen,fiout,append=T)
       a<-strsplit(cdfdir,"/")[[1]];  len<-length(a)
       if(len==1) { if(!(file.exists(outdir))) dir.create(outdir)
           celdir<-paste(outdir,a[length(a)],"/",sep='') } else if(len>1){ celdir<-paste("../",a[2],"/",outdir,sep='')
           if(!(file.exists(celdir))) dir.create(celdir)
           celdir<-paste(celdir,a[len],"/",sep='')
       }
        print(paste(celdir," len=",len))
       if(!(file.exists(celdir))) dir.create(celdir)
       
       for(nam in intab$Name) {ofi<-paste(celdir,nam,sep="")
          tmp<-subset(res,(grepl(as.character(nam),res)))
          if(length(tmp)){
            mzrow<-subset(tmp,(grepl("mz:",tmp)))
           mzs<- strsplit(strsplit(mzrow[1],"c: ")[[1]][1],"mz: ")[[1]][2];
           titl<-paste("absolute_values,CDF_file: Max",mzs)
           titl1<-paste("relative_values,CDF_file: Max",mzs)
          tmp<-gsub(as.character(nam)," ",tmp)
          tmp<-gsub("  ","",tmp)
        write(tmp,ofi)
          tmp<-subset(res1,(grepl(paste(as.character(nam)),res1)))
          tmp<-gsub(as.character(nam)," ",tmp)
          tmp<-gsub("  ","",tmp)
          tmp<-c(titl,tmp)
        write("\n",ofi,append=T)
        write(tmp,ofi,append=T)
          tmp<-subset(res2,(grepl(paste(as.character(nam)),res2)))
          tmp<-gsub(as.character(nam)," ",tmp)
          tmp<-gsub("  ","",tmp)
          tmp<-c(titl1,tmp)
        write("\n",ofi,append=T)
        write(tmp,ofi,append=T)
        }}
   Sys.time() - start.time
  }
       
getdistr<-function(fi,intab, tlim=100){
# fi: file name
# intab: parameters of metabolite (mz for m0, retention time)
    a<-readcdf(fi); 
     result<-character(); res1<-character(); res2<-character()
     mz<-round(a[[1]],1); iv<-a[[2]]   # all mz and respecive intensities
     npoint<-a[[3]];      rett<-a[[4]] # number of mz points and respective rt
#     totiv<-a[[5]]                     # sum of intensities at each rt
    a<-strsplit(strsplit(fi,".CDF")[[1]][1],"/")[[1]];
    fi<-a[length(a)]; print(fi);  phenom<-""
#    summary: 
 a<-info(mz,iv,npoint); mzpt<-a[[1]] # number of points in all the mz ranges
  tpos<-a[[2]]# time points of beginning of registration corresponding mz ranges
  mzind<-a[[3]]                      # corresponding mz indexes
  mzrang<-a[[4]]                     # all registered mz ranges 
     rts<-intab$RT*60.; mz0<-round(intab$mz0,1); mzcon<-round(intab$control,1)
#  search for specified metabolites
 for(i in 1:nrow(intab)) {nm<-as.character(intab$Name[i])
        ltp<- rts[i]<rett[tpos]         # time interval that includes rts
        ranum<-(c(1:length(tpos))[ltp])[1]-1
        ranum[is.na(ranum)]<-0; if(ranum<1) next
        mzrang[[ranum]]<-round(mzrang[[ranum]],1)
   if((mz0[i] %in% mzrang[[ranum]])&(mzcon[i] %in% mzrang[[ranum]])) {
#   check whether mid for a given metabolite is presented in the found time interval
        tpclose<-which.min(abs(rett-rts[i]))
#        tlim<-min(tlim,tpclose-tpos[ranum],tpos[ranum+1]-tpclose)
        tplow<-max(tpclose-tlim,tpos[ranum]); tpup<-min(tpclose+tlim,tpos[ranum+1])     # boundaries that include desired peak
#   mzi<-mzind[ranum]+mzpt[ranum]*(tplow-tpos[ranum])#index of initial mz point
   mzi<-mzind[ranum]+sum(npoint[tpos[ranum]:tplow])-npoint[tpos[ranum]]
#   mzfi<-mzi+mzpt[ranum]*(tpup-tplow)   	#index of final mz point
   mzfi<-mzi+sum(npoint[tplow:tpup])-npoint[tplow]
   rtpeak<-rett[tplow:tpup] # retention times within the boundaries
        tpclose<-which.min(abs(rtpeak-rts[i]))
# additional peak
      nmass<-3; rtdev<-15;
    misoc<-c(intab$control[i],intab$control[i]+1,intab$control[i]+2)#desired mz values
    lmisoc<-mzrang[[ranum]] %in% misoc
    intens<-matrix(ncol=nmass,nrow=(tpup-tplow),0)
    intens<-sweep(intens,2,iv[mzi:mzfi][lmisoc],'+')
    pospiks<-apply(intens,2,which.max)
    pikintc<-apply(intens,2,max)
   if(max(abs(diff(pospiks)))>9) goodiso<-which.min(abs(pospiks-tpclose))  else goodiso<-which.max(pikintc)
        pikposc<-pospiks[goodiso]
        
  if(abs(rtpeak[pikposc]-rtpeak[tpclose])<10) {
        maxpikc<-pikintc[goodiso]
    for(k in 1:nmass) pikintc[k]<-sum(intens[(pikposc-2):(pikposc+2),k])
      basc<-apply(intens,2,basln,pos=pikposc,ofs=11)
                deltac<-round(pikintc-basc)
                ratc<-deltac/basc
# main peak
  if(ratc[goodiso]>3){ frag<-as.character(intab$Fragment[i])
     frpos<-gregexpr("C[0-9]",frag)[[1]]+1
      c1=as.numeric(substr(frag,frpos[1],frpos[1]));  c2=as.numeric(substr(frag,frpos[2],nchar(frag)))
   nCfrg<-c2-c1+1
#                   a<-as.character(intab$Formula[i])
#          Cpos<-regexpr("C",a); Hpos<-regexpr("H",a); Spos<-regexpr("S",a); Sipos<-regexpr("Si",a)
#    nCder<-as.numeric(substr(a,Cpos+1,Hpos-1))
#    if(Sipos>0) nSi<-as.numeric(substr(a,Sipos+2,Sipos+2)) else nSi<-0
#    if((Spos>0)&(Spos!=Sipos)) nS<-as.numeric(substr(a,Spos+1,Spos+1)) else nS<-0
        nmass<-nCfrg+5 # number of isotopomers to present calculated from formula
    misofin<-array((mz0[i]-1):(mz0[i]+nmass-2)) # isotopores to present in the spectrum
    lmisofin<-mzrang[[ranum]] %in% misofin # do they are present in the given mzrang?
    pikmz<-mzrang[[ranum]][lmisofin] # extrat those that are present
    nmass<-length(pikmz)
    intens<-matrix(ncol=nmass,nrow=(tpup-tplow),0)
    intens<-sweep(intens,2,iv[mzi:mzfi][lmisofin],'+') # create matrix iv(col=mz,row=rt) that includes the peak

    pospiks<-apply(intens,2,which.max)
    difpos<-abs(pikposc - pospiks)
    if(min(difpos)<3) {
     goodpos<-pospiks[which.min(difpos)]
     pikint<-intens[goodpos,]
     pikpos<-which.max(pikint)
     maxpik<-intens[goodpos,pikpos]; smaxpik<-"max_peak:";
     if(maxpik>8300000) {smaxpik<-"**** !?MAX_PEAK:"; print(paste("** max=",maxpik,"   ",nm,"   **")); next;}
     bas<-apply(intens,2,basln,pos=goodpos,ofs=11)
     if((goodpos>2)&(goodpos<(nrow(intens)-2))){
       for(k in 1:nmass) pikint[k]<-sum(intens[(goodpos-2):(goodpos+2),k]) }
     delta<-round(pikint-bas); s5tp<-"5_timepoints:"
    if((misofin[1]==pikmz[1])&(delta[1]/delta[2] > 0.075)) { s5tp<-"*!?* 5_timepoints:";
      print(paste("+++ m-1=",delta[1],"  m0= ",delta[2],"   +++ ",nm)); break }
          
                rat<-delta/bas
                rel<-round(delta/max(delta),4)      # normalization

    a<- wphen(fi,nm,intab$Fragment[i], intab$Formula[i], intab$RT[i], pikmz,delta)
    phenom<-c(phenom,a)

   archar<-paste(c(gsub(' ','_',fi),nm),collapse=" ")
         result<-c(result,archar)
   archar<-paste(c(nm,"peak_index:",goodpos,"c:",goodpos),collapse=" ")
         result<-c(result,archar)
   archar<-paste(c(nm,smaxpik,maxpik,"c:",maxpikc),collapse=" ") 
         result<-c(result,archar)
   archar<-paste(c(nm,"mz:",pikmz,"c:",misoc),collapse=" ")
         result<-c(result,archar)
   archar<-paste(c(nm,s5tp,pikint,"c:",pikintc),collapse=" ")
         result<-c(result,archar)
   archar<-paste(c(nm,"base:",round(bas),"c:",round(basc)),collapse=" ")
         result<-c(result,archar)
#   archar<-paste(c(nm,"max-base:",delta),collapse=" ")
#         result<-c(result,archar)
        if(misofin[1]!=pikmz[1]) {delta<-c(0,delta); rel<-c(0,rel)}
   archar<-paste(c(gsub(' ','_',fi),nm,maxpik,delta),collapse=" ")
         res1<-c(res1,archar)
#   archar<-paste(c(nm,"relative:",rel),collapse=" ")
#         result<-c(result,archar)
   archar<-paste(c(gsub(' ','_',fi),nm,maxpik,rel),collapse=" ")
         res2<-c(res2,archar)
    }
    
#     { }      else pikmz<-c(0,pikmz)
                }
              }
           }
           }
 return(list(result,res1,res2,phenom))}     
 
discan<-function(fi,intab, tlim=50){
# fi: file name
# intab: parameters of metabolite (mz for m0, retention time)
    a<-readcdf(fi);
     result<-character(); res1<-character(); res2<-character()
     mz<-a[[1]]; iv<-a[[2]]   # sets of mz and respecive intensities at each rt
     npoint<-a[[3]];      rett<-a[[4]] # number of mz points and respective rt
#     totiv<-a[[5]]                     # sum of intensities at each rt
    a<-strsplit(strsplit(fi,".CDF")[[1]][1],"/")[[1]];
    fi<-a[length(a)]; print(fi);  phenom<-""
     mzbeg<-which(diff(mz)<0)+1; dmz=0.49  # index of beginning of mz scan interval for each timepoint
     rts<-intab$RT*60.; mz0<-round(intab$mz0,1); mzcon<-round(intab$control,1)
#  search for specified metabolites
 for(i in 1:nrow(intab)) if(max(rett)>rts[i]){nm<-as.character(intab$Name[i])
   tpclose<-which.min(abs(rett-rts[i]))  # index of timepoint closest to theoretical retention time
   tplow<-tpclose-tlim; tpup<-tpclose+tlim # indexes of peak boundaries
   mzpeak<-mz[mzbeg[tplow]:mzbeg[tpup]] # all mz between the timepoints limiting the peak
   ivpeak<-iv[mzbeg[tplow]:mzbeg[tpup]] # all intensity between the timepoints limiting the peak
   rtpeak<-rett[tplow:tpup] # retention times within the boundaries
# additional peak
      nmass<-3; rtdev<-15; intens<-matrix(); selmz<-matrix()
   a<-psimat(nr=(length(rtpeak)+2),nmass,mzpeak,ivpeak,mzz0=mzcon[i],dmzz=dmz,lefb=1,rigb=length(rtpeak)-1,ofs=1)
    intens<-a[[2]]; selmz<-a[[3]]; intens[is.na(intens)]<-0; selmz[is.na(selmz)]<-0;
  pikmzc<-numeric(); 
    pikintc<-apply(intens,2,max)
    pospiks<-apply(intens,2,which.max)
   if(max(abs(diff(pospiks)))>9) goodiso<-which.min(abs(pospiks-tlim))  else goodiso<-which.max(pikintc)
        pikposc<-which.max(intens[,goodiso])
  if(abs(pikposc-tlim)<rtdev) {
       maxpikc<-intens[pikposc,goodiso]
   for(k in 1:nmass) {
   pikmzc[k]<-selmz[pikposc,k] # peak mz
   pikintc[k]<-sum(intens[(pikposc-2):(pikposc+2),k])}
      basc<-apply(intens,2,basln,pos=pikposc,ofs=5)
                ratc<-round(pikintc-basc)/basc
# main peak
  if(ratc[goodiso]>9){  piklim<-9
        a<-as.character(intab$Fragment[i])
    nCfrg<-as.numeric(substr(a,4,nchar(a)))-as.numeric(substr(a,2,2))+1
    nmass <-nCfrg+5 # number of isotopores to present calculated from formula
#        a<-as.character(intab$Formula[i])
#     Cpos<-regexpr("C",a);  Hpos<-regexpr("H",a)
#     Spos<-regexpr("S",a); Sipos<-regexpr("Si",a)
#    nCder<-as.numeric(substr(a,Cpos+1,Hpos-1))
#    if(Sipos>0) nSi<-as.numeric(substr(a,Sipos+2,Sipos+2)) else nSi<-0
#    if((Spos>0)&(Spos!=Sipos)) nS<-as.numeric(substr(a,Spos+1,Spos+1)) else nS<-0
#       tit1<-paste("m",c(1:nmass),sep="")
    a<-psimat(nr=(2*piklim+1),nmass,mzpeak,ivpeak,mzz0=mz0[i],dmzz=dmz,lefb=(pikposc-piklim),rigb=(pikposc+piklim),ofs=2)
    intens<-a[[2]]; selmz<-a[[3]];
#	lapply(intens,length)
    pikint<-apply(intens,2,max)
    isomax<-which.max(pikint)
    pikpos<-which.max(intens[,isomax])
    maxpik<-intens[pikpos,isomax];  smaxpik<-"max_peak:";
     if(maxpik>8000000) {smaxpik<-"**** !?MAX_PEAK:"; print(paste("** max=",maxpik,"   ",nm,"   **"))}
    bas<-apply(intens,2,basln,pos=pikpos,ofs=5)
  if((pikpos>2)&(pikpos<(nrow(intens)-2))){
      pikmz<-numeric(); 
     for(k in 1:nmass) {
     pikmz[k]<-round(selmz[pikpos,k]) # peak mz
     pikint[k]<-sum(intens[(pikpos-2):(pikpos+2),k])
   }
    delta<-round(pikint-bas); s5tp<-"5_timepoints:"
     if(delta[1]/delta[2] > 0.05) { s5tp<-"*!?* 5_timepoints:"; print(paste("+++ m-1=",delta[1],"  m0= ",delta[2],"   +++ ",nm))}
    a<- wphen(fi,nm,intab$Fragment[i], intab$Formula[i], intab$RT[i], pikmz,delta)
    phenom<-c(phenom,a)
                rat<-delta/bas
                rel<-round(delta/max(delta),4)      # normalization
    pikpos<-pikposc-piklim+pikpos-1;
   archar<-paste(c(gsub(' ','_',fi),nm),collapse=" ")
         result<-c(result,archar)
   archar<-paste(c(nm,"peak_index:",pikpos,"c:",pikposc),collapse=" ")
         result<-c(result,archar)
   archar<-paste(c(nm,smaxpik,maxpik,"c:",maxpikc),collapse=" ")
         result<-c(result,archar)
   archar<-paste(c(nm,"mz:",round(pikmz,1),"c:",round(pikmzc,1)),collapse=" ")
         result<-c(result,archar)
   archar<-paste(c(nm,s5tp,pikint,"c:",pikintc),collapse=" ")
         result<-c(result,archar)
   archar<-paste(c(nm,"base:",round(bas),"c:",round(basc)),collapse=" ")
         result<-c(result,archar)
#   archar<-paste(c(nm,"max-base:",delta),collapse=" ")
#         result<-c(result,archar)
   archar<-paste(c(gsub(' ','_',fi),nm,maxpik,delta),collapse=" ")
         res1<-c(res1,archar)
#   archar<-paste(c(nm,"relative:",rel),collapse=" ")
#         result<-c(result,archar)
   archar<-paste(c(gsub(' ','_',fi),nm,maxpik,rel),collapse=" ")
         res2<-c(res2,archar)
            }
         }
       } # if additional peak exists
     } # change of metabolite (i)
 return(list(result,res1,res2,phenom))}
  
icms<-function(infile="tere1",cdfile='teresa/Refsample_12C_Glucose_ICMS.cdf',fiout="out"){
   start.time <- Sys.time()
   outdir="files/"
   intab<-read.table(infile,header=T)
   nc <- nc_open(cdfile, readunlim=FALSE)  #open cdf file
   ret<-ncvar_get( nc, "scan_acquisition_time" )
   twide<-50
   rett<-list();
   for(i in 1:nrow(intab)) rett[[i]]<-getRtInt(retal=ret,rt=intab$RT[i],tlim=twide)
   remove(ret)
   mzz<-ncvar_get( nc, "mass_values" )
   mzbeg<-which(diff(mzz)<0)+1 # index of beginning of mz scan interval for each timepoint
   mzpos<-list(); nCfrg<-numeric()
   for(i in 1:nrow(intab)){
     frag<-as.character(intab$Fragment[i]) # string of fragnent name
     frpos<-gregexpr("C[0-9]",frag)[[1]]+1 # C-positions in the fragment
     c1=as.numeric(substr(frag,frpos[1],frpos[1]));  c2=as.numeric(substr(frag,frpos[2],nchar(frag)))
     nCfrg[i]<-c2-c1+2 # number of carbons in fragment
     isopos<-list();
     for(j in 1:nCfrg[i]) # get valid mz for the selected scans and mass isotopomers
       isopos[[j]]<-getMzInt(mzal=mzz,mzalbeg=mzbeg,tl=rett[[i]][[2]],numscans=twide*2,lmz=intab[i,3], hmz=intab[i,4],iso=(j-1))
     mzpos[[i]]<-isopos
   }
   remove(mzz)
   ival<-ncvar_get( nc, "intensity_values" )
   peaks<-list(); ivabs<-list();  ivrel<-list(); alinfo<-character()
   for(i in 1:nrow(intab)) {
      peak<-list(); disabs<-numeric(); disrel<-numeric();
      for(j in 1:nCfrg[i]) {
      peak[[j]]<-round(geIVsum(iv=ival,mzgood=mzpos[[i]][[j]]))
      ima<-which.max(peak[[j]]);  ma<-peak[[j]][ima]
      if((ima<7)|(ima>twide-7)){disabs[j]<-0; next} 
          disabs[j]<-basecor(peak[[j]],ima,ma) # correct for baseline
          alinfo<-paste(c(alinfo,j-1,'#',ima,':',ma,'\n'),collapse=" ")
      } # sum of intensities for good mz 
      disabs[is.na(disabs)]<-0; ivabs[[i]]<-c(as.character(intab[i,1]),round(disabs))
      sabs<-sum(disabs); disrel<-disabs/sabs; ivrel[[i]]<-c(as.character(intab[i,1]),round(disrel,4))
      peaks[[i]]<-peak; alinfo<-paste(c(alinfo,ivabs[[i]],'\n',ivrel[[i]],'\n\n'),collapse=" ")
   }
   remove(ival)
        write(alinfo,fiout)
  a<-Sys.time() - start.time
  return(list(peaks,a))
  }
seliv55/ramidcor documentation built on Sept. 11, 2021, 4:09 p.m.