R/get_confidence_stage2.1.R

Defines functions get_confidence_stage2.1

get_confidence_stage2.1 <-
function(curdata,max_diff_rt,adduct_weights=NA,filter.by=NA,min_ions_perchem=1,max_isp=5){
    
    
    curdata<-curdata[order(curdata$Adduct),]
    
    #return(curdata)
    
    
    
    
    curdata_all<-curdata
    cur_adducts_with_isotopes<-curdata$Adduct
    
    cur_adducts<-gsub(cur_adducts_with_isotopes,pattern="(_\\[(\\+|\\-)[0-9]*\\])",replacement="")
    #cur_adducts<-gsub(cur_adducts_with_isotopes,pattern="(_\\[\\-[0-9]*\\])",replacement="")
    
    if(is.na(adduct_weights)==TRUE){
        data(adduct_table)
        data(adduct_weights)
        adduct_weights1<-matrix(nrow=2,ncol=2,0)
        adduct_weights1[1,]<-c("M+H",1)
        adduct_weights1[2,]<-c("M-H",1)
        adduct_weights<-as.data.frame(adduct_weights1)
        colnames(adduct_weights)<-c("Adduct","Weight")
    }
    
    adduct_monoisot<-data.frame(cbind("M",1,1,0,"-","S"))
    
    colnames(adduct_monoisot)<-colnames(adduct_table)
    
    adduct_table<-rbind(adduct_table,adduct_monoisot)
    
    adduct_table<-adduct_table[order(adduct_table$Adduct),]
    
    curdata<-cbind(curdata,cur_adducts)
    
    curdata<-merge(curdata,adduct_table,by.x="cur_adducts",by.y="Adduct")
    
    curdata<-curdata[,-c(1)]
    
    
    #return(curdata)
    formula_vec<-curdata$Formula
    
    temp1<-curdata
    temp1$Module_RTclust<-gsub(temp1$Module_RTclust,pattern="(_[0-9]*)",replacement="")
    
    table_modules<-table(temp1$Module_RTclust)
    
    rm(temp1)
    module_names<-names(table_modules[which(table_modules>0)])
    chemscoremat_conf_levels<-"High"
    if(length(which(adduct_table$Adduct%in%cur_adducts))<1)
    {
        chemscoremat_conf_levels<-"None"
        #return(chemscoremat_conf_levels)
    }
    if(length(unique(curdata$Adduct))<2 && curdata$score<10 && length(which(cur_adducts%in%filter.by))<1){
        chemscoremat_conf_levels<-"None"
        #return(chemscoremat_conf_levels)
    }else{
        
        if(length(unique(curdata$Adduct))<2 && length(which(cur_adducts%in%filter.by))>0){
            chemscoremat_conf_levels<-"Low"
            #		 return(chemscoremat_conf_levels)
        }
    }
    
    if(length(unique(curdata$Adduct))<2 && nrow(curdata)>1){
        
        if(nrow(curdata)==2){
            chemscoremat_conf_levels<-"Medium"
        }
        #return(chemscoremat_conf_levels)
        
        if(length(which(cur_adducts%in%filter.by))<1)
        {
            
            chemscoremat_conf_levels<-"None"
            #return(chemscoremat_conf_levels)
        }
    }
    
    #print("Start")
    
    
    curdata$time<-as.numeric(as.character(curdata$time))
    delta_rt<-max(curdata$time)-min(curdata$time)
    
    delta_rt<-round(delta_rt)
    
    if(delta_rt>max_diff_rt)
    {
        #chemscoremat_conf_levels<-"Medium"
        
        #if(delta_rt>10){
        chemscoremat_conf_levels<-"None"
        
        #}
        
        
        groupB<-group_by_rt(curdata,time_step=3,max.rt.diff=max_diff_rt,groupnum="")
        
        
        groupB<-groupB[order(groupB$mz),]
        
        cname1<-paste(groupB$mz,groupB$time,sep="_")
        if(length(which(duplicated(cname1)==TRUE))>0){
            groupB<-groupB[-which(duplicated(cname1)==TRUE),]
        }
        curdata<-curdata[order(curdata$mz),]
        
        #print(curdata)
        #print(groupB)
        
        module_clust<-groupB[,1] #paste(curdata$module_num,groupB[,1],sep="")
        curdata$Module_RTclust<-module_clust
        
        #print("here")
        #print(curdata)
        if(length(which(adduct_weights[,1]%in%cur_adducts))>0 && curdata$score[1]>0.1)
        {
            if(is.na(filter.by)==TRUE){
                good_mod<-curdata$Module_RTclust[which(curdata$Adduct%in%adduct_weights[,1])]
                curdata<-curdata[which(curdata$Module_RTclust%in%good_mod),]
                t1<-table(curdata$Module_RTclust)
                t2<-t1[which(t1>0)]
                t2<-t2[which(t2==max(t2)[1])]
                n1<-names(t2)
                curdata<-curdata[which(curdata$Module_RTclust==n1[1]),]
                delta_rt<-max(curdata$time)-min(curdata$time)
                delta_rt<-round(delta_rt)
                if(curdata$score[1]>0 && nrow(curdata)>1 && length(unique(curdata$Adduct))>1 && delta_rt<max_diff_rt){
                    chemscoremat_conf_levels<-"High"
                }else{
                    chemscoremat_conf_levels<-"Low"
                }
                
                #print("here2")
            }else{
                if(length(which(cur_adducts%in%filter.by))>0){
                    good_mod<-curdata$Module_RTclust[which(curdata$Adduct%in%filter.by)]
                    curdata<-curdata[which(curdata$Module_RTclust%in%good_mod),]
                    curdata<-as.data.frame(curdata)
                    t1<-table(curdata$Module_RTclust)
                    t2<-t1[which(t1>0)]
                    t2<-t2[which(t2==max(t2)[1])]
                    n1<-names(t2)
                    curdata<-curdata[which(curdata$Module_RTclust==n1[1]),]
                    delta_rt<-max(curdata$time)-min(curdata$time)
                    delta_rt<-round(delta_rt)
                    if(length(which(t1>1))<1){
                        if(curdata$score[1]>0 && nrow(curdata)>1 && length(unique(curdata$Adduct))>1 && delta_rt<max_diff_rt && length(which(curdata$Adduct%in%filter.by))>0){
                            chemscoremat_conf_levels<-"High"
                        }else{
                            if(curdata$score[1]>0 && length(which(curdata$Adduct%in%filter.by))>0){
                                chemscoremat_conf_levels<-"Medium"
                            }else{
                                if(curdata$score[1]>0){
                                    chemscoremat_conf_levels<-"Low"
                                    
                                }else{
                                    chemscoremat_conf_levels<-"None"
                                }
                            }
                        }
                    }else{
                        
                        t2<-t1[which(t1>0)]
                        t2<-t2[which(t2==max(t2)[1])]
                        n1<-names(t2)
                        curdata<-curdata[which(curdata$Module_RTclust==n1[1]),]
                        delta_rt<-max(curdata$time)-min(curdata$time)
                        delta_rt<-round(delta_rt)
                        if(curdata$score[1]>0 && nrow(curdata)>1 && length(unique(curdata$Adduct))>1 && delta_rt<max_diff_rt && length(which(curdata$Adduct%in%filter.by))>0){
                            chemscoremat_conf_levels<-"High"
                        }else{
                            if(curdata$score[1]>0 && length(which(curdata$Adduct%in%filter.by))>0 && delta_rt<max_diff_rt){
                                chemscoremat_conf_levels<-"Medium"
                            }else{
                                if(curdata$score[1]>0){
                                    chemscoremat_conf_levels<-"Low"
                                    
                                }else{
                                    chemscoremat_conf_levels<-"None"
                                }
                            }
                        }
                        
                        
                    }
                    
                }
            }
            
            
            
            
        }
        else{
            
            #print(length(which(adduct_weights[,1]%in%cur_adducts)))
            #print(curdata$score[1])
            
            #print("here")
            
            chemscoremat_conf_levels<-"None"
            
        }
        module_clust<-paste(curdata$module_num,curdata$Module_RTclust,sep="")
        curdata$Module_RTclust<-module_clust
        
        
        
    }
    curdata<-as.data.frame(curdata)
    
    #print("here 31")
    #print(curdata)
    temp1<-curdata
    temp1$Module_RTclust<-gsub(temp1$Module_RTclust,pattern="(_[0-9]*)",replacement="")
    
    table_modules<-table(temp1$Module_RTclust)
    
    rm(temp1)
    module_names<-names(table_modules[which(table_modules>0)])
    #print("new mnames")
    #print(module_names)
    
    
    #print("OVER HERE")
    #else
    {
        #change in 1.5.3
        if(length(module_names)>1 && curdata$score<10){
            
            chemscoremat_conf_levels<-"None"
        }else{
            
            if(length(module_names)>1 && curdata$score>10 && length(which(cur_adducts%in%filter.by))>0){
                
                chemscoremat_conf_levels<-"Medium"
            }
        }
        #else
        {
            if(nrow(curdata)<2)
            {
                #chemscoremat_conf_levels<-"Low"
                if(length(which(cur_adducts%in%adduct_weights[,1]))<1){
                    chemscoremat_conf_levels<-"Low"
                }else{
                    #matches an M+H and score is greater than 10
                    if(curdata$score>10 && length(which(cur_adducts%in%filter.by))>0){
                        chemscoremat_conf_levels<-"Medium"
                        
                    }else{
                        
                        chemscoremat_conf_levels<-"None"
                        
                        
                    }
                }
            }else{
                min_nummol<-min(curdata$num_molecules)
                min_nummol_ind<-which(curdata$num_molecules==min_nummol)
                #num molecules check
                if(min_nummol>1){
                    chemscoremat_conf_levels<-"Low"
                }
                
                else{
                    #Multiple molecules abundance check
                    check1<-gregexpr(text=cur_adducts,pattern="([2-3]+M)")
                    
                    if(length(check1)>0)
                    {
                        #adduct_charges<-adduct_table[which(adduct_table$Adduct%in%cur_adducts),2]
                        min_mol<-min(curdata$num_molecules)
                        min_mol_ind<-which(curdata$num_molecules==min_mol)
                        
                        max_int_min_mol<-max(curdata$mean_int_vec[min_mol_ind])
                        
                        #max_int_min_charge<-max(curdata$mean_int_vec[min_mol_ind])
                        
                        bad_ind_status<-rep(0,length(check1))
                        
                        min_mol<-min(adduct_table[which(adduct_table$Adduct%in%cur_adducts),2])
                        
                        #print("Max int")
                        #print(min_mol)
                        #print(max_int_min_mol)
                        
                        if(min_mol<2)
                        {
                            #check pattern of each adduct
                            for(a1 in 1:length(check1))
                            {
                                
                                strlength<-attr(check1[[a1]],"match.length")
                                #print(strlength)
                                
                                if(strlength[1]>(-1))
                                {
                                    
                                    #if(min_charge<2)
                                    {
                                        abund_ratio<-curdata$mean_int_vec[a1]/max_int_min_mol
                                        
                                        #print("abund ratio is")
                                        #print(abund_ratio)
                                        
                                        if(is.na(abund_ratio)==FALSE){
                                            if(abund_ratio>1){
                                                
                                                bad_ind_status[a1]<-1
                                                if(chemscoremat_conf_levels=="High"){
                                                    chemscoremat_conf_levels<-"Medium"
                                                }else{
                                                    chemscoremat_conf_levels<-"Low"
                                                }
                                            }
                                            
                                        }else{
                                            chemscoremat_conf_levels<-"Low"
                                            bad_ind_status[a1]<-1
                                        }
                                        
                                    }
                                }
                                
                            }
                            
                            
                            
                            if(length(which(bad_ind_status==1))>0){
                                
                                #print(curdata)
                                #print(bad_ind_status)
                                bad_adducts<-cur_adducts[which(bad_ind_status==1)]
                                
                                #curdata<-curdata[-which(bad_ind_status==1),]
                                #curdata<-curdata[-which(cur_adducts%in%bad_adducts),]
                                
                                #print(curdata)
                            }
                            
                            if(length(nrow(curdata)>0)){
                                cur_adducts_with_isotopes<-curdata$Adduct
                                
                                #cur_adducts<-gsub(cur_adducts_with_isotopes,pattern="(_\\[\\+[0-9]\\])",replacement="")
                                #				cur_adducts<-gsub(cur_adducts_with_isotopes,pattern="(_\\[\\+[0-9]*\\])",replacement="")
                                
                                cur_adducts<-gsub(cur_adducts_with_isotopes,pattern="(_\\[(\\+|\\-)[0-9]*\\])",replacement="")
                                #cur_adducts<-gsub(cur_adducts,pattern="(_\\[\\-[0-9]*\\])",replacement="")
                                
                                
                                
                            }else{
                                
                                
                                
                                #print("here1")
                                chemscoremat_conf_levels<-"None"
                                
                                
                            }
                        }else{
                            #print("here1")
                            chemscoremat_conf_levels<-"Low"
                        }
                        
                        
                        
                        
                        #Multiply charged ions
                        #if(FALSE)
                        {
                            
                            adduct_charges<-curdata$charge # adduct_table[which(adduct_table$Adduct%in%cur_adducts),3]
                            
                            min_charge<-min(curdata$charge)
                            
                            min_charge_ind<-which(curdata$charge==min_charge)
                            
                            high_charge_ind<-which(curdata$charge>1)
                            max_int_min_charge<-max(curdata$mean_int_vec) #[min_charge_ind])
                            
                            if(length(high_charge_ind)>0){
                                abund_ratio_min_maxcharge<-max(curdata$mean_int_vec[min_charge_ind])[1]/max(curdata$mean_int_vec[high_charge_ind])[1]
                                
                                if((abund_ratio_min_maxcharge<1))
                                {
                                    
                                    
                                    chemscoremat_conf_levels<-"Low"
                                }
                                
                                
                            }
                            
                            bad_ind_status<-rep(1,length(check1))
                            
                            
                            
                            if((min_charge>1))
                            {
                                
                                
                                chemscoremat_conf_levels<-"Low"
                            }
                            
                        }
                        
                        #isotope based check for +1 and +2 to make sure that the abundant form is present
                        check2<-gregexpr(text=cur_adducts_with_isotopes,pattern="(_\\[(\\+|\\-)[0-9]*\\])")
                        
                        if(length(check2)>0){
                            
                            for(a1 in 1:length(check2)){
                                strlength<-attr(check2[[a1]],"match.length")
                                
                                if(strlength[1]>(-1)){
                                    
                                    count_abundant_form<-length(which(cur_adducts%in%cur_adducts[a1]))
                                    
                                    
                                    if(count_abundant_form<2)
                                    {
                                        #print("here")
                                        
                                        #chemscoremat_conf_levels<-"None"
                                        
                                        curdata<-curdata[-c(a1),]
                                        
                                    }
                                    
                                    
                                    
                                    
                                }
                            }
                            
                            
                        }
                        
                        
                        
                        cur_adducts_with_isotopes<-curdata$Adduct
                        
                        #cur_adducts<-gsub(cur_adducts_with_isotopes,pattern="(_\\[\\+[0-9]\\])",replacement="")
                        #                               cur_adducts<-gsub(cur_adducts_with_isotopes,pattern="(_\\[\\+[0-9]*\\])",replacement="")
                        
                        cur_adducts<-gsub(cur_adducts_with_isotopes,pattern="(_\\[(\\+|\\-)[0-9]*\\])",replacement="")
                        formula_vec<-curdata$Formula
                        curformula<-as.character(formula_vec[1])
                        #    check2<-gregexpr(text=curformula,pattern="(_\\[\\+[3]\\])") #M+H+2
                        #print("HERE")
                        #print("here")
                        #print(chemscoremat_conf_levels)
                        #print(curdata)
                        
                        check2<-gregexpr(text=cur_adducts_with_isotopes,pattern="(_\\[(\\+|\\-)[0-9]*\\])") #M+H_2
                        curformula<-gsub(curformula,pattern="Sr",replacement="")
                        curformula<-gsub(curformula,pattern="Sn",replacement="")
                        curformula<-gsub(curformula,pattern="Se",replacement="")
                        curformula<-gsub(curformula,pattern="Sc",replacement="")
                        curformula<-gsub(curformula,pattern="Sm",replacement="")
                        
                        
                        curformula<-gsub(curformula,pattern="(_\\[(\\+|\\-)[2]*\\])",replacement="")
                        
                        if(length(check2)>0){
                            
                            for(a1 in 1:length(check2)){
                                strlength<-attr(check2[[a1]],"match.length")
                                
                                if(strlength[1]>(-1))
                                {
                                    
                                    
                                    
                                    numchlorine<-check_element(curformula,"Cl")
                                    numsulphur<-check_element(curformula,"S")
                                    numbromine<-check_element(curformula,"Br")
                                    
                                    max_isp_count<-max(numchlorine,numsulphur,numbromine)
                                    
                                    if(is.na(max_isp_count)==TRUE | max_isp_count==0){
                                        #chemscoremat_conf_levels<-"None"
                                        curdata<-curdata[-c(a1),]
                                        
                                    }else{
                                        if(max_isp_count<1){
                                            #chemscoremat_conf_levels<-"None"
                                            curdata<-curdata[-c(a1),]                                
                                            
                                        }
                                    }
                                    
                                    
                                    
                                }
                            }
                            
                            
                        }
                        
                        
                        
                        
                    }
                    
                    
                    
                    
                }
                
                
                
            }
            
            
        }
        
    }
    formula_vec<-curdata$Formula
    curformula<-as.character(formula_vec[1])
    curformula<-gsub(curformula,pattern="Ca",replacement="")
    curformula<-gsub(curformula,pattern="Cl",replacement="")
    curformula<-gsub(curformula,pattern="Cd",replacement="")
    curformula<-gsub(curformula,pattern="Cr",replacement="")
				
                numcarbons<-check_element(curformula,"C")
                if(numcarbons<1){
                    
                    chemscoremat_conf_levels<-"None"
                }
                
                if(length(unique(curdata$Adduct))<2 && nrow(curdata)>1){
                    
                    if(nrow(curdata)==2){
                        chemscoremat_conf_levels<-"Medium"
                    }
                    #return(chemscoremat_conf_levels)
                    
                    if(length(which(cur_adducts%in%filter.by))<1)
                    {
                        
                        chemscoremat_conf_levels<-"None"
                        return(chemscoremat_conf_levels)
                    }
                }
                
                
                
                if(nrow(curdata)<1){	
                    score_level<-0
                    #curdata<-c(-1)
                    curdata<-curdata_all
                }else{
                    
                    #3: High
                    #2: medium
                    #1: Low
                    #0: None
                    
                    score_level<-as.character(chemscoremat_conf_levels)
                    
                    score_level<-gsub(score_level,pattern="High",replacement="3")
                    score_level<-gsub(score_level,pattern="Medium",replacement="2")
                    score_level<-gsub(score_level,pattern="Low",replacement="1")
                    score_level<-gsub(score_level,pattern="None",replacement="0")
                    
                    
                    
                }
                
                if(is.na(score_level[1])==TRUE){
                    
                    stop(curdata)
                }
                
                
                score_level<-as.numeric(as.character(score_level))
                final_res<-cbind(score_level,curdata)
                
                #print("curdata")
                #print(curdata)
                #print("score")
                #print(score_level)
                
                
                final_res<-as.data.frame(final_res)
                # final_res$score_level<-(as.numeric(as.factor(final_res$score_level)))
                
                #print(final_res)
                #return(final_res)
                
                if(length(which(is.na(final_res$score_level)==TRUE))>0){
                    final_res$score_level<-replace(final_res$score_level,which(is.na(final_res$score_level)==TRUE),0)
                }
                #final_res$score_level<-replace(final_res$score_level,which(final_res$score_level<1),0)
                
                
                num_uniq_adducts<-length(unique(curdata$Adduct))
                
                uniq_adducts<-unique(curdata$Adduct)
                
                #num_good_adducts<-length(which(uniq_adducts%in%adduct_weights[,1]))
                
                num_good_adducts<-length(which(uniq_adducts%in%filter.by))
                
                #final_res$score<-(as.numeric(as.factor(final_res$score_level))^num_uniq_adducts)+0.1
                
                #print(final_res$score)
                #print(num_uniq_adducts)
                #print(delta_rt)
                #print(length(module_names))
                
                if(num_good_adducts>0){
                    final_res$score<-final_res$score*num_good_adducts
                }else{
                    
                    
                    final_res$score<-final_res$score*(0)
                    
                }
                
                #final_res$score<-final_res$score*1*(0.9)*(1/((delta_rt*0.1)+1)^2)*num_uniq_adducts
                
                # final_res$score<-final_res$score*(1/length(module_names))
                #final_res$score<-(final_res$score)*(100)
                
                #print("delta")
                #print(delta_rt)
                #  if(final_res$chemical_ID=="C00073")
                # {
                
                #print(final_res)
                #stop("Pyridoxine")
                #stop("Methionine")
                #}
                
                
                #final_res$score_level<-as.numeric(as.character(final_res$score_level))
                
                if(nrow(final_res)<min_ions_perchem){
                    
                    final_res$score_level<-0 #max(1,final_res$score_level)
                }
                
                
                #print(final_res)
                final_res<-as.data.frame(final_res)
                return(final_res)
                
}
jaspershen/MSannotator documentation built on May 18, 2019, 5:55 p.m.