R/multilevelannotationstep5.R

Defines functions multilevelannotationstep5

multilevelannotationstep5 <-
function(outloc,max_diff_rt,adduct_weights=NA,filter.by=NA,min_ions_perchem=1,boostIDs=NA,max_isp=5,dbAllinf=NA){

    setwd(outloc)

    outloc2<-paste(outloc,"/stage3/",sep="")

    setwd(outloc)


    browser()

    outloc2<-outloc
    chemscoremat_highconf<-read.delim("Stage3.txt",sep="\t",header=TRUE) #read.table("Stage3.txt",sep="\t",header=TRUE)

    chemscoremat_highconf<-as.data.frame(chemscoremat_highconf)
    chemscoremat_highconf$mz<-as.numeric(chemscoremat_highconf$mz)

    scorethresh<-0.1

    chemscoremat<-chemscoremat_highconf #[which(chemscoremat_highconf$score>=scorethresh),]

    t1<-table(chemscoremat$mz,chemscoremat$chemical_ID)

    s1<-apply(t1,1,sum)

    mzunique<-colnames(which(s1==1))

    dunique<-chemscoremat[which(chemscoremat$mz%in%mzunique),]

    s2<-s1[which(s1>1)]
    mzdup<-names(s2)
    mzdup<-as.data.frame(mzdup)

    if(length(mzdup)>0){
    mzdup<-mzdup[,1]

    bad_ind<-{}
    for(mind in 1:length(mzdup)){

        mznum<-mzdup[mind]

        dmultsub<-chemscoremat[which(chemscoremat$mz%in%mznum),]

	pref_adduct<-adduct_weights[which(adduct_weights$Weight==max(adduct_weights$Weight)),1]
	dgood_add<-which(dmultsub$Adduct%in%pref_adduct) #which(dmultsub$Adduct=="M+H")


	#if(length(dgood_add)>0){
	#dmultsub$score[dgood_add]<-(dmultsub$score[dgood_add])*100
	#}
        com_ind<-which(chemscoremat$mz%in%mznum)

        good_ind<-which(dmultsub$score==max(dmultsub$score))

        for(com_indval in 1:length(com_ind)){

            scoreval<-{}
            if(com_indval%in%good_ind==FALSE){
                #print(com_indval)
                dmat_com<-chemscoremat[which(chemscoremat$chemical_ID%in%dmultsub$chemical_ID[com_indval]),]

                scoreval<-((dim(dmat_com)[1])-1)*dmat_com$score[1]/(dim(dmat_com)[1])

                scorevec<-c(rep(scoreval,length(which(chemscoremat$chemical_ID%in%dmultsub$chemical_ID[com_indval]))))

                if(length(scorevec)<length(which(chemscoremat$chemical_ID%in%dmultsub$chemical_ID[com_indval]))){
                    break;
                }

                chemscoremat$score[which(chemscoremat$chemical_ID%in%dmultsub$chemical_ID[com_indval])]<-scorevec

            }

        }
        com_ind<-com_ind[-good_ind]

        bad_ind<-c(bad_ind,com_ind)

    }

    }else{
	bad_ind<-{}
    }
    if(length(bad_ind)>0){
        chemscoremat_unique<-chemscoremat[-c(bad_ind),]
    }else{
        chemscoremat_unique<-chemscoremat

	print("No redundancies. Exiting step5.")
     	return(0)
	}

    rm(chemscoremat)
    good_ind<-which(chemscoremat_unique$score>=scorethresh)

    chemscoremat_unique_highconf<-{}
    if(length(good_ind)>0){
        chemscoremat_unique_highconf<-chemscoremat_unique[good_ind,]
    }else{


		print("No matches meet the minimum score threshold. Exiting step 5.")
		return(0)
	}


    t1<-table(chemscoremat_unique_highconf$mz,chemscoremat_unique_highconf$chemical_ID)
    t2<-apply(t1,1,sum)
    t2<-apply(t1,1,sum)

    multi_mz<-names(t2[which(t2>1)])


	if(length(multi_mz)>0){
    chemscoremat_unique_highconf$MatchCategory<-gsub(as.character(chemscoremat_unique_highconf$MatchCategory),pattern="Multiple",replacement="Unique")
    chemscoremat_unique_highconf$MatchCategory[which(chemscoremat_unique_highconf$mz%in%multi_mz)]<-"Multiple"
    }
    chemscoremat_highconf<-chemscoremat_unique_highconf

    chemids<-chemscoremat_highconf$chemical_ID

    chemids<-unique(chemids)

    chemscoremat_conf_levels<-rep("High",length(chemids))



    data(adduct_table)
    if(is.na(adduct_weights)==TRUE){
        data(adduct_weights)
        adduct_weights1<-matrix(nrow=2,ncol=2,0)
        adduct_weights1[1,]<-c("M+H",1)
        adduct_weights1[2,]<-c("M-H",1)
        adduct_weights<-as.data.frame(adduct_weights1)
        colnames(adduct_weights)<-c("Adduct","Weight")
    }
    adduct_table<-adduct_table[order(adduct_table$Adduct),]
    chemscoremat_conf_levels1<-lapply(1:length(chemids),function(c)
    {

        cur_chemid<-chemids[c]

        curdata<-chemscoremat_highconf[which(chemscoremat_highconf$chemical_ID==cur_chemid),]

	bool_check=1;


        curdata<-curdata[order(curdata$Adduct),]

        if((is.na(filter.by)==FALSE) && (bool_check==1)){

            check_adduct<-which(curdata$Adduct%in%filter.by)
            if(length(check_adduct)>0){
                bool_check=1;
            }else{
                    bool_check=0;
            }
        }

        if(bool_check==1){


	    final_res<-get_confidence_stage4(curdata,max_diff_rt,adduct_weights=adduct_weights,filter.by=filter.by,max_isp=max_isp,min_ions_perchem=min_ions_perchem)

		Confidence<-0
		#print(final_res)
        	if(final_res!="None"){
            if(is.na(final_res[1,1])==FALSE){


                Confidence<-as.numeric(as.character(final_res[,1]))

                curdata<-final_res #[,-c(1)]

                #print(final_res)
                if(Confidence<2){

                    if(length(which(curdata$Adduct%in%adduct_weights[which(as.numeric(adduct_weights[,2])>0),1]))>0){

                        if(curdata$score>10){

					mnum<-max(as.numeric(as.character(adduct_weights[which(adduct_weights[,1]%in%curdata$Adduct),2])))[1]
				   curdata<-curdata[which(curdata$Adduct%in%adduct_weights[which(as.numeric(as.character(adduct_weights[,2]))>=mnum),1]),]
				    Confidence<-2
                        }
                    }
                }

            }
		}
        }else{
            Confidence<-0
			if(length(which(curdata$Adduct%in%adduct_weights[,1]))>0){

                                if(curdata$score>=10){

                                        #curdata<-curdata[which(curdata$Adduct%in%adduct_weights[,1]),]
                                        mnum<-max(as.numeric(as.character(adduct_weights[which(adduct_weights[,1]%in%curdata$Adduct),2])))[1]
                                   #curdata<-curdata[which(curdata$Adduct%in%adduct_weights[which(as.numeric(as.character(adduct_weights[,2]))>=mnum),1]),]
                                    	if(length(which(curdata$Adduct%in%filter.by))>0){
						curdata<-curdata[which(curdata$Adduct%in%filter.by),]
						Confidence<-2
					}

                                }
                        }

        }

	if(nrow(curdata)>1){
	 if(curdata$score<10){
        if(length(unique(curdata$Adduct))<2){

                Confidence<-1
        }else{
		if(FALSE){
		 if(Confidence<2){

                    if(length(which(curdata$Adduct%in%adduct_weights[which(adduct_weights[,2]>1),1]))>0){

                        if(curdata$score>10){
                            #Confidence<-2

				mnum<-max(as.numeric(as.character(adduct_weights[which(adduct_weights[,1]%in%curdata$Adduct),2])))[1]
                                   curdata<-curdata[which(curdata$Adduct%in%adduct_weights[which(as.numeric(as.character(adduct_weights[,2]))>=mnum),1]),]
                                    Confidence<-2

			}
                    }
                }
		}
	}
        }
	}
         if(cur_chemid%in%boostIDs){

                Confidence<-4
        }

        curdata<-cbind(Confidence,curdata)
        curdata<-as.data.frame(curdata)


        return(curdata)
    })

    #stopCluster(cl)

	outloc2<-paste(outloc,"/stage5/",sep="")
    	dir.create(outloc2)
	setwd(outloc2)

#save(list=c("chemscoremat_conf_levels1","chemids"),file="stage5conf_levels.Rda")


    chemscoremat_conf_levels<-ldply(chemscoremat_conf_levels1,rbind) #unlist(chemscoremat_conf_levels)

    chemscoremat_highconf<-as.data.frame(chemscoremat_highconf)


    chemscoremat_conf_levels<-as.data.frame(chemscoremat_conf_levels)


    #chemconf_levels<-cbind(chemscoremat_conf_levels,chemids)
    #chemconf_levels<-as.data.frame(chemconf_levels)
    #colnames(chemconf_levels)<-c("Confidence","chemids")


   # write.table(chemconf_levels,file="confidence_levels_chemicals.txt",sep="\t",row.names=FALSE)

    curated_res<-chemscoremat_conf_levels # merge(chemconf_levels,chemscoremat_highconf,by.x="chemids",by.y="chemical_ID")
    cnames<-colnames(curated_res)
    cnames[1]<-"Confidence"
    colnames(curated_res)<-as.character(cnames)

	curated_res_isp_check<-gregexpr(text=curated_res$Adduct,pattern="(_\\[(\\+|\\-)[0-9]*\\])")

isp_ind<-which(curated_res_isp_check>0)

if(length(isp_ind)>0){

        formula_vec<-curated_res$Formula  #aregexpr(text=curated_res$Formula,pattern="(_\\[(\\+|\\-)[0-9]*\\])")
        formula_vec<-gsub(x=formula_vec,pattern="[a-z|+|-|0-9]*_",replacement="",ignore.case=T)
        #print(formula_vec)
        #print(formula_vec[isp_ind])
        temp_adducts<-as.character(paste("M","_",formula_vec[isp_ind],sep=""))
        curated_res$Adduct<-as.character(curated_res$Adduct)
        #print(curated_res$Adduct)
        #print(temp_adducts)
        #print(curated_res$Adduct[isp_ind])
        curated_res$Adduct[isp_ind]<-as.character(temp_adducts)
        #print(curated_res$Adduct[isp_ind])
}



   # write.table(curated_res,file="confidence_levels_chemicals.txt",sep="\t",row.names=FALSE)
    cnames<-colnames(curated_res)

    #curated_res<-cbind(curated_res[,3],curated_res[,2],curated_res[,-c(2:3)])


    # cnames[1]<-"chemical_ID"
    #cnames[2]<-"Score_category"
    # colnames(curated_res)<-as.character(cnames)

    # curated_res<-curated_res[order(curated_res[,2],curated_res[,1],curated_res[,3],decreasing=TRUE),]

   # print(dim(curated_res))

    curated_res<-curated_res[order(curated_res$Confidence,curated_res$chemical_ID,curated_res$score,curated_res$Adduct,decreasing=TRUE),]




    outloc3<-paste(outloc,"/stage5/",sep="")

     outloc3<-outloc
    dir.create(outloc3)
    setwd(outloc3)
    #print(curated_res[1:2,])
    curated_res<-curated_res[,-c(15)]

if(FALSE)
{
    d2<-read.table("Stage2.txt",sep="\t",header=TRUE)
    d3pres<-d2[-which(d2$mz%in%curated_res$mz),]
    d3pres<-d3pres[-which(d3pres$chemical_ID%in%curated_res$chemical_ID),]
    d3pres<-d3pres[which(d3pres$Adduct%in%adduct_weights[,1]),]

    conf_vec<-rep(1,length(d3pres[,1]))
    score_vec<-rep(1,length(d3pres[,1]))

   d6pres<-cbind(conf_vec,d3pres[,5],score_vec,d3pres[,11],d3pres[,1:4],d3pres[,6:10],d3pres[,13:14])

    print(curated_res[1:2,1:15])
    print(d6pres[1:2,1:15])
	print(dim(d6pres))
	print(dim(curated_res))

   curated_res<-curated_res[,c(1:15)]

   colnames(d6pres)<-colnames(curated_res)
   curated_res<-rbind(curated_res,d6pres)
}
   curated_res<-as.data.frame(curated_res)


 # curated_res<-curated_res[order(curated_res$Confidence,curated_res$chemical_ID,curated_res$score,curated_res$mean_int_vec,decreasing=TRUE),]

#curated_res<-curated_res[order(curated_res$Confidence,curated_res$score,curated_res$mean_int_vec,decreasing=TRUE),]

  # curated_res<-curated_res[order(curated_res$Confidence,curated_res$chemical_ID,decreasing=TRUE),]
#curated_res<-curated_res[order(curated_res$Confidence,curated_res$score,decreasing=TRUE),]

    t2<-table(curated_res$mz)

    same1<-which(t2==1)

    uniquemz<-names(same1)

    curated_res$MatchCategory=rep("Multiple",dim(curated_res)[1])

    curated_res$MatchCategory[which(curated_res$mz%in%uniquemz)]<-"Unique"

    #curated_res<-curated_res[,-c(3,12:14)]



    write.table(curated_res,file="Stage5.txt",sep="\t",row.names=FALSE)


   curated_res$mz<-sprintf("%.5f",as.numeric(curated_res$mz))


    fname=paste("Stage5_annotation_results",sep="")
    unlink(fname)

    HTMLInitFile(filename=fname,Title="Stage 5 annotation results", outdir=outloc)
    fname=paste(outloc,"/Stage5_annotation_results.html",sep="")
    HTML(curated_res,file=fname,Border=1,innerBorder=1,useCSS=TRUE)
    HTMLEndFile(file=fname)

curated_res<-as.data.frame(curated_res)

    return(curated_res)

}
jaspershen/MSannotator documentation built on May 18, 2019, 5:55 p.m.