R/MAIN_biopax_comparison.R

######################################## MAIN_biopax_comparison ########################################
MAIN_biopax_comparison<-
    function(pwid_to_compare=c("all"
                               ,"none")
             ,pwid_to_plot=c("none"
                             ,"same"
                             ,"test5")
             ,new_biopax
             ,orig_biopax
             ,new_biopax_tag="_new"
             ,orig_biopax_tag="_orig"
             ,verbose = TRUE
             ,pw_df_path="default"
             ,source_name=NULL
             ,groupnum=NULL
             ,ngroups=NULL
             
             
    ){
        #' @title
        #' MAIN -- Compare BioPAX Objects
        #' @description 
        #' Compare pathways in two BioPAX objects. 
        #' @details 
        #' Allows to compare all control components between two biopax objects.
        #' @param pwid_to_compare Which pathway IDs to use for the comparison of control components? Takes either a vector of pathway IDs, 
        #' or \code{all} to process all pathways, or \code{none} to not use this comparison at all.
        #' @param pwid_to_plot Which pathways to plot for subsequent visual inspection? Takes either a vector of pathway IDs, 
        #' or \code{same} to process the same pathways as for control component comparison, 
        #' or \code{test5} to plot 5 random pathways (can change to any number),
        #' or \code{none} to not use this comparison at all.
        #' @param new_biopax First BioPAX object.
        #' @param orig_biopax Second BioPAX object.
        #' @param new_biopax_tag First BioPAX Object tag.
        #' @param orig_biopax_tag BioPAX object.
        #' @param verbose Logical. Show all pertaining progress?
        #' @param pw_df_path Path to the source table with pathway, id, name, and source values.
        #' @param source_name BioPAX public source name.
        #' @param groupnum Split pathway ids into \code{ngroups} groups. 
        #' Only process group #\code{groupnum} out of a total of \code{ngroups}.
        #' @param ngroups See \code{groupnum}.
        
        #' @author 
        #' Ivan Grishagin
        
        #establish initial key parameters, if not supplied by user
        if(pw_df_path=="default"){
            pw_df_path<-
                system.file("extdata"
                            ,"pathways_matched_to_sources_current_version.xlsx"
                            ,package="RIGbiopax")
        }
        
        #IMPORTANT! Clean original biopax values (the new one should be clean)
        #otherwise there will be some inconsistencies
        orig_biopax<-
            orig_biopax %>% 
            clean_biopax
        
        st<-Sys.time()
        
        if(pwid_to_compare=="none"){
            pwid_to_compare<-
                NULL
        } else if(pwid_to_compare=="all"){
            pwid_to_compare<-
                read_excel_astext(path = pw_df_path
                                  #,col_types = rep("text",11)
                                  ,sheet = 1) %>%
                filter(!is.na(biopax.Pathway.Name)
                       ,Source==source_name) %>%
                .$biopax.Pathway.ID
            
        }
        #take only a chunk of pathways based on quantile split provided
        if (!is.null(groupnum)
            & !is.null(ngroups)){
            #get split intervals based on desired number of intervals 
            #and vector length
            quant_splits<-
                quantile(1:length(pwid_to_compare)
                         ,probs=seq(0,1
                                    ,1/ngroups)
                         ,type=1)
            #define interval span
            if(groupnum==1){
                start<-1
            } else {
                start<-
                    quant_splits[groupnum]+1
            }
            end<-
                quant_splits[groupnum+1]
            #cut out desired interval span
            pwid_to_compare<-
                pwid_to_compare[start:end]
            message("Comparing pathways #"
                    ,start
                    ," to #"
                    ,end
                    ," from "
                    ,source_name
                    ," biopax.")
        }
        
        if(pwid_to_plot=="none"){
            pwid_to_plot<-
                NULL
        } else if(pwid_to_plot=="same"){
            pwid_to_plot<-
                pwid_to_compare
        } else if(length(grep("test"
                              ,pwid_to_plot))>0){
            pwnum<-
                gsub("test"
                     ,""
                     ,pwid_to_plot) %>%
                as.integer
            if(is.na(pwnum)){
                stop("MAIN_biopax_comparison: wrong number of test pathways!")
            }
            set.seed(123)
            pwid_to_plot<-
                load_biopax_pathways(new_biopax)$biopax.Pathway.ID %>%
                sample(pwnum)
            
            message("Plotting "
                    ,pwnum
                    ," random pathways.")
        }
        
        #plot biopax graphs
        if(!is.null(pwid_to_plot)){
                check_biopax(biopax=new_biopax
                                  ,pw_to_check=pwid_to_plot
                                  ,what="id"
                                  ,output_identifier=new_biopax_tag
                                  ,verbose = verbose)
                
                check_biopax(biopax=orig_biopax
                                  ,pw_to_check=pwid_to_plot
                                  ,what="id"
                                  ,output_identifier=orig_biopax_tag
                                  ,verbose = verbose)
        }
       
        #extract controllers and controlleds
        #and compare them
        if(is.null(pwid_to_compare)){
            return()
        }
        message("Comparing control components of "
                ,length(pwid_to_compare)
                ," pathways..."
                )
        status_vect<-
            pwid_to_compare %>%
            sapply(new_biopax=
                       new_biopax
                   ,orig_biopax=
                       orig_biopax
                   ,function(pwid
                             ,new_biopax
                             ,orig_biopax){
                       new_bp_contr<-
                           pathway2RegulatoryGraph_Rancho(biopax=new_biopax
                                                          ,pwid=pwid
                                                          ,returnGraph=FALSE
                           )
                       
                       orig_bp_contr<-
                           pathway2RegulatoryGraph_Rancho(biopax=orig_biopax
                                                          ,pwid=pwid
                                                          ,returnGraph=FALSE
                           )
                #if both don't have components (likely, something went wrong)
                #return "nocomponents"
                if(is.null(new_bp_contr) &
                   is.null(orig_bp_contr)){
                    return("no_control_components")
                }
                new_in_orig<-
                    round(100*sum(new_bp_contr %in% orig_bp_contr)/length(new_bp_contr) 
                          ,digits = 0) 
                    
                orig_in_new<-
                    round(100*sum(orig_bp_contr %in% new_bp_contr)/length(orig_bp_contr) 
                          ,digits = 0) 
                   
                curr_status<-
                    paste(new_in_orig
                          ,length(new_bp_contr)
                          ,orig_in_new
                          ,length(orig_bp_contr)
                          #,orig_bp_contr[!orig_bp_contr %in% new_bp_contr]
                          #,new_bp_contr[!new_bp_contr %in% orig_bp_contr]
                          ,sep=" | ")
                
                return(curr_status)
            })
        
        #make and record the control component comparison status dataframe
        if(!is.null(status_vect)){
            
            status_df<-
                status_vect %>% 
                strsplit(split="\\|") %>% 
                do.call(rbind.data.frame
                        ,.) %>% 
                cbind.data.frame(pwid_to_compare
                                 ,.)
            #if there are no components, there will be only 2 columns
            if(ncol(status_df)==2){
                #append 3 columns
                colnames(status_df)<-
                    c("col1"
                      ,"col2")
                status_df<-
                    status_df %>% 
                    mutate(col3=col2
                           ,col4=col2
                           ,col5=col2)
            } else if(ncol(status_df)!=5){
                warning("MAIN_biopax_comparison: number of columns in the final output is neither 2 nor 5! Weird!")
            }
            try(colnames(status_df)<-
                    c("pwid"
                      ,"new-in-orig, %"
                      ,"N_new_control_components"
                      ,"orig-in-new, %"
                      ,"N_orig_control_components"
                      #,"orig_not_in_new"
                      #,"new_not_in_orig"
                    ))
            
            write.table(status_df
                        ,file = paste(Sys.Date()
                                      ,source_name
                                      ,groupnum
                                      ,"biopax_comparison_report.txt"
                                      ,sep="_")
                        ,sep = "\t"
                        ,quote = FALSE
                        ,row.names = FALSE
                        ,col.names = TRUE)
        }
        et<-Sys.time()
        
        msg<-
            paste0(source_name
                   ," biopax comparison took "
                   ,round(difftime(et
                                   ,st
                                   ,units = "mins")
                          ,1)
                   ," minutes.")
        message(msg)
        write(msg
              ,"time_log.txt"
              ,sep = "\n"
              ,append=TRUE)
    }
######################################## MAIN_biopax_comparison ########################################
grishagin/RIGconvertbiopax documentation built on May 5, 2019, 9:18 a.m.