# R/geNet_algorithm.R

# Defines functions export_geNet_result geNet

# Documented in export_geNet_result geNet

#############################################################################
################################# main geNet function #######################
#############################################################################
#' Execute the geNet algorithm
#'
#' Function to execute the geNet algorithm on a binary dataframe of genes occurrences (presence/absence data).
#' @param input_binary_df input binary dataframe containing genes occurrences. Mandatory argument.
#' input_binary_df is an object of class "dataframe". The rownames are the strains names, the columns names are unique genes/nodes IDs.
#' @param clust_method clustering method. Default to "infomap".
#' @param type_weight possible values:
#' * logpvalue: the weights of the edges will be the negative log adjusted p-values
#' * coeff: the weights of the edges will be the correlation coefficients values
#' Note: This is true only for the positive edges. The negative edges will have 0 weights in both cases.
#' The negative edges do not influence the topology of the network or the clustering.
#' Default to "coeff".
#' @param cores number of cores to use for parallel processing. Default to 1.
#' @param test_pvalue significance test to use. Default to "cor_test".
#' * cor_test: use Pearson correlation test approach (faster)
#' * chisquare: use chi-square test approach (slower)
#' @param pval_thr_pos threshold p-value positive edges. Default to 0.01.
#' @param pval_thr_neg threshold p-value negative edges. Default to 0.1.
#' @param out_dir output directory. Required when export_results=TRUE (an error is
#' raised before any computation starts if it is missing); ignored otherwise.
#' @param export_results export the geNet output in csv files? Default to FALSE.
#' @return list of two objects of class "ffdf".
#' * nodes: reports info about the nodes and clustering
#' * edges: reports info about the connections between the nodes
#' This format is designed to be used with the visNetwork package.
#' @export
#' @examples
#' \dontrun{geNet(input_binary_df,clust_method="infomap",type_weight="coeff",cores=4)}
#' @importFrom dplyr %>%
geNet <- function(input_binary_df, clust_method = "infomap",
                  type_weight = "coeff", cores = 1, out_dir, export_results = FALSE,
                  test_pvalue = "cor_test", pval_thr_pos = 0.01, pval_thr_neg = 0.1) {
  # Resolve the export flag once. as.logical() keeps accepting the values the
  # previous `== T` comparison accepted (e.g. 1 or "TRUE"), while isTRUE()
  # treats NA / non-scalar input as "do not export" instead of failing in if().
  do_export <- isTRUE(as.logical(export_results))
  # Fail fast: out_dir has no default and is only needed at the very end, so
  # without this check a missing out_dir would surface only after the whole
  # (potentially long) computation has finished.
  if (do_export && missing(out_dir)) {
    stop("out_dir must be provided when export_results=TRUE", call. = FALSE)
  }
  #-------------------checking and pre-processing functions --------------------
  print("############### checking input binary data #################")
  check_input_geNet(binary_df = input_binary_df)
  #----------------------------- generate the base scores df ----------------------
  print("##################### generate the base scores edges df ################## ")
  final_df_phi <- generate_final_df(binary_matrix = input_binary_df,
                                    n_cores = cores,
                                    test = test_pvalue)
  #-------------- format the df correctly -------------------
  print("##################### format the edges df correctly ################## ")
  final_df_score <- final_score(final_df_phi_score = final_df_phi,
                                sel_weight = type_weight,
                                pval_thr_pos = pval_thr_pos,
                                pval_thr_neg = pval_thr_neg)
  # ------------------ generate the igraph object ------------
  # apply default group layer and color layer
  print("##################### generate the igraph object################## ")
  igraph_network <- gen_network_obj(as.data.frame(final_df_score))
  data_new_groups <- get_groups_based_on_clustering(igraph_network, method = clust_method)
  # Store the colour vector returned by the clustering step as the "color"
  # attribute of the vertices, addressed by vertex name.
  igraph_network <- set_vertex_attr(igraph_network,
                                    name = "color",
                                    index = vertex_attr(igraph_network)$name,
                                    value = data_new_groups$final_col_vec)
  # --------------------- conversion igraph to visnetwork -------------------
  print("##################### conversion igraph to visnetwork ################## ")
  data <- gen_visnetwork_data(igraph_network)
  # ------------------------ export results -----------------------------
  if (do_export) {
    print("##################### exporting geNet results ################## ")
    export_geNet_result(data, out_directory = out_dir)
  }
  print("Done")
  data
}

#' Export geNet results
#'
#' Function to export the geNet output.
#' It is automatically called by the geNet() function if export_results=TRUE
#' @param geNet_output the object generated by the geNet function. It is a list of two ffdf objects.
#' * nodes: ffdf object containing the information about the nodes
#' * edges: ffdf object containing the information about the edges
#' @param out_directory the directory to write the output. It is created if it does not exist yet.
#' @param format the user can export the geNet output in csv format ("csv") or in ff archive format ("ffData").
#' In both cases, the nodes information (e.g., clustering association)
#' and the edges information (e.g., connections weight) are written separately.
#' Default to "csv".
#' @return In case of the csv format, two csv files are generated in the output directory
#' * nodes_data.csv: it contains the information about the nodes
#' * edges_data.csv: it contains the information about the edges
#'
#' In case of the ffData format, the nodes and the edges are saved with ffsave()
#' in the output directory under the base names nodes_data and edges_data.
#' @export
#' @examples
#' \dontrun{export_geNet_result(geNet_output,out_directory="./",format="csv")
#' # export output in current working directory "./"
#' }
#' @import ff ffbase
export_geNet_result <- function(geNet_output, out_directory, format = "csv") {
  # Reject unsupported formats before touching the file system.
  valid_format <- is.character(format) && length(format) == 1L &&
    !is.na(format) && format %in% c("ffData", "csv")
  if (!valid_format) {
    stop("not valid format argument, valid options: ffData,csv")
  }
  # dirname()/basename() below need a single character path; give a clear
  # message instead of their low-level errors.
  if (missing(out_directory) || !is.character(out_directory) ||
      length(out_directory) != 1L || is.na(out_directory) ||
      !nzchar(out_directory)) {
    stop("out_directory must be a single non-empty path", call. = FALSE)
  }

  # Rebuild the directory from its dirname/basename parts (this drops a
  # trailing separator) so the file names can be appended uniformly.
  path <- file.path(dirname(out_directory), basename(out_directory))
  if (!dir.exists(path) &&
      !dir.create(path, recursive = TRUE, showWarnings = FALSE)) {
    stop("could not create output directory: ", path, call. = FALSE)
  }

  # NOTE: the local names `nodes_data` / `edges_data` are intentional; they are
  # the objects handed to ffsave() by name, so they must not be renamed.
  nodes_data <- geNet_output$nodes
  edges_data <- geNet_output$edges

  if (format == "ffData") {
    ffsave(nodes_data, file = file.path(path, "nodes_data"))
    ffsave(edges_data, file = file.path(path, "edges_data"))
  } else {
    write.csv.ffdf(nodes_data, file.path(path, "nodes_data.csv"))
    write.csv.ffdf(edges_data, file.path(path, "edges_data.csv"))
  }
}
# haneylab/geNet documentation built on Oct. 4, 2020, 8:40 a.m.