R/umbrella_module_analyse_data.R

Defines functions AnalyseData

Documented in AnalyseData

#!/usr/bin/env Rscript

##########
# Notice #
##########

# Umbrella: A Biased Generic Random Walk Algorithm for Community Detection
# Copyright (C) 2020 William Willis Whinn

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

##############
# References #
##############

# - https://tinyurl.com/yycfo2zh

#########
# Notes #
#########

# - This function will allow a user to analyse a data set, including randomly-
#   generated data, to highlight the layout and inform user-input for the
#   RandomWalk() function.
# - By providing an outline of these data, a user may 'bias' their Random Walk.
#   This is useful if there are clusters of communities and the user wishes to
#   traverse a particular cluster.
# - It is also possible to use this function to validate network data. Data sets
#   such as the Karate Club (Zachary) and the random data generated by Umbrella
#   (GenerateRandomNetworkFile()) are all of type 'list'. This function will
#   terminate if data are provided in an incompatible format.

#############
# Functions #
#############

# AnalyseData: validate a network data set and summarise its layout.
#
# Arguments:
# - dataset:     Network data whose typeof() is 'list'. Objects inheriting from
#                'data.frame' are converted with igraph::graph_from_adj_list();
#                objects inheriting from 'igraph' are used as supplied.
# - draw_graph:  When exactly TRUE, the graph is plotted. Any other value,
#                including the unevaluated default, skips plotting.
# - random_seed: When exactly TRUE, umbrella::ApplyRandomSeed() is called. When
#                exactly FALSE, or when the argument is not supplied, a note is
#                printed and nothing is seeded. Any other supplied value
#                terminates the analysis.
#
# Value:
# - On success, a named list holding the (possibly converted) graph, the
#   typeof() and class() of the data as supplied, the edge and vertex counts,
#   the adjacency matrix and the sparse transition (stochastic) matrix.
# - On any validation failure an error line is printed and NULL is returned.
AnalyseData <- function(dataset,
                        draw_graph = c(TRUE, FALSE),
                        random_seed = c(TRUE, FALSE))
{
  #################################
  # Argument Parsing: random_seed #
  #################################

  # NOTES:
  # - The default is a length-two vector, so isTRUE() and isFALSE() are both
  #   FALSE for it and control reaches the missing() branch.

  if (isTRUE(random_seed))
  {
    umbrella::ApplyRandomSeed()
  }
  else if (isFALSE(random_seed))
  {
    print(paste("NOTE: Argument 'random_seed' is set to 'FALSE'. Proceeding",
                "with machine default."))
  }
  else if (missing(random_seed))
  {
    print(paste("NOTE: Argument 'random_seed' is missing. Proceeding with",
                "machine default."))
  }
  else
  {
    # FAILURE: Catch unknown error.
    print("ERROR: An unknown error occurred. Terminating Data Analysis.")
    return()
  }

  #############################
  # Argument Parsing: dataset #
  #############################

  if (missing(dataset))
  {
    # FAILURE: Data have not been provided. Terminate analysis.
    print(paste("ERROR: Argument 'dataset' has not been defined. Terminating",
                "Data Analysis."))
    return()
  }
  else if (typeof(dataset) == 'list')
  {
    # SUCCESS: Data exists and is in the correct format.
    print("NOTE: Data is of type 'list'.")

    # NOTES:
    # - Continue outside of IF statement.
    # - Error conditions will terminate data analysis.
    # - Class and type are recorded here, before any conversion below, so the
    #   payload describes the data as the caller supplied it.

    dataset_class <- class(dataset)
    dataset_type <- typeof(dataset)
  }
  else
  {
    # FAILURE: Catch unknown error.
    print("ERROR: An unknown error occurred. Terminating Data Analysis.")
    return()
  }

  #################
  # Data Analysis #
  #################

  print("NOTE: Validation Complete. No errors encountered.")

  # NOTES:
  # - inherits() is used instead of comparing class() with '==': class() may
  #   return several names, and a condition of length greater than one is an
  #   error inside if().

  if (inherits(dataset, 'data.frame'))
  {
    # SUCCESS: Convert data into correct format for network analysis.
    # NOTE(review): this treats each column of the data frame as one vertex's
    # adjacency list - confirm that this is the intended input layout.
    print(paste("NOTE: Data are 'data.frame' format. Converting to 'igraph'."))

    dataset <- igraph::graph_from_adj_list(dataset, mode = 'all',
                                           duplicate = FALSE)

    print(paste("NOTE: Data have been converted to 'igraph'."))
  }
  else if (inherits(dataset, 'igraph'))
  {
    # SUCCESS: No data conversion required.
    print(paste("NOTE: Data are 'igraph' format. No conversion required."))
  }
  else
  {
    # FAILURE: Catch unknown error.
    print("ERROR: An unknown error occurred. Terminating Data Analysis.")
    return()
  }

  # Get number of edges (connections) within the graph object.
  dataset_edge_count <- igraph::ecount(dataset)
  print(paste("NOTE: Data contains a total of", dataset_edge_count,
              "connections (edges)."))

  # Get number of vertices (nodes) within the graph object.
  dataset_vertex_count <- igraph::vcount(dataset)
  print(paste("NOTE: Data contains a total of", dataset_vertex_count,
              "nodes (vertices)."))

  ################################
  # Argument Parsing: draw_graph #
  ################################

  if (isTRUE(draw_graph))
  {
    igraph::plot.igraph(dataset,
                        main = 'Network Data Graph',
                        sub = paste("Umbrella", packageVersion("umbrella")))
  }

  ############################
  # Create Transition Matrix #
  ############################

  dataset_transition_matrix <- igraph::stochastic_matrix(dataset, sparse = TRUE)

  ############################
  # Create Adjacency Matrix  #
  ############################

  # NOTES:
  # - Namespace-qualified like every other igraph call in this function, so
  #   the function does not depend on igraph being attached by the caller.

  dataset_adjacency_matrix <- igraph::as_adjacency_matrix(dataset)

  ############################
  # Compile Analysis Payload #
  ############################

  # NOTES:
  # - This named list will contain the analysis of the dataset and will be
  #   returned.

  analysis_payload <- list(
    'dataset' = dataset,
    'dataset_type' = dataset_type,
    'dataset_class' = dataset_class,
    'dataset_edge_count' = dataset_edge_count,
    'dataset_vertex_count' = dataset_vertex_count,
    'dataset_adjacency_matrix' = dataset_adjacency_matrix,
    'dataset_transition_matrix' = dataset_transition_matrix
  )

  ###############
  # Return Data #
  ###############

  # NOTES:
  # - Return 'analysis_payload' to the caller.
  # - This allows assignment to variable.
  # - Must access specific item using 'analysis_payload$dataset' for example.

  return(analysis_payload)
}

# End of File.
ultraviolet-1986/umbrella documentation built on July 20, 2020, 9:02 p.m.