# percopackage: Percolation Analysis functions

# Documented in plotClustFreq

#' Plot Cluster Frequency
#'
#' Percolation cluster size and rank analyses: this function processes the
#' cluster data generated by the cluster extraction program and plots cluster
#' size statistics against the percolation radius.
#'
#' \code{source_file_name} is a string which is used to label the plots and
#' describes the source data set. If it was already input in mapClusters and
#' saved as "working_data/source_file_name_out.csv", it does not need to be
#' given again; a saved name takes precedence over the argument.
#'
#' The function reads "analysis_results/analysis_by_radius.csv" and
#' "working_data/working_data.csv" below the current working directory.
#'
#' Three png-files are generated:
#'
#' a) radius to maximum cluster size,
#'
#' b) radius to mean cluster size and
#'
#' c) radius to normalized max. cluster size
#'
#' and they will include the information of the source file as a subtitle.
#'
#' They are stored as png files in the /analysis_results directory as
#' "radius_to_max_cluster_size", "radius_to_mean_cluster_size" and
#' "radius_to_norm_max_cluster_size".
#'
#' For more information and a code and data example please check the vignette
#' "percolation".
#'
#' @author Simon Maddison
#' @author Sophie C. Schmidt
#'
#' @import stats
#' @import calibrate
#' @import Hmisc
#' @import grDevices
#' @import graphics
#' @import utils
#' @param source_file_name string naming the source data set, used in the
#'  subtitle of every plot. May be omitted (default \code{NULL}) when the name
#'  was saved to "working_data/source_file_name_out.csv"; if neither is
#'  available the function stops with an error.
#' @return Called for its side effect: three plots of radius to maximum
#'  cluster size, radius to mean cluster size and radius to normalized max.
#'  cluster size are written as png files. The value is that of the final
#'  \code{dev.off()} call.
#'
#' @export plotClustFreq
#'
plotClustFreq <- function(source_file_name = NULL) {
  # paths definition
  path_working <- file.path(getwd(), "working_data")
  path_results <- file.path(getwd(), "analysis_results")

  # Stop with a readable message instead of a low-level connection error
  # when one of the required input files is absent.
  require_file <- function(path) {
    if (!file.exists(path)) {
      stop("required input file not found: ", path, call. = FALSE)
    }
    path
  }

  # Columns may arrive as text (read.csv2 keeps dec = ","), so every plotted
  # series goes through the same conversion.
  to_numeric <- function(column) {
    as.numeric(as.character(column))
  }

  # load data generated by nodes_clust_freq-function
  analysis_by_radius <- read.csv2(
    require_file(file.path(path_results, "analysis_by_radius.csv")),
    sep = ","
  )

  # load source_file_name from mapClusters, if it was saved there.
  # Otherwise use the input; without either there is nothing to label with.
  saved_name_file <- file.path(path_working, "source_file_name_out.csv")
  if (file.exists(saved_name_file)) {
    saved_name <- read.csv(saved_name_file, sep = " ")
    source_file_name <- saved_name[1, 1]
  } else if (!is.null(source_file_name)) {
    print(paste("source_file_name is", source_file_name))
  } else {
    stop("source_file_name input needed", call. = FALSE)
  }

  # Read in distance thresholds - this ensures same values used as in
  # clustering script, where it was saved. Only the unit is needed here.
  radius_values <- read.csv2(
    require_file(file.path(path_working, "working_data.csv")),
    header = TRUE,
    sep = ","
  )
  # Use the first row only: if () needs a single value.
  radius_unit <- radius_values$radius_unit[1]
  if (is.null(radius_unit) || is.na(radius_unit)) {
    stop("working_data.csv contains no usable radius_unit", call. = FALSE)
  }

  if (radius_unit == 1) {
    unit_text <- "m"
  } else if (radius_unit == 1000) {
    unit_text <- "km"
  } else {
    unit_text <- paste0(radius_unit, "m")
  }

  radius_vals <- to_numeric(analysis_by_radius$radius)

  # Write one radius-vs-statistic plot as a 21 x 21 cm, 300 dpi png file.
  # Returns the value of dev.off().
  write_radius_plot <- function(png_name, y_values, title_text, y_label) {
    png(
      filename = file.path(path_results, png_name),
      units = "cm", width = 21, height = 21, res = 300
    )
    # Make sure the device is closed even if plot() fails.
    device_open <- TRUE
    on.exit(if (device_open) dev.off(), add = TRUE)

    plot(radius_vals, y_values,
         main = title_text,
         sub = paste("Source File: ", source_file_name),
         xlab = paste("radius ", unit_text),
         ylab = y_label, type = "b")

    device_open <- FALSE
    dev.off()
  }

  # output as png files

  # Plot radius vs max_clust_size
  write_radius_plot(
    "radius_to_max_cluster_size.png",
    to_numeric(analysis_by_radius$max_clust_size),
    "Max cluster size vs radius ",
    "max cluster size"
  )

  # Plot radius vs mean_clust_size
  write_radius_plot(
    "radius_to_mean_cluster_size.png",
    to_numeric(analysis_by_radius$mean_clust_size),
    "Mean cluster size vs radius ",
    "mean cluster size"
  )

  # Plot radius vs normalized max_clust_size
  write_radius_plot(
    "radius_to_norm_max_cluster_size.png",
    to_numeric(analysis_by_radius$max_normalized),
    "Max cluster size (normalized) vs radius ",
    "max cluster size (normalized)"
  )
}