Nothing
##################################################
# File: rSCA.modeling.r
# Desp: R function for modeling using SCA
# Date: Jan 15, 2014, Regina, SK, Canada
# Author: Xiuquan Wang
# Email: xiuquan.wang@gmail.com
##################################################
##################################################
# Variables
# ================================================
#: new environment
rSCA.env = new.env()
#: significance level
rSCA.env$n_alpha = 0.95
#: data matrix
rSCA.env$o_sample_data_x = 0
rSCA.env$n_sample_size = 0
rSCA.env$n_sample_x_cols = 0
rSCA.env$o_sample_data_y = 0
rSCA.env$n_sample_y_cols = 0
#: matrix to store the sorted results
rSCA.env$o_sorted_matrix = 0
rSCA.env$o_sorted_temp_matrix = 0
#: output matrix
rSCA.env$o_output_tree = list()
#: stack to store the unprocessed node ids in the output tree
rSCA.env$o_nodeid_stack_cut = c()
rSCA.env$n_nodeid_statck_cut_cursor = 1
rSCA.env$o_nodeid_stack_merge = c()
rSCA.env$n_nodeid_statck_merge_cursor = 1
#: cutting and merging flags for loop, 1:do loop, 0:no need
rSCA.env$n_flag_cut = 0
rSCA.env$n_flag_merge = 0
#: some statistical infos for the results
rSCA.env$n_cut_times = 0
rSCA.env$n_merge_times = 0
rSCA.env$n_leafnodes_count = 0
#: hash table, hash function = (a + b) % n_sample_size
#: structure: [value, a, b, pointer]
rSCA.env$o_hashtable_matrix = matrix(0, rSCA.env$n_sample_size, 4)
#: hash table list: (value, a, b, pointer)
rSCA.env$o_hashtable_list = list()
rSCA.env$n_hashtable_list_index = 0
#: output files & paths
rSCA.env$s_tree_file = ""
rSCA.env$s_tree_filepath = ""
rSCA.env$s_map_file = ""
rSCA.env$s_map_filepath = ""
rSCA.env$s_logfile = ""
rSCA.env$s_logfilepath = ""
#: map representation: interval or mean
rSCA.env$n_mapvalue = "mean"
#: using optimization seeking method (Golden Section Search) or not
rSCA.env$b_GSS = FALSE
#: debug display
rSCA.env$b_debug = FALSE
##################################################
#
# NAME: rSCA.modeling
#
# INPUTs:
#
# @modelname: specify a unique model name (updated on March 2, 2020 by Xander Wang at Charlottetown, PE, Canada)
#
# @alpha: significance level, usually in 0.001 - 0.05, default is 0.05
#
# @xfile: a string to specify the full file name of the x file, only supports *.txt or *.csv
#
# @yfile: for y file, similar to x file
#
# @x.row.names: TRUE/FALSE, default is FALSE
#
# @x.col.names: TRUE/FALSE, default is FALSE
#
# @y.row.names: TRUE/FALSE, default is FALSE
#
# @y.col.names: TRUE/FALSE, default is FALSE
#
# @x.missing.flag: a string to specify the missing flag, default is "NA"
#
# @y.missing.flag: a string to specify the missing flag, default is "NA"
#
# @x.type: ".txt" or ".csv", default is ".txt"
#
# @y.type: ".txt" or ".csv", default is ".txt"
#
# @mapvalue: {mean, max, min, median, interval, radius, variation, random}
# default is mean,
# interval: [min, max],
# radius: [mean, radius], radius = (max - min) / 2,
# variation: [mean, sd],
# random: generate randomly from [min, max].
#
# @GSS: TRUE/FALSE, default is FALSE (no optimization seeking), the Golden Section Search method is used for seeking the best cutting point if GSS is TRUE
#
# @debug: TRUE/FALSE, default is FALSE, log file will be created if debug is TRUE
#
# @outfolder: full path to the output folder, the default output folder is tempdir()
#
# OUTPUTs:
#
# @statistics infos: to screen
#
# @detailed infos: to log file, if debug is TRUE
#
# RETURNS:
#
# @model object: treefile + mapfile + type (mean/interval)
#
###################################################
rSCA.modeling <- function(modelname = "rSCA", alpha = 0.05, xfile, yfile, x.row.names = FALSE, x.col.names = FALSE, y.row.names = FALSE, y.col.names = FALSE, x.missing.flag = "NA", y.missing.flag = "NA", x.type = ".txt", y.type = ".txt", mapvalue = "mean", GSS = FALSE, debug = FALSE, outfolder = tempdir())
{
rSCA.env$n_alpha = alpha
rSCA.env$n_mapvalue = mapvalue
rSCA.env$b_GSS = GSS
rSCA.env$b_debug = debug
#: model list
o_model = list(treefile = "", mapfile = "", logfile = "", type = "mean", totalNodes = 0, leafNodes = 0, cuttingActions = 0, mergingActions = 0)
#: data matrix
o_xdata = 0
o_ydata = 0
#: read x data file
if (x.type == ".txt")
{
if (x.row.names == TRUE && x.col.names == TRUE)
o_xdata = read.table(xfile, header = TRUE, row.names = 1, na.strings = x.missing.flag)
else if (x.row.names == TRUE && x.col.names == FALSE)
o_xdata = read.table(xfile, header = FALSE, row.names = 1, na.strings = x.missing.flag)
else if (x.row.names == FALSE && x.col.names == TRUE)
o_xdata = read.table(xfile, header = TRUE, na.strings = x.missing.flag)
else if (x.row.names == FALSE && x.col.names == FALSE)
o_xdata = read.table(xfile, header = FALSE, na.strings = x.missing.flag)
}
if (x.type == ".csv")
{
if (x.row.names == TRUE && x.col.names == TRUE)
o_xdata = read.csv(xfile, header = TRUE, row.names = 1, na.strings = x.missing.flag)
else if (x.row.names == TRUE && x.col.names == FALSE)
o_xdata = read.csv(xfile, header = FALSE, row.names = 1, na.strings = x.missing.flag)
else if (x.row.names == FALSE && x.col.names == TRUE)
o_xdata = read.csv(xfile, header = TRUE, na.strings = x.missing.flag)
else if (x.row.names == FALSE && x.col.names == FALSE)
o_xdata = read.csv(xfile, header = FALSE, na.strings = x.missing.flag)
}
#: read y data file
if (y.type == ".txt")
{
if (y.row.names == TRUE && y.col.names == TRUE)
o_ydata = read.table(yfile, header = TRUE, row.names = 1, na.strings = y.missing.flag)
else if (y.row.names == TRUE && y.col.names == FALSE)
o_ydata = read.table(yfile, header = FALSE, row.names = 1, na.strings = y.missing.flag)
else if (y.row.names == FALSE && y.col.names == TRUE)
o_ydata = read.table(yfile, header = TRUE, na.strings = y.missing.flag)
else if (y.row.names == FALSE && y.col.names == FALSE)
o_ydata = read.table(yfile, header = FALSE, na.strings = y.missing.flag)
}
if (y.type == ".csv")
{
if (y.row.names == TRUE && y.col.names == TRUE)
o_ydata = read.csv(yfile, header = TRUE, row.names = 1, na.strings = y.missing.flag)
else if (y.row.names == TRUE && y.col.names == FALSE)
o_ydata = read.csv(yfile, header = FALSE, row.names = 1, na.strings = y.missing.flag)
else if (y.row.names == FALSE && y.col.names == TRUE)
o_ydata = read.csv(yfile, header = TRUE, na.strings = y.missing.flag)
else if (y.row.names == FALSE && y.col.names == FALSE)
o_ydata = read.csv(yfile, header = FALSE, na.strings = y.missing.flag)
}
#: remove missing rows
o_xdata = na.omit(o_xdata)
o_ydata = na.omit(o_ydata)
#: basic infos
n_xdatarow = nrow(o_xdata)
n_ydatarow = nrow(o_ydata)
#: checking if x and y have the same number of rows.
if (n_xdatarow != n_ydatarow)
{
cat("Error: please make sure x and y have the same number of rows.\n")
}
else
{
#: start training
rSCA.env$o_sample_data_x = o_xdata
rSCA.env$n_sample_size = nrow(rSCA.env$o_sample_data_x)
rSCA.env$n_sample_x_cols = ncol(rSCA.env$o_sample_data_x)
rSCA.env$o_sample_data_y = o_ydata
rSCA.env$n_sample_y_cols = ncol(rSCA.env$o_sample_data_y)
rSCA.env$s_tree_file = paste(outfolder, "/tree_", modelname, ".txt", sep = "")
rSCA.env$s_tree_filepath = paste(outfolder, "/tree_", modelname, ".txt", sep = "")
rSCA.env$s_map_file = paste(outfolder, "/map_", modelname, ".txt", sep = "")
rSCA.env$s_map_filepath = paste(outfolder, "/map_", modelname, ".txt", sep = "")
rSCA.env$s_logfile = paste(outfolder, "/log_", modelname, ".txt", sep = "")
rSCA.env$s_logfilepath = paste(outfolder, "/log_", modelname, ".txt", sep = "")
#: start SCA modeling
do_cluster()
#: return the model
o_model$treefile = rSCA.env$s_tree_file
o_model$mapfile = rSCA.env$s_map_file
if (rSCA.env$b_debug)
o_model$logfile = rSCA.env$s_logfile
else
o_model$logfile = "NA"
o_model$s_rslfile = paste(outfolder, "/rsl_", modelname, ".txt", sep = "")
o_model$s_rslfilepath = paste(outfolder, "/rsl_", modelname, ".txt", sep = "")
o_model$type = rSCA.env$n_mapvalue
o_model$totalNodes = length(rSCA.env$o_output_tree)
o_model$leafNodes = rSCA.env$n_leafnodes_count
o_model$cuttingActions = rSCA.env$n_cut_times
o_model$mergingActions = rSCA.env$n_merge_times
return(o_model)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.