############################################################################################
###########################################################################################
#'Assesses mapping characteristics across multiple ECM sizes
#'
#'map_size provides diagnostic plots and summaries of select criteria for determining how the size of the SOM influences characteristics of the SOM model.
#'@param trn_dat is a data object to train the map. Should be a numerical or factor based data matrix.
#'@param kmn is the minimum number of map units (aka nodes) to evaluate. Default value is 2.
#'@param kmx is maximum number of map units (aka nodes) to evaluate. Default is 5*sqrt(n).
#'@param itermax. maximum number of iterations passed to map_ecm. Default is 500*number of map nodes (k).
#'@param nstarts. number of random initializatons passed to map_inits. Default is 5.
#'@param maptopo. map topology passed to map_ecm. Default is hexagonal.
#'@param distmet. distance method passed to map_ecm. Default is Euclidean.
#'@param lmode. self-organizing map learning algorithm passed to map_ecm
#'@param symsize sets symbol size on plots
#'@return Panels a-d on the diagnostic plot illustrate common model performance metrics as a function of map size. Panels e-f examine within-class-sum-of-squares and frequency distributions as a function of map size. A list of model and class-level perfomance statistics is also returned.
#' \itemize{
#' \item {xdim} x dimension
#' \item {ydim} y dimension
#' \item {k} Number of map nodes
#' \item {R2} R2
#' \item {ADJ_R2} Adjusted R2
#' \item {MAE} mean absolute error based on class assignment distances
#' \item {RMSE} root-mean-square-error based on class assignment distances
#' \item {AIC} a form of Akaikes Information Criteria applied to clustering algorithms
#' \item {TotWCSS} Total Within-Cluster Sum-of-Squares
#' \item {N} Number class assignments
#' \item {FREQ} Proportion of class assignments
#' \item {WCD} average within-class distance
#' \item {BCD} average between-class distance
#' \item {WB_Ratio} WCD/BCD
#'
#' }
#'@details An important step in the application of ECM are the user provided SOM inputs for the dimensions of the mapping (i.e., size). Here we provide common model performance metrics and class-level evaluations in effort to assist the user in determining an appropriate map size.
#'@export
#Develop a set of functions to evaluate a range of common Self-Organizing Map sizes
map_size<-function(trn_dat, kmn=NULL, kmx=NULL, itermax.=NULL, nstarts.=NULL, maptopo.=NULL, distmet.=NULL,
lmode.=NULL, symsize=1) {
#set Data
#Data
trn_dat<-data.frame(trn_dat)
n<-dim(trn_dat)[1]
p<-dim(trn_dat)[2]
varnames<-names(trn_dat)
nvars<-p
nobs<-n
#Set training data
data_trn<-data.matrix(trn_dat)
#set Data distance matrix
dist_mat<-stats::dist(x=data_trn, method="euclidean")
#Set function defaults for map_ecm
if (is.null(kmn)) kmn <- 2
if (is.null(kmx)) kmx <- 5*sqrt(nobs)
#Set common dimensions
som_x<-c(2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20)
som_y<-c(1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20)
som_k<-som_x*som_y
som_k
#Set test range of sizes
minsize<-which(abs(som_k-kmn)==min(abs(som_k-kmn)))
maxsize<-which(abs(som_k-kmx)==min(abs(som_k-kmx)))
#Set function defaults for map_ecm
if (is.null(maptopo.)) maptopo. <- "hexagonal"
if (is.null(distmet.)) distmet.<-"euclidean"
if (is.null(lmode.)) lmode. <- "online"
if (is.null(nstarts.)) nstarts. <- 1
MAP_EVAL<-data.frame()
CLASS_EVAL<-data.frame()
for (i in minsize:maxsize){
somx<-som_x[i]
somy<-som_y[i]
somk<-somx*somy
if (is.null(itermax.)) itermax. <- 500*(somx*somy)
mi<-map_inits(trn_dat=data_trn, xdim=somx, ydim=somy, nstarts=nstarts.)
opt_init<-mi$opt_init
map_obj<-map_ecm(trn_dat=data_trn, xdim=somx, ydim=somy, itermax=itermax.,
maptopo=maptopo., distmet=distmet., lmode=lmode., inits=opt_init)
print(summary(map_obj))
#Characterize summary features
#Map_stats
map_stat<-map_stats(map_obj)
#SOM GRID and coordinates
XYs<-paste(round(map_obj$grid$pts[,1],1),round(map_obj$grid$pts[,2],1), sep="")
NODE<-1:somk
#SOM Coordinates and reference table
som_grid<-data.frame(NODE=factor(NODE),
SOM_X=as.numeric(map_obj$grid$pts[,1]),
SOM_Y=as.numeric(map_obj$grid$pts[,2]))
#Summarize Frequency Assignments
N<-table(map_obj$unit.classif)
FREQ<-round(100*(table(map_obj$unit.classif)/sum(table(map_obj$unit.classif))),2)
freq.tab<-data.frame(N, FREQ)
freq.tab2<-merge(som_grid, freq.tab, by.x="NODE", by.y="Var1", all.x=TRUE)
freq.tab2$Var1.1<-NULL
colnames(freq.tab2)<-c("NODE","SOM_X","SOM_Y","N","FREQ")
freq.tab2<-freq.tab2[order(as.numeric(freq.tab2$NODE)),]
map_eval<-data.frame(xdim=somx, ydim=somy, k=somk,
R2=map_stat$R2,
ADJ_R2=map_stat$ADJ_R2,
MAE=map_stat$MAE,
RMSE=map_stat$RMSE,
AIC=map_stat$AIC,
TotWCSS=map_stat$TOTWCSS)
class_eval<-data.frame(xdim=somx, ydim=somy, k=somk,
N=freq.tab2$N,
FREQ=freq.tab2$FREQ,
WCSS=map_stat$WCSS,
BCSS=map_stat$BCSS,
WB_RATIO=mean(map_stat$WCSS)/map_stat$BCSS)
MAP_EVAL<-rbind(MAP_EVAL,map_eval)
CLASS_EVAL<-rbind(CLASS_EVAL, class_eval)
}
#Evaluation Plots
opar<-graphics::par(mfrow=c(1,1), mar=c(5,4,3,2), pty="s", cex=1, xpd=FALSE)
graphics::par(mfrow=c(3,2), mar=c(4,4,1.5,1), family='serif', ask=FALSE, xpd=FALSE)
#Plots
plot(MAP_EVAL$ADJ_R2~MAP_EVAL$k, ylab="Proportion of Variance", xlab="Number of nodes (k)",
main="a) Adjusted R2", type="b", pch=19, cex=symsize, col="darkgrey",
ylim=c(0,1))
plot(MAP_EVAL$RMSE~MAP_EVAL$k, ylab="Distance", xlab="Number of nodes (k)",
main="b) Root-Mean-Square-Error (RMSE)", type="b", pch=19, cex=symsize, col="darkgrey",
ylim=c(0,max(MAP_EVAL$RMSE)*1.2))
plot(MAP_EVAL$MAE~MAP_EVAL$k, ylab="Distance", xlab="Number of nodes (k)",
main="c) Mean-Absolute-Error (MAE)", type="b", pch=19, cex=symsize, col="darkgrey",
ylim=c(0,max(MAP_EVAL$MAE)*1.2))
plot(MAP_EVAL$AIC~MAP_EVAL$k, ylab="AIC", xlab="Number of nodes (k)",
main="d) Akaike Information Criterion (AIC)", type="b", pch=19, cex=symsize, col="darkgrey",
ylim=c(0,max(MAP_EVAL$AIC)*1.2))
graphics::stripchart(CLASS_EVAL$WCSS~CLASS_EVAL$k, ylab="", xlab="Number of nodes (k)",
main="e) Within-Class Sum-of-Squares (WCSS)", vertical = TRUE, pch=19, cex=symsize, col="darkgrey")
graphics::stripchart(CLASS_EVAL$N~CLASS_EVAL$k, ylab="Count", xlab="Number of nodes (k)",
main="f) Within-Class Sample Size (n)", vertical = TRUE, pch=19, cex=symsize, col="darkgrey")
on.exit(graphics::par(opar))
on.exit(graphics::layout(1))
ECM_SIZE<-list(MAP_EVAL=MAP_EVAL, CLASS_EVAL=CLASS_EVAL)
return(invisible(ECM_SIZE))
}
############################################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.