R/hclus_eval.R

#' hclus_eval function
#'
#' This is one of a few functions created by Joe Cauteruccio, Jessie Li, Andrew West of Yale University that are used together to create the hclust_eval() function.
#'@param data_m Data as a matrix.
#'@param dist_m Distance metric of choice; defaults to euclidean.
#'@param clus_m Agglomeration method of choice; defaults to Ward's method.
#'@param plot_op Indicates whether to plot RSQ, RMSSTD, SPRSQ, and CD against each other. Defaults to "T" (true).
#'@param dist_cust A custom distance metric; defaults to NA.
#'@keywords hclus_eval
#'@export
#'
hclus_eval <- function(data_m, dist_m = 'euclidean', clus_m = 'ward', plot_op = T, dist_cust = NA){

  output_list <- list()

  ## Some Initial Calculations

  ### END INITIAL CALCULATIONS

  ### Create Distance matrix and Clustering ###
  Info <- paste('Creating Distance Matrix using', dist_m)
  print(Info)

  if (dist_m == 'custom' & !is.na(dist_cust)) {dist1 <- dist_cust}
  else dist1 <- dist(data_m, method= dist_m)


  Info <- paste('Clustering using', clus_m)
  print(Info)

  clust1 <- hclust(dist1, method = clus_m)

  print('Clustering Complete. Access the Cluster object in first element of output')

  output_list[[1]] <- clust1


  rs_out <- affected.rows(clust1$merge)

  me <- gen_cutmat(clust1, dim(data_m)[1])
  rs <- rs_out[[2]]

  # Calculate Metrics #

  print('Calculating RMSSTD')
  output_list[[2]] <- RMSSTD_FUNC(data_m, rs, dim(data_m)[1])
  print('RMSSTD Done. Access in Element 2')

  print('Calculating RSQ')
  output_list[[3]] <- RSQ_FUNC(data_m, me, dim(data_m)[1])
  print('RSQ Done. Access in Element 3')

  print('Calculating SPRSQ')
  output_list[[4]] <- SPRS_FUNC(data_m, rs, dim(data_m)[1], me)
  print('SPRSQ Done. Access in Element 4')

  print('Calculating Cluster Dist. ')
  output_list[[5]] <- cluster.dis(data_m, clust1, output_list[[4]])
  print('CD Done. Access in Element 5')

  ylim_n <- max(output_list[[2]], output_list[[3]], output_list[[4]], output_list[[5]])

  if (plot_op == T){
    plot(output_list[[3]], type = 'l', col = 'red', ylim = c(0, ylim_n), lwd=3)
    lines(output_list[[2]], col = 'blue', lwd=3)
    lines(output_list[[4]], col = 'green', lwd=3)
    lines(output_list[[5]], col = 'black', lwd=3)
    legend('topright', c('RSQ', 'RMSSTD', 'SPRSQ', 'CD'), lty = c(1 ,1,1,1),
           col = c('red', 'blue', 'green', 'black'))
  }

  return(output_list)

}
18kimn/yalestats documentation built on May 9, 2019, 2:17 a.m.