R/SPRS_FUNC.R

#' SPRS_FUNC function
#'
#' This is one of a few functions created by Joe Cauteruccio, Jessie Li, Andrew West of Yale University that are used together to create the hclust_eval() function.
#'
#'@keywords SPRS_FUNC
#'@export
#'
SPRS_FUNC <- function(data_m, rows, dim_m, cut_mat) {
  SPRS_vect <- rep(NA, dim_m)
  SPRS_vect[dim_m] <- 0

  x_bar_all <- apply(data_m, 2, mean)
  tot_ss <- sum(apply(data_m, 1, function(x) (dist(rbind(x, x_bar_all))^2)))


  for (i in (dim_m - 1):1) {

    need_rows <- rows[[i]]

    if (length(need_rows) > 1) {
      x_bar_m <- apply(data_m[need_rows, ], 2, mean)
      w_m <- sum(apply(data_m[need_rows, ], 1,
                       function(x) (dist(rbind(x, x_bar_m))^2)))
    }

    if (length(need_rows) == 1) {
      x_bar_m <- data_m[need_rows, ]
      w_m <- dist(rbind(data_m[need_rows, ], x_bar_m))^2
    }

    # Get clusters of these rows from the previous step
    need_clus_prev <- unique(cut_mat[need_rows, i + 1])

    # Get Rows of cluster K
    need_rows_k <- which(cut_mat[, i + 1] %in% need_clus_prev[1])

    if (length(need_rows_k) > 1) {
      x_bar_k <- apply(data_m[need_rows_k, ], 2, mean)
      w_k <- sum(apply(data_m[need_rows_k, ], 1,
                       function(x) (dist(rbind(x, x_bar_k))^2)))
    }

    if (length(need_rows_k) == 1) {
      x_bar_k <- data_m[need_rows_k, ]
      w_k <- dist(rbind(data_m[need_rows_k, ], x_bar_k))^2
    }

    # Get Rows of cluster L
    need_rows_l <- which(cut_mat[, i + 1] %in% need_clus_prev[2])

    if (length(need_rows_l) > 1) {
      x_bar_l <- apply(data_m[need_rows_l, ], 2, mean)
      w_l <- sum(apply(data_m[need_rows_l, ], 1,
                       function(x) (dist(rbind(x, x_bar_l))^2)))
    }

    if (length(need_rows_l) == 1) {
      x_bar_l <- data_m[need_rows_l, ]
      w_l <- dist(rbind(data_m[need_rows_l, ], x_bar_l))^2
    }

    SPRS_vect[i] <- (w_m - w_k - w_l)/ tot_ss
  }

  return(SPRS_vect)
}
18kimn/yalestats documentation built on May 9, 2019, 2:17 a.m.