R/old/wpd_clust.R

Defines functions wpd_clust

#' Title
#'
#' @param .data
#' @param harmony_tbl
#' @param filter_comb
#' @param nperm
#' @param nsamp
#'
#' @return
#' @export
#'
#' @examples
#' library(gravitas)
#' library(tidyverse)
#' library(parallel)
#' library(tsibble)
#' library(rlang)
#' sm <- smart_meter10 %>%
#'   filter(customer_id %in% c("10006704", "10017936", "10006414", "10018250"))
#' gran1 <- "hour_day"
#' gran2 <- NULL
#' harmonies <- sm %>%
#'   harmony(
#'     ugran = "month",
#'     filter_in = "wknd_wday",
#'     filter_out = c("hhour", "fortnight")
#'   )
#'
#' v <- suppressWarnings(wpd_clust(sm, harmony_tbl = harmonies))
#' v
wpd_clust <- function(.data,
                      harmony_tbl = NULL,
                      filter_comb = NULL,
                      nperm = 2,
                      nsamp = 2,
                      kopt = 2) {
  if (is.null(kopt)) {
    koptimal <- fpc::nselectboot(d,
      B = 50,
      method = "complete",
      clustermethod = fpc::disthclustCBI,
      classification = "averagedist",
      krange = 2:nmaxclust
    )

    kopt <- koptimal$kopt
  }

  if (is.null(harmony_tbl)) {
    stop("harmony table must be provided")
  }

  harmonies <- harmony_tbl %>%
    mutate(comb = paste(facet_variable,
      x_variable,
      sep = "-"
    )) %>%
    filter(comb %in% c(
      "hour_day-wknd_wday",
      "day_month-hour_day",
      "wknd_wday-hour_day",
      "hour_day-day_week",
      "day_week-hour_day"
    )) %>%
    select(-comb)


  # if(is.null(key)){
  key <- tsibble::key(.data)
  key <- key[1] %>% as.character()
  # }

  uni_cust <- unique(.data %>% pull(!!sym(key)))
  customer_ref <- tibble(
    customer_serial_id = as.character(seq(length(uni_cust))),
    customer_id = uni_cust
  )


  elec_split <- .data %>% group_split(!!sym(key))

  elec_select_harmony <- parallel::mclapply(seq_len(length(elec_split)), function(x) {
    data_id <- elec_split %>%
      magrittr::extract2(x) %>%
      as_tsibble(index = reading_datetime)


    k <- hakear::select_harmonies(data_id,
      harmony_tbl = harmonies,
      response = {{ response }},
      nperm = nperm,
      nsamp = nsamp
    )
  }, mc.cores = parallel::detectCores() - 1, mc.preschedule = FALSE, mc.set.seed = FALSE) %>%
    dplyr::bind_rows(.id = "customer_serial_id") %>%
    # dplyr::mutate(!!key := m) %>%
    # dplyr::select(-m) %>%
    dplyr::left_join(customer_ref) %>%
    dplyr::select(-customer_serial_id)

  write_rds(elec_select_harmony, "data/elec_select_harmony.rds")

  mydist <- elec_select_harmony %>%
    mutate(comb = paste(facet_variable, x_variable, sep = "-")) %>%
    select(comb, customer_id, wpd) %>%
    pivot_wider(names_from = comb, values_from = wpd)

  hc <- stats::hclust(dist(mydist[-1]), method = "complete")

  groups <- tibble(group = cutree(hc, k = kopt), customer_id = mydist$customer_id)
  groups
}
Sayani07/gracsr documentation built on Dec. 18, 2021, 12:59 p.m.