akmedoids: Anchored Kmedoids for Longitudinal Data Clustering

Documented in plot_akstats

#' @title Plot of cluster groups.
#' @description Takes the 'ak_object' from the
#' \code{'akclustr'} as input and produce either the 'line' plot
#' or 'stacked' histogram.
#' @param ak_object An output of \code{\link{akclustr}} function.
#' The object contains individual trajectories and their cluster
#' solution(s) at the specified values of \code{k}. Also, includes
#' the optimal value of \code{k} based on the criterion specified.
#' at (different) values of \code{k} the \code{traj}.
#' @param k [integer] \code{k} cluster to generate its solution.
#' @param type [character] plot type. Available options are:
#' \code{"lines"} and \code{"stacked"}.
#' @param y_scaling [character] works only if \code{type="lines"}.
#' \code{y_scaling} set the vertical scales of the cluster panels.
#' Options are: \code{"fixed"}: uses uniform scale for all panels,
#' \code{"free"}: uses variable scales for panels.
#' @param reference [numeric] Specifying the reference line from
#' which the direction of each group is measured. Options are:
#' \code{1}: slope of mean trajectory, \code{2}: slope of medoid
#' trajectory, \code{3}: slope of a horizontal line
#' (i.e. slope = 0). Default: \code{1}.
#' @param n_quant [numeric] Number of equal intervals (quantiles)
#' to create between the reference line \code{(R)} and the medoids
#' \code{(M)} of the most-diverging groups of both sides of
#' \code{(R)}. Default is \code{4} - meaning quartile subdivisions
#' on each side of \code{(R)}. In this scenario, the function
#' returns the quartile in which the medoid of each group falls.
#' This result can be used to further categorize the groups into
#' 'classes'. For example, groups that fall within the \code{1st}
#' quartile may be classified as 'Stable' groups (Adepeju et al. 2021).
# @s3method plot_akstats
#' @examples
#'
#' data(traj)
#'
#' trajectry <- data_imputation(traj, id_field = TRUE, method = 1,
#' replace_with = 1, fill_zeros = FALSE)
#'
#' print(trajectry$CompleteData)
#'
#' trajectry <- props(trajectry$CompleteData, id_field = TRUE)
#'
#' aksolution <- akclustr(trajectry, id_field = TRUE,
#' method = "linear", k = c(3,5), crit='Calinski_Harabasz')
#'
#' plot_akstats(aksolution, k = 4, type="lines",
#' y_scaling="fixed")
#'
#' plot_akstats(aksolution, k = 4, reference = 1,
#' n_quant = 4, type="stacked")
#'
#' @details Generates the plots of cluster groups - same plots
#' generated by the \code{'show_plots'} argument of \code{print_akstats}.
#' The function draw from the functionalities of the
#' \code{ggplot2} library.
#' For a more customized visualisation, we recommend that users
#' deploy \code{ggplot2} directly (\code{Wickham H. (2016)}).
#' @return A plot showing group membership or sizes (proportion)
#' and statistics.
#' @references \code{1}. Adepeju, M. et al. (2021). Anchored k-medoids:
#' A novel adaptation of k-medoids further refined to measure
#' inequality in the exposure to crime across micro places,
#' doi: 10.1007/s42001-021-00103-1.
#' @references \code{2}. Wickham H. (2016). Elegant graphics for
#' Data Analysis. Spring-Verlag New York (2016).
#' @importFrom stats quantile
# @importFrom utils flush.console
# @importFrom grDevices dev.new
#' @importFrom ggplot2 stat_summary scale_colour_brewer theme_light
#' theme geom_area scale_x_continuous scale_fill_brewer facet_wrap
#' @importFrom dplyr bind_cols
#' @export
plot_akstats<- function(ak_object, k = 3, reference = 1,
                      n_quant = 4,
                      type = "lines",
                      y_scaling="fixed"){

  UseMethod('plot_akstats')
}

#' @export
plot_akstats.default <- function(ak_object, k = 3, reference = 1,
                                  n_quant = 4,
                                  type = "lines",
                                  y_scaling="fixed"){

  #first testing that correct values of k is specified.
  #get all values of k..
  all_K <- as.vector(unlist(lapply(ak_object$solutions, attributes)))

  if(!k %in% all_K){
    stop(paste("*----k =", k, "is not applicable!. Print the",
               "'akobject' to see allowed k-values----*", sep=" "))
  }

  # check object type
  if(class(ak_object)[1] != "akobject"){
  stop("*----Object not right type!! 'akclustr' object required!----*")
  }

  #test data type/or class

  #extract variables
  traj <- ak_object$traj
  clustr <- as.vector(ak_object$solutions[[k-2]])
  id_field <- ak_object$id_field

  #testing that data and clusters have equal number of elements
  if(length(clustr)!=nrow(traj)){
    stop("*----Unequal number of clusters elements and trajectories----*")
  }

  #joining the data with clusters
  clustr <- data.frame(cbind(traj, clusters=clustr))

  dat <- traj #back up traj

  n_quant <- round(n_quant, digits = 0)

  if(n_quant < 2 | n_quant > 10){
    stop(paste("*----Please, enter an integer between 2",
               "and 10 for the 'n_quant' argument'!!!----*", sep=" "))
  }

  #test id_field is true
  if(id_field==TRUE){
    dat <- dat[,2:ncol(dat)]
    n_CL <- colnames(clustr)[1]
    col_names <- as.vector(clustr[,1])

    #test if id field  is unique
    if(!length(col_names)==length(unique(col_names))){
      stop(paste("(: The 'id_field' is not a unique field.",
                 "Function terminated!!! :)", sep=" "))
    }
  }

  #test if id_field is excluded for traj
  if(id_field==FALSE){
    clustr <- cbind(seq_len(nrow(clustr)), clustr)
  }

  #collect cluster list
  clusters <- as.vector(clustr[,ncol(clustr)])

  data_subset <- clustr[,seq_len((ncol(clustr))-1)]

  data_subset <- as.data.frame(data_subset)

  colnames(data_subset) <- c("code", seq_len((ncol(data_subset))-1))

  #data.subset.melted <- suppressWarnings(melt(data_subset, id="code"))

  #tranform wide to long (to resolve the rgl.null
  #package built problem)
  #avoid using 'melt' function
  code_ <- rep(col_names, ncol(data_subset)-1)
  d_bind <- NULL
  for(v in seq_len(ncol(data_subset)-1)){
    d_bind <- c(d_bind, as.numeric(data_subset[,(v+1)]))
  }

  code <- data.frame(location_ids=as.character(code_))
  variable <- data.frame(variable=as.character(rep(seq_len((ncol(data_subset))-1),
                                                   each=length(col_names))))
  value=data.frame(value = as.numeric(d_bind))

  data.subset.melted <- bind_cols(code, variable,value)

  # data.subset.melted <- data.frame(cbind(code=code_, variable =
  #                       rep(seq_len((ncol(data_subset))-1),
  #                       each=length(col_names)), value=d_bind))

  #append cluster list with traj
  data.subset.melted <- cbind(data.subset.melted,
                              rep(clusters, ncol(data_subset)-1))
  colnames(data.subset.melted) <- c("id","Year","value", "clusters")

  #----------------------------------------------------
  #preparing the data to generate descriptive statitics
  year_uni <- as.vector(unique(data.subset.melted$Year))
  order_Cluster <- as.vector(unique(data.subset.melted$clusters))
  clusters_uni <-
    order_Cluster[order(as.vector(unique(data.subset.melted$clusters)))]

  change_ave_yr_ALL <- NULL

  for(q in seq_len(length(clusters_uni))){ #q=1

    all_clust_list <-
      data.subset.melted[which(data.subset.melted$clusters==clusters_uni[q]),]

    ave_yr <- NULL

    for(m in seq_len(length(year_uni))){
      yr_ <-
        all_clust_list[which(as.vector(all_clust_list$Year)==year_uni[m]),]

      ave_yr <- c(ave_yr, sum(as.numeric(as.character(yr_$value))))
    }

    change_ave_yr_ALL <- rbind(change_ave_yr_ALL,  ave_yr)
  }




  #whether to plot the clusters
  #----------------------------------------------------
  #plotting
  #----------------------------------------------------
  ggplot <- aes <- Year <- value <- id <- geom_line <- facet_wrap <-
    geom_smooth <- theme_minimal <- variable <- group <- NULL

  #plot option 1:
  if(type=="lines"){
    if(y_scaling=="fixed"){
      #options(rgl.useNULL = TRUE)
      plt <- (ggplot(data.subset.melted, aes(x=Year, y=value,
                                             group=id, color=clusters)) +
                geom_line() +
                stat_summary(fun.y=mean, geom="line", aes(group=clusters),
                             color="black", size=1) +
                facet_wrap(~clusters, scales = "fixed") +
                facet_wrap(~clusters) +
                scale_colour_brewer(palette = "Set1")) #clusters
    }

    if(y_scaling=="free"){
      options(rgl.useNULL = TRUE)
      plt <- (ggplot(data.subset.melted, aes(x=Year, y=value,
                                             group=id, color=clusters)) +
                geom_line() +
                stat_summary(fun.y=mean, geom="line", aes(group=clusters),
                             color="black", size=1) +
                facet_wrap(~clusters, scales = "free") +
                facet_wrap(~clusters) +
                scale_colour_brewer(palette = "Set1") +
                theme_light()) #clusters
    }
  }

  #----------------------------------------------------
  #plot option 2:
  if(type=="stacked"){
    change_ave_yr_ALL_transpose <- t(change_ave_yr_ALL)
    grp.dat<-data.frame(change_ave_yr_ALL_transpose,
                        row.names=seq_len(nrow(change_ave_yr_ALL_transpose)))
    names(grp.dat)<-clusters_uni
    p.dat<-data.frame(Year=row.names(grp.dat),grp.dat,stringsAsFactors=F)

    #p.dat<-melt(p.dat,id='Year')
    #discarding the use of 'melt' function above
    code_ <- rep(seq_len(nrow(change_ave_yr_ALL_transpose)),
                 ncol(p.dat)-1)
    e_bind <- NULL
    for(v in seq_len(ncol(p.dat)-1)){
      e_bind <- c(e_bind, as.numeric(p.dat[,(v+1)]))
    }

    code <- data.frame(Year=as.character(code_))
    variable <- data.frame(variable=as.character(rep(clusters_uni,
                each=nrow(change_ave_yr_ALL_transpose))))
    value=data.frame(value = as.numeric(e_bind))

    p.dat <- bind_cols(code, variable,value)

    p.dat$Year<-as.numeric(p.dat$Year) #head(p.dat)
    class(p.dat$Year)
    options(rgl.useNULL = TRUE)
    plt <- (ggplot(p.dat,aes(x=Year,y=value)) + theme(legend.position="none")+
              geom_area(aes(fill=variable), colour = "gray30", position='fill') +
              scale_x_continuous(breaks=seq_len(nrow(change_ave_yr_ALL_transpose)),
                                 labels=Year)+
              scale_fill_brewer(palette = "Set1") +
              theme_light())
            }

  all_plots <- list(cluster_plot = plt)

  #-------------------
  return(all_plots)

}