Nothing
#' @title Descriptive (Change) statistics
#' @description This function perform two tasks:
#' (i) it generate the descriptive and change statistics
#' of groups, particularly suited for the outputs form
#' the \code{\link{akclustr}} function, and
#' (ii) generates the plots of the groups (performances).
#' @param ak_object An output of \code{\link{akclustr}} function.
#' The object contains individual trajectories and their cluster
#' solution(s) at the specified values of \code{k}. Also, includes
#' the optimal value of \code{k} based on the criterion specified.
#' at (different) values of \code{k} the \code{traj}.
#' @param k [integer] \code{k} cluster to generate its solution.
#' @param reference [numeric] Specifying the reference line from
#' which the direction of each group is measured. Options are:
#' \code{1}: slope of mean trajectory, \code{2}: slope of medoid
#' trajectory, \code{3}: slope of a horizontal line
#' (i.e. slope = 0). Default: \code{1}.
#' @param n_quant [numeric] Number of equal intervals (quantiles)
#' to create between the reference line \code{(R)} and the medoids
#' \code{(M)} of the most-diverging groups of both sides of
#' \code{(R)}. Default is \code{4} - meaning quartile subdivisions
#' on each side of \code{(R)}. In this scenario, the function
#' returns the quartile in which the medoid of each group falls.
#' This result can be used to further categorize the groups into
#' 'classes'. For example, groups that fall within the \code{1st}
#' quartile may be classified as 'Stable' groups (Adepeju et al. 2021).
#' @param show_plots [TRUE or FALSE] Provides the trajectory group
#' plot. Please, see \code{plot_akstats} function for more
#' plot options.
#' Defaults \code{FALSE}
#'
#' @s3method print_akstats
#' @examples
#'
#' data(traj)
#'
#' trajectry <- data_imputation(traj, id_field = TRUE, method = 1,
#' replace_with = 1, fill_zeros = FALSE)
#'
#' print(trajectry$CompleteData)
#'
#' trajectry <- props(trajectry$CompleteData, id_field = TRUE)
#'
#' aksolution <- akclustr(trajectry, id_field = TRUE,
#' method = "linear", k = c(3,5), crit='Calinski_Harabasz')
#'
#' print_akstats(aksolution, k = 4, show_plots=FALSE)
#'
#' @details Generates the plot of trajectory groupings.
#' Given an \code{ak_object} class (from
#' the \code{akclustr} function), this function show the
#' plots of cluster groups. The plot component draws from
#' \code{plot_akstats} function.
#' @return A plot showing group membership or sizes (proportion)
#' and statistics.
#' @references \code{1}. Adepeju, M. et al. (2021). Anchored k-medoids:
#' A novel adaptation of k-medoids further refined to measure
#' inequality in the exposure to crime across micro places,
#' doi: 10.1007/s42001-021-00103-1.
#' @references \code{2}. Wickham H. (2016). Elegant graphics for
#' Data Analysis. Spring-Verlag New York (2016).
# @importFrom reshape2 melt
#' @importFrom stats quantile
#' @importFrom ggplot2 stat_summary scale_colour_brewer theme_light
#' theme geom_area scale_x_continuous scale_fill_brewer facet_wrap
#' @importFrom dplyr bind_cols
#' @export
print_akstats<- function(ak_object, k = 3, reference = 1,
n_quant = 4,
show_plots=FALSE){
UseMethod('print_akstats')
}
#' @export
print_akstats.default <- function(ak_object, k = 3, reference = 1,
n_quant = 4,
show_plots=FALSE){
#first testing that correct values of k is specified.
#get all values of k..
all_K <- as.vector(unlist(lapply(ak_object$solutions, attributes)))
if(!k %in% all_K){
stop(paste("*----k =", k, "is not applicable!. Print the",
"'akobject' to see allowed k-values----*", sep=" "))
}
# check object type
if(class(ak_object)[1] != "akobject"){
stop("*----Object not right type!! 'akclustr' object required!----*")
}
#test data type/or class
#extract variables
traj <- ak_object$traj
clustr <- as.vector(ak_object$solutions[[k-2]])
id_field <- ak_object$id_field
#testing that data and clusters have equal number of elements
if(length(clustr)!=nrow(traj)){
stop("*----Unequal number of clusters elements and trajectories----*")
}
#joining the data with clusters
clustr <- data.frame(cbind(traj, clusters=clustr))
dat <- traj #back up traj
n_quant <- round(n_quant, digits = 0)
if(n_quant < 2 | n_quant > 10){
stop(paste("*----Please, enter an integer between 2",
"and 10 for the 'n_quant' argument'!!!----*", sep=" "))
}
#test id_field is true
if(id_field==TRUE){
dat <- dat[,2:ncol(dat)]
n_CL <- colnames(clustr)[1]
col_names <- as.vector(clustr[,1])
#test if id field is unique
if(!length(col_names)==length(unique(col_names))){
stop(paste("(: The 'id_field' is not a unique field.",
"Function terminated!!! :)", sep=" "))
}
}
#test if id_field is excluded for traj
if(id_field==FALSE){
clustr <- cbind(seq_len(nrow(clustr)), clustr)
}
#collect cluster list
clusters <- as.vector(clustr[,ncol(clustr)])
data_subset <- clustr[,seq_len((ncol(clustr))-1)]
data_subset <- as.data.frame(data_subset)
colnames(data_subset) <- c("code", seq_len((ncol(data_subset))-1))
#data.subset.melted <- suppressWarnings(melt(data_subset, id="code"))
#tranform wide to long (to resolve the rgl.null
#package built problem)
#avoid using 'melt' function
code_ <- rep(col_names, ncol(data_subset)-1)
d_bind <- NULL
for(v in seq_len(ncol(data_subset)-1)){
d_bind <- c(d_bind, as.numeric(data_subset[,(v+1)]))
}
code <- data.frame(location_ids=as.character(code_))
variable <- data.frame(variable=as.character(rep(seq_len((ncol(data_subset))-1),
each=length(col_names))))
value=data.frame(value = as.numeric(d_bind))
data.subset.melted <- bind_cols(code, variable,value)
#append cluster list with traj
data.subset.melted <- cbind(data.subset.melted,
rep(clusters, ncol(data_subset)-1))
colnames(data.subset.melted) <- c("id","Year","value", "clusters")
#----------------------------------------------------
#preparing the data to generate descriptive statitics
year_uni <- as.vector(unique(data.subset.melted$Year))
order_Cluster <- as.vector(unique(data.subset.melted$clusters))
clusters_uni <-
order_Cluster[order(as.vector(unique(data.subset.melted$clusters)))]
change_ave_yr_ALL <- NULL
for(q in seq_len(length(clusters_uni))){
all_clust_list <-
data.subset.melted[which(data.subset.melted$clusters==clusters_uni[q]),]
ave_yr <- NULL
for(m in seq_len(length(year_uni))){
yr_ <-
all_clust_list[which(as.vector(all_clust_list$Year)==year_uni[m]),]
ave_yr <- c(ave_yr, sum(as.numeric(as.character(yr_$value))))
}
change_ave_yr_ALL <- rbind(change_ave_yr_ALL, ave_yr)
}
#whether to plot the clusters
#----------------------------------------------------
#plotting
#----------------------------------------------------
#calling the 'plot_akstats'
options(rgl.useNULL = TRUE)
plt = plot_akstats(ak_object, k = k, type="lines", y_scaling="fixed")
if(show_plots==TRUE){
#plot
#flush.console()
#dev.new(width=3, height=3)
options(rgl.useNULL = TRUE)
print(plt)
}
#do not show plot
if(show_plots==FALSE){
#Do nothing
}
#----------------------------------------------------
#To generate descriptive statistics
all_statistics <- list()
#variable for change statistics
desc_Stats <- NULL
ll_ <- clusters_uni
group <- clusters_uni
for(n in seq_len(length(ll_))){
#Calculating the number of trajectories
a1 <- length(which(clusters%in%ll_[n]))
a2 <- round((length(which(clusters%in%ll_[n]))/
length(clusters))*100,digits = 1)
a3 <- round((change_ave_yr_ALL[n,1] /
sum(change_ave_yr_ALL[,1]))*100, digits = 1)
a4 <- round((change_ave_yr_ALL[n,ncol(change_ave_yr_ALL)]/
sum(change_ave_yr_ALL[,ncol(change_ave_yr_ALL)]))*100, digits = 1)
a5 <- round((a4-a3), digits=1)
a6 <- round(((a4-a3)/a4)*100, digits=1)
desc_Stats <- rbind(desc_Stats, cbind(a1, a2, a3, a4, a5, a6))
}
colnames(desc_Stats) <- c("n","n(%)","%Prop.time1",
"%Prop.timeT", "Change", "%Change")
rownames(desc_Stats) <- seq_len(nrow(desc_Stats))
desc_Stats <- as.data.frame(cbind(group, desc_Stats))
attrib1 <- c("'n'->size (number of traj.); 'n(%)'->%size;
'%Prop.time1'->% proportion of obs. at time 1;
'%Prop.timeT'-> % proportion of obs. at time T;
'Change'-> absolute change in proportion between time1 and timeT;
'%Change'-> % change in proportion between time 1 and timeT")
#----------------------------------------------------
#To generate slope statistics
#required
sl_List <- NULL
time <- as.numeric(seq_len(ncol(dat)))
for(i in seq_len(nrow(dat))){ #i<-1
b <- coefficients(lm(as.numeric(as.character(dat[i,]))~
as.numeric(as.character(time))))
sl_List <- rbind(sl_List, cbind(as.numeric(b[1]),
as.numeric(b[2])))
}
sl_List <- as.data.frame(cbind(seq_len(nrow(sl_List)), sl_List))
colnames(sl_List) <- c("sn", "intersect","slope")
#---------------------------------------------------------------------
#Set the reference line
#if the reference is citywide average
if(reference == 1){
#calculating the citywide trend (mean)
ref_slope <- mean(as.vector(sl_List$slope))
ref_slope <- data.frame(cbind("City",
round(ref_slope, digits = 8)))
colnames(ref_slope) <- c("gr", "slope")
}
if(reference == 2){
#calculating the medoid of all slopes
ref_slope <- median(as.vector(sl_List$slope))
ref_slope <- data.frame(cbind("City",
round(ref_slope, digits = 8)))
colnames(ref_slope) <- c("gr", "slope")
}
if(reference == 3){
#horizontal line as the reference (slope = 0)
ref_slope <- 0
ref_slope <- data.frame(cbind("City",
round(ref_slope, digits = 8)))
colnames(ref_slope) <- c("gr", "slope")
}
#Generate the linear trendlines for all
#trajectories (dropping all intersects)
dat_slopp<- NULL
for(n in seq_len(nrow(sl_List))){ #k<-1
dat_slopp <- rbind(dat_slopp, (0 + (sl_List[n,3]*(seq_len(ncol(dat))))))
}
change_Stats <- NULL
for(d_ in seq_len(length(clusters_uni))){ #d_ <- 1
ids_ <- which(clusters==clusters_uni[d_])
slope_sign_ <- NULL
for(v in seq_len(2)){ #v=1
if(v==1){
all_1 <- round((length(which(dat_slopp[ids_, ncol(dat_slopp)]>
as.numeric(as.character(ref_slope$slope))))/
length(ids_))*100, digits = 1)
}
if(v==2){
all_2 <- round((length(which(dat_slopp[ids_, ncol(dat_slopp)]<
as.numeric(as.character(ref_slope$slope))))/
length(ids_))*100, digits = 1)
}
}
change_Stats <- rbind(change_Stats ,
cbind(d_, all_1, all_2))
}
colnames(change_Stats) <- c("sn","%+ve Traj.","%-ve Traj.")
rownames(change_Stats) <- seq_len(nrow(change_Stats))
change_Stats <- as.data.frame(cbind(group, change_Stats))
attrib2 <- c("'%+ve Traj.'-> % of trajectories with positive slopes;
'%+ve Traj.'-> % of trajectories with negative slopes")
#-----------------------------------------------------------------
#create the 'n_quant' intervals on each side of the reference line
#get the medoid of each group
gr_medoid <- NULL
for(h_ in seq_len(length(clusters_uni))){
gr_medoid <- rbind(gr_medoid, cbind(clusters_uni[h_],
round(median(as.vector(sl_List$slope)[which(clusters==clusters_uni[h_])]),
digits=8)))
}
gr_medoid <- data.frame(gr_medoid)
colnames(gr_medoid) <- c("gr","slope")
#determine the quantile in which each medoid falls
#first, collate all medoids that are falling
#relative to the reference line
temp_falling_ <-
gr_medoid[which(as.numeric(as.character(gr_medoid$slope)) <
as.numeric(as.character(ref_slope$slope))),]
#append the reference slope
all_f <- c(as.numeric(as.character(temp_falling_$slope)),
as.numeric(as.character(ref_slope$slope)))
#create the intervals
interv_ <- as.numeric(as.character(quantile(c(min(all_f), max(all_f)),
probs = seq(0, 1, 1/n_quant))))
#determine where medoid of each group falls
int_V <- findInterval(as.numeric(as.character(temp_falling_$slope)),
interv_, all.inside = TRUE)
intervals_fall <- (n_quant:1)[int_V]
#first, collate all medoids that are falling relative to the reference line
temp_rising_ <- gr_medoid[which(as.numeric(as.character(gr_medoid$slope))>=
as.numeric(as.character(ref_slope$slope))),]
#append the reference slope
all_r <- c(as.numeric(as.character(ref_slope$slope)),
as.numeric(as.character(temp_rising_$slope)))
#create the intervals
interv_ <- as.numeric(as.character(quantile(c(min(all_r),
max(all_r)) ,
probs = seq(0, 1, 1/n_quant))))
#determine where medoid of each group falls
intervals_rise <- findInterval(as.numeric(as.character(temp_rising_$slope)),
interv_, all.inside = TRUE)
rank_positive <- c("1st (+ve)", "2nd (+ve)", "3rd (+ve)",
"4th (+ve)", "5th (+ve)", "6th (+ve)",
"7th (+ve)", "8th (+ve)", "9th (+ve)",
"10th (+ve)")
rank_negative <- c("1st (-ve)", "2nd (-ve)", "3rd (-ve)",
"4th (-ve)", "5th (-ve)", "6th (-ve)",
"7th (-ve)", "8th (-ve)", "9th (-ve)",
"10th (-ve)")
class1<-rank_negative[intervals_fall]
class2<-rank_positive[intervals_rise]
class <- c(class1, class2)
class <- data.frame(class)
colnames(class) <- c(paste("Qtl:","1st-",n_quant, "th", sep=""))
if(n_quant==2){
colnames(class) <- c(paste("Qtl:","1st-",n_quant, "nd", sep=""))
}
if(n_quant==3){
colnames(class) <- c(paste("Qtl:","1st-",n_quant, "rd", sep=""))
}
change_Stats <- cbind(change_Stats, class)
all_statistics <- list(descriptive_stats = desc_Stats,
change_stats = change_Stats)
#-------------------
return(all_statistics)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.