R/ssim_bandwidth.R

Defines functions ssim_bandwidth

Documented in ssim_bandwidth

#' Bandwidth ranges for the SSIM Index for polygon maps.
#'
#' This function calculates the range of the bandwidth for the SSIM index using the square root of N and optimal trade-off between bias and variance.
#'
#' @param shape A \code{sf} polygon containing the attributes that can create polygon-based maps.
#' @param map1 The name of the first map to compare as a column in the shape.
#' @param map2 The name of the second map to compare as a column in the shape.
#' @param max_bandwidth Maximum size of the bandwidth, and the maximum size needs to be larger than 12.
#' @param standardize If TRUE, standardize the variables before computing the SSIM. Default is TRUE.
#' @param option The option for selecting the range of the bandwidth derived from the optimal trade-off between bias and variance. Default is "midpoint."
#'
#' @return A plot showing the bias/variance trade-off and the range of the optimal trade-off as vertical lines including the square root of N results as well.
#' In addition, the console shows the results.
#'
#' @details This function calculates the bandwidth range for the SSIM index for polygon maps using Gaussian kernel weighting.
#' The bandwidth is calculated by two methods: 1) the square root of N and 2) the optimal trade-off between bias and variance.
#' Users can select the bandwidth values from the range of the optimal trade-off, which minimize the trade-off between bias and variance, generated by two maps.
#'
#' @importFrom ggplot2 ggplot geom_line geom_rect geom_vline labs geom_text
#' @importFrom dplyr select contains
#' @importFrom scales rescale
#' @importFrom stats sd var
#' @examples
#' # Load example sf class object Toronto Area with attributes for maps:
#' # Pampalon Index,CIMD Index,
#' # and percentage of household commuting within the same Census Sub-Division of residence.
#' data("Toronto")
#'
#' # Mapping two attributes
#' plot(Toronto$CIMD_SDD)
#' plot(Toronto$PP_SDD)
#'
#' # Execution of bandwidth with maps above
#' \donttest{
#' ssim_bandwidth(Toronto,"CIMD_SDD","PP_SDD",max_bandwidth=100)
#' }
#'
#' @export ssim_bandwidth


# Functions ---------------------------------------------------------------

ssim_bandwidth<-function(shape, map1,map2,max_bandwidth=max_bandwidth,standardize=TRUE,option="midpoint"){

  if(map1==map2){
    stop("variables are identical")
  }
  if(max_bandwidth < 12){
    stop("The maximum size of the bandwdith is too small to compute the SSIM index")
  }
  if(nrow(shape)<max_bandwidth){
    stop("The maximum size of the bandwdith is larger than the number of lattice")
  }

  if(standardize){
    shape_df<-as.data.frame(shape)
    shape_df$z_score_map1<-(shape_df[,map1]-mean(shape_df[,map1]))/sd(shape_df[,map1])
    shape_df$z_score_map2<-(shape_df[,map2]-mean(shape_df[,map2]))/sd(shape_df[,map2])
    min<-min(min(shape_df$z_score_map1),min(shape_df$z_score_map2))
    shape_df$z_score_map1<-shape_df$z_score_map1-min
    shape_df$z_score_map2<-shape_df$z_score_map2-min
    z_scores<-as.data.frame(cbind(shape_df$z_score_map1,shape_df$z_score_map2))
    colnames(z_scores)<- paste0("zscore",colnames(z_scores))
    names(z_scores)<- sub("V1",map1,names(z_scores))
    names(z_scores)<- sub("V2",map2,names(z_scores))
    map1<-as.character(colnames(z_scores[1]))
    map2<-as.character(colnames(z_scores[2]))
    shape_merged<-cbind(shape,z_scores)


    bw=seq(from=12,to=max_bandwidth,by=1)
    gwsses<-list()

    for(num in bw){
      result<-suppressWarnings(gwss_new(shape_merged,vars = c(map1,map2),bw=num))
      gwsses[[num]]<-result
    }
    df_t<-as.data.frame(matrix())

    for(i in bw){
      temp_df<-as.data.frame(gwsses[[i]])
      colnames(temp_df)<-c(paste(colnames(temp_df),i,sep="."))
      df_t<-cbind(df_t,temp_df)
    }
    df_t<-df_t[-1]

    map1_o<-shape_df$z_score_map1
    map2_o<-shape_df$z_score_map2
  }
  else{
    shape_df<-as.data.frame(shape)
    bw=seq(from=12,to=max_bandwidth,by=1)
    gwsses<-list()
    for(num in bw){
      result<-suppressWarnings(gwss_new(shape_merged,vars = c(map1,map2),bw=num))
      gwsses[[num]]<-result
    }
    df_t<-as.data.frame(matrix())

    for(i in bw){
      temp_df<-as.data.frame(gwsses[[i]])
      colnames(temp_df)<-c(paste(colnames(temp_df),i,sep="."))
      df_t<-cbind(df_t,temp_df)
    }
    df_t<-df_t[-1]
    map1_o<-shape_df$map1
    map2_o<-shape_df$map2
  }
  map1<-as.data.frame(dplyr::select(df_t,contains(map1)))
  map2<-as.data.frame(dplyr::select(df_t,contains(map2)))
  map1_LM<-dplyr::select(map1,contains('_LM'))
  map2_LM<-dplyr::select(map2,contains('_LM'))

  col_names<-paste(seq(from=12,to=max_bandwidth,by=1))
  colnames(map1_LM)<-col_names
  colnames(map2_LM)<-col_names
  df_bias_map1<-apply(map1_LM,2,function(x)(x-map1_o)^2)
  df_bias_map1<-as.data.frame(apply(df_bias_map1,2,mean))
  df_bias_map2<-apply(map2_LM,2,function(x)(x-map2_o)^2)
  df_bias_map2<-as.data.frame(apply(df_bias_map2,2, mean))
  df_variance_map1<-as.data.frame(apply(map1_LM,2,var))
  df_variance_map2<-as.data.frame(apply(map2_LM,2,var))
  bw_order<-as.data.frame(bw)

  df_bias_map1<-as.data.frame(cbind(bw_order,df_bias_map1))
  df_bias_map2<-as.data.frame(cbind(bw_order,df_bias_map2))
  df_variance_map1<-as.data.frame(cbind(bw_order,df_variance_map1))
  df_variance_map2<-as.data.frame(cbind(bw_order,df_variance_map2))
  colnames(df_bias_map1)<-c("Bandwidth","Bias")
  colnames(df_bias_map2)<-c("Bandwidth","Bias")
  colnames(df_variance_map1)<-c("Bandwidth","Variance")
  colnames(df_variance_map2)<-c("Bandwidth","Variance")
  df_bias_map1$Bias<-rescale(df_bias_map1$Bias)
  df_bias_map2$Bias<-rescale(df_bias_map2$Bias)
  df_variance_map1$Variance<-rescale(df_variance_map1$Variance)
  df_variance_map2$Variance<-rescale(df_variance_map2$Variance)

  P_Tradeoff_map1<-as.data.frame(df_bias_map1$Bias-df_variance_map1$Variance)
  Tradeoff_map1<-as.data.frame(cbind(bw_order,P_Tradeoff_map1))
  colnames(Tradeoff_map1)<-c("BW","Tradeoff")
  index_map1<-which.min(abs(Tradeoff_map1$Tradeoff))
  bw_closest_to_zero_map1<-Tradeoff_map1$BW[index_map1]
  bw_closest_to_zero_map1<-ceiling(bw_closest_to_zero_map1)

  P_Tradeoff_map2<-as.data.frame(df_bias_map2$Bias-df_variance_map2$Variance)
  Tradeoff_map2<-as.data.frame(cbind(bw_order,P_Tradeoff_map2))
  colnames(Tradeoff_map2)<-c("BW","Tradeoff")
  index_map2<-which.min(abs(Tradeoff_map2$Tradeoff))
  bw_closest_to_zero_map2<-Tradeoff_map2$BW[index_map2]
  bw_closest_to_zero_map2<-ceiling(bw_closest_to_zero_map2)

  num_rows <- nrow(shape)
  sqrt_num_rows <- ceiling(sqrt(num_rows))

  plot<-ggplot2::ggplot()+ggplot2::geom_line(data=df_bias_map1,ggplot2::aes(Bandwidth,Bias),color="dark blue")+ggplot2::geom_line(data=df_variance_map1,ggplot2::aes(Bandwidth,Variance),linetype="dashed",color="dark blue")+ggplot2::geom_line(data=df_bias_map2,ggplot2::aes(Bandwidth,Bias),color="dark green")+ggplot2::geom_line(data=df_variance_map2,ggplot2::aes(Bandwidth,Variance),linetype="dashed",color="dark green")
  plot<-plot+ggplot2::geom_rect(ggplot2::aes(xmin = min(bw_closest_to_zero_map1,bw_closest_to_zero_map2), xmax = max(bw_closest_to_zero_map1,bw_closest_to_zero_map2), ymin = 0, ymax = 1), fill = "grey", alpha = 0.5)
  plot<-plot+ggplot2::geom_vline(xintercept = sqrt_num_rows, color="black",linewidth=0.3, alpha = 0.5)+geom_vline(xintercept = bw_closest_to_zero_map1,color="red",linewidth=0.3, alpha = 0.5)+geom_vline(xintercept = bw_closest_to_zero_map2,color="red",linewidth=0.3, alpha = 0.5)+labs(x="Bandwidth",y="bias/variance")
  plot<-plot+ggplot2::geom_text(ggplot2::aes(x= sqrt_num_rows,y=0.5), label = as.character(sqrt_num_rows),vjust = -1,size=4)

  if(option=="midpoint"){
    result<- paste("Square root N:", sqrt_num_rows,"Bias-Variance Trade-off:",ceiling(mean(c(bw_closest_to_zero_map1,bw_closest_to_zero_map2))),sep = " ")
  }

  else if(option=="lower"){
    result<- paste("Square root N:", sqrt_num_rows,"Bias-Variance Trade-off:",min(c(bw_closest_to_zero_map1,bw_closest_to_zero_map2)),sep = " ")
  }

  else if(option=="upper"){
    result<- paste("Square root N:", sqrt_num_rows,"Bias-Variance Trade-off:",max(c(bw_closest_to_zero_map1,bw_closest_to_zero_map2)),sep = " ")
  }

  else{
    result<-"Invalid option"
  }
  cat(result)
  return(plot)

}

Try the SSIMmap package in your browser

Any scripts or data that you put into this service are public.

SSIMmap documentation built on Sept. 27, 2023, 1:07 a.m.