R/gwl_fit.R

Defines functions gwl_fit

Documented in gwl_fit

#' Fit a geographically weighted lasso with the selected bandwidth
#'
#' @param bw Bandwidth
#' @param x.var input matrix, of dimension nobs x nvars; each row is an observation vector. x should have 2 or more columns.
#' @param y.var response variable for the lasso
#' @param kernel the geographical kernel shape to compute the weight. passed to [GWmodel::gw.weight()]
#' Can be `gaussian`, `exponential`, `bisquare`, `tricube`, `boxcar`
#' @param dist.mat a distance matrix. can be generated by [compute_distance_matrix()]
#' @param alpha the elasticnet mixing parameter. set 1 for lasso, 0 for ridge. see [glmnet::glmnet()]
#' @param adaptive TRUE or FALSE Whether to perform an adaptive bandwidth search or not. A fixed bandwidth means that samples are selected if they fit a determined fixed radius around a location.
#' In a adaptive bandwidth, the radius around a location varies to gather a fixed number of samples around the investigated location
#' @param progress TRUE/FALSE whether to display a progress bar or not
#' @param nfolds the number f folds for the glmnet cross validation
#'
#' @return a `gwlfit` object containing a fitted Geographically weighted Lasso.
#' @export
#'
#' @examples
#' 
#' predictors <- matrix(data = rnorm(2500), 50,50)
#' y_value <- sample(1:1000, 50)
#' coords <- data.frame("Lat" = rnorm(50), "Long" = rnorm(50))
#' distance_matrix <- compute_distance_matrix(coords)
#' 
#' my.gwl.fit <- gwl_fit(bw = 20,
#'                       x.var = predictors, 
#'                       y.var = y_value,
#'                       kernel = "bisquare",
#'                       dist.mat = distance_matrix, 
#'                       alpha = 1, 
#'                       adaptive = TRUE, 
#'                       progress = TRUE,
#'                       nfolds = 5)
#' 
#' my.gwl.fit
#' 
#' 
gwl_fit <- function(
                       bw,
                       x.var,
                       y.var,
                       kernel,
                       dist.mat,
                       alpha,
                       adaptive,
                       progress = TRUE,
                       nfolds = 5) {


  if(progress){
    pb <- progress::progress_bar$new(format = "  Computing [:bar] :percent eta: :eta", total = nrow(x.var),
                                     clear = FALSE, width = 60)
  }
  
  if(!methods::is(dist.mat, "gwl.dist")){
    stop("dist.mat must be an object computed by compute_distance_matrix()")
  }
  
  dist.parameters <- list(method = dist.mat$method,
                          add.noise = dist.mat$noise)

    coords <- dist.mat$coords
    dist.mat <- dist.mat$dist.mat

  
  coef.mat <- matrix(nrow = nrow(x.var), ncol = ncol(x.var) + 1) #+1 for the intercept
  yhat <- lambda <- ymean <- numeric(nrow(x.var))
  
  cols <- colnames(x.var)
  x.var <- as.matrix(x.var)
  
  model_list <- list()

  for (j in 1:nrow(x.var)) {
    # pt.j <-  x.var[j, ]

    # selecting the neighborhood corresponding to the bw
    if(adaptive) index.j <- order(dist.mat[j, ])[1:bw]
    if(!adaptive) index.j <- which(dist.mat[j, ] < bw)

    # removing observation j from its neighborhood
    index.j <- index.j[-1]

    # data.j <- data[index.j, ]
    x <- as.matrix(x.var[index.j, ])
    y <- as.matrix(y.var[index.j])
    ymean[j] <- mean(y.var[index.j])

    # NA check
    index.na <- as.vector(is.na(y))
    if (sum(index.na) > 0) {
      # data.j <- data.j[!index.na, ]
      x <- x[!index.na, ]
      y <- y[!index.na]
    }

    dist.j <- dist.mat[index.j, j]
    weight.j <- as.vector(GWmodel::gw.weight(c(dist.j), bw, kernel, adaptive))

    lasso.mod <- glmnet::cv.glmnet(x = x, y = y, family = "gaussian", weights = weight.j, nfolds = nfolds,
                           standardize = T, alpha = alpha, type.measure = "mse", offset = NULL, lambda = NULL)

    coef.mat[j, ] <- as.vector(glmnet::coef.glmnet(lasso.mod, s = "lambda.min"))
    yhat[j] = as.numeric(stats::predict(lasso.mod, newx = x.var[j, ], s = "lambda.min")[1])
    lambda[j] <- lasso.mod$lambda.min
    
    model_list[[j]] <- lasso.mod

    if(progress){
      pb$tick()
    }

  }
  rmspe <- sqrt(mean((y.var - yhat)^2))
  
  
  
  gwl_fitted <- list(coefs = coef.mat, yhat = yhat, yvar = y.var, ymean = ymean, lambda = lambda, rmspe = rmspe, bw = bw, models = model_list, coords = coords, dist.param = dist.parameters, cols = cols, adaptive = adaptive)
  
  if(is.null(gwl_fitted$cols)) {
    gwl_fitted$cols <- as.character(1:ncol(x.var))
  }
  

  class(gwl_fitted) <- "gwlfit"

  return(gwl_fitted )
}

Try the GWlasso package in your browser

Any scripts or data that you put into this service are public.

GWlasso documentation built on April 3, 2025, 7:08 p.m.