Nothing
#' Fit a geographically weighted lasso with the selected bandwidth
#'
#' @param bw Bandwidth
#' @param x.var input matrix, of dimension nobs x nvars; each row is an observation vector. x should have 2 or more columns.
#' @param y.var response variable for the lasso
#' @param kernel the geographical kernel shape to compute the weight. passed to [GWmodel::gw.weight()]
#' Can be `gaussian`, `exponential`, `bisquare`, `tricube`, `boxcar`
#' @param dist.mat a distance matrix. can be generated by [compute_distance_matrix()]
#' @param alpha the elasticnet mixing parameter. set 1 for lasso, 0 for ridge. see [glmnet::glmnet()]
#' @param adaptive TRUE or FALSE Whether to perform an adaptive bandwidth search or not. A fixed bandwidth means that samples are selected if they fit a determined fixed radius around a location.
#' In a adaptive bandwidth, the radius around a location varies to gather a fixed number of samples around the investigated location
#' @param progress TRUE/FALSE whether to display a progress bar or not
#' @param nfolds the number f folds for the glmnet cross validation
#'
#' @return a `gwlfit` object containing a fitted Geographically weighted Lasso.
#' @export
#'
#' @examples
#'
#' predictors <- matrix(data = rnorm(2500), 50,50)
#' y_value <- sample(1:1000, 50)
#' coords <- data.frame("Lat" = rnorm(50), "Long" = rnorm(50))
#' distance_matrix <- compute_distance_matrix(coords)
#'
#' my.gwl.fit <- gwl_fit(bw = 20,
#' x.var = predictors,
#' y.var = y_value,
#' kernel = "bisquare",
#' dist.mat = distance_matrix,
#' alpha = 1,
#' adaptive = TRUE,
#' progress = TRUE,
#' nfolds = 5)
#'
#' my.gwl.fit
#'
#'
gwl_fit <- function(
bw,
x.var,
y.var,
kernel,
dist.mat,
alpha,
adaptive,
progress = TRUE,
nfolds = 5) {
if(progress){
pb <- progress::progress_bar$new(format = " Computing [:bar] :percent eta: :eta", total = nrow(x.var),
clear = FALSE, width = 60)
}
if(!methods::is(dist.mat, "gwl.dist")){
stop("dist.mat must be an object computed by compute_distance_matrix()")
}
dist.parameters <- list(method = dist.mat$method,
add.noise = dist.mat$noise)
coords <- dist.mat$coords
dist.mat <- dist.mat$dist.mat
coef.mat <- matrix(nrow = nrow(x.var), ncol = ncol(x.var) + 1) #+1 for the intercept
yhat <- lambda <- ymean <- numeric(nrow(x.var))
cols <- colnames(x.var)
x.var <- as.matrix(x.var)
model_list <- list()
for (j in 1:nrow(x.var)) {
# pt.j <- x.var[j, ]
# selecting the neighborhood corresponding to the bw
if(adaptive) index.j <- order(dist.mat[j, ])[1:bw]
if(!adaptive) index.j <- which(dist.mat[j, ] < bw)
# removing observation j from its neighborhood
index.j <- index.j[-1]
# data.j <- data[index.j, ]
x <- as.matrix(x.var[index.j, ])
y <- as.matrix(y.var[index.j])
ymean[j] <- mean(y.var[index.j])
# NA check
index.na <- as.vector(is.na(y))
if (sum(index.na) > 0) {
# data.j <- data.j[!index.na, ]
x <- x[!index.na, ]
y <- y[!index.na]
}
dist.j <- dist.mat[index.j, j]
weight.j <- as.vector(GWmodel::gw.weight(c(dist.j), bw, kernel, adaptive))
lasso.mod <- glmnet::cv.glmnet(x = x, y = y, family = "gaussian", weights = weight.j, nfolds = nfolds,
standardize = T, alpha = alpha, type.measure = "mse", offset = NULL, lambda = NULL)
coef.mat[j, ] <- as.vector(glmnet::coef.glmnet(lasso.mod, s = "lambda.min"))
yhat[j] = as.numeric(stats::predict(lasso.mod, newx = x.var[j, ], s = "lambda.min")[1])
lambda[j] <- lasso.mod$lambda.min
model_list[[j]] <- lasso.mod
if(progress){
pb$tick()
}
}
rmspe <- sqrt(mean((y.var - yhat)^2))
gwl_fitted <- list(coefs = coef.mat, yhat = yhat, yvar = y.var, ymean = ymean, lambda = lambda, rmspe = rmspe, bw = bw, models = model_list, coords = coords, dist.param = dist.parameters, cols = cols, adaptive = adaptive)
if(is.null(gwl_fitted$cols)) {
gwl_fitted$cols <- as.character(1:ncol(x.var))
}
class(gwl_fitted) <- "gwlfit"
return(gwl_fitted )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.