Nothing
#' GWRLASSO: a hybrid model that uses the LASSO model for important variable selection and GWR model with exponential kernel for prediction at an unknown location based on the selected variables.
#'
#' @param data_sp A dataframe containing the response variable and the predictor variables, as well as the coordinates of the locations. In the dataframe, first column is the response variable (y), last two columns are coordinates i.e., Latitude and Longitudes and in between them is the set of predictor variables(X's).
#' @param bw A numeric value specifying the bandwidth parameter for the GWR model. It can be noted that, optimum bandwidth value can vary depending on the specific dataset and bandwidth parameter depends on the spatial pattern of the data
#' @param split_value Splitting value for dividing the dataset into training and testing sets, e.g. 0.7 or 0.8
#' @param exponential_kernel Spatial weight function of the GWR model, e.g. exponential_kernel
#' @param nfolds Specifies the number of folds to use in the cross-validation process for selecting the optimal value of the regularization parameter in LASSO
#' @return A list with the following components:
#' - `Important_vars `: Selected important variables based on LASSO model
#' - `Optimum_lamda`: Optimum lamda value obtained by CV approach
#' - `GWR_y_pred_test`: The GWR predictions at testing locations
#' - `R_square`: R-square value
#' - `rrmse`: relative root means square error value
#' - `mse`: mean squared error
#' - `mae`: mean absolute error
#' @examples
#' n<- 100
#' p<- 7
#' m<-sqrt(n)
#' id<-seq(1:n)
#' x<-matrix(runif(n*p), ncol=p)
#' e<-rnorm(n, mean=0, sd=1)
#' xy_grid<-expand.grid(c(1:m),c(1:m))
#' Latitude<-xy_grid[,1]
#' Longitude<-xy_grid[,2]
#' B0<-(Latitude+Longitude)/6
#' B1<-(Latitude/3)
#' B2<-(Longitude/3)
#' B3<-(2*Longitude)
#' B4<-2*(Latitude+Longitude)/6
#' B5<-(4*Longitude/3)
#' B6<-2*(Latitude+Longitude)/18
#' B7<-(4*Longitude/18)
#' y<-B0+(B1*x[,1])+(B2*x[,2])+(B3*x[,3])+(B4*x[,4])+(B5*x[,5])+(B6*x[,6])+(B7*x[,7])+e
#' data_sp<-data.frame(y,x,Latitude,Longitude)
#' GWRLASSO_exp<-GWRLASSO_exponential(data_sp,0.8,0.7,exponential_kernel,10)
#' @references
#' 1. Brunsdon, C., Fotheringham, A.S. and Charlton, M,E. (1996).Geographically weighted regression: a method for exploring spatial non-stationarity. Geogr Anal.28(4),281-298.<DOI:10.1111/j.1538-4632.1996.tb00936.x>.
#' 2. Friedman, J., Hastie, T. and Tibshirani, R. (2010). Regularization Paths for Generalized Linear Models via Coordinate Descent. Journal of Statistical Software,33(1),1-22.<DOI:10.18637/jss.v033.i01>.
#' 3. Wheeler, D. C. (2009).Simultaneous coefficient penalization and model selection in geographically weighted regression: the geographically weighted lasso.Environment and planning A, 41(3), 722-742.<DOI:10.1068/a40256>.
#' @export
#' @import qpdf
#' @import numbers
#' @import glmnet
#' @import Matrix
#' @importFrom stats cor dist model.matrix predict
GWRLASSO_exponential<- function(data_sp,bw,split_value,exponential_kernel,nfolds) {
# Step1: Generation of simulated spatial population with spatial coordinates
data_sp<-as.data.frame(data_sp)
# Step2: Split data into training and testing sets
train_idx <- sample(nrow(data_sp), split_value * nrow(data_sp))
train_data <- data_sp[train_idx,]
nrow(train_data)
test_data <- data_sp[-train_idx, ]
nrow(test_data)
x_train <-subset(train_data[,-c(1,ncol(train_data)-1,ncol(train_data))])
y_train <-train_data[,1]
x_test <- subset(test_data,select = -c(1,ncol(test_data)-1,ncol(test_data)))
y_test <- test_data[,1]
# Step3: Important variable selection using LASSO
# fit LASSO model for variable selection
train_data_LASSO<-subset(train_data,select = -c(ncol(train_data)-1,ncol(train_data)))
x_lasso <- model.matrix(y ~ ., train_data_LASSO)
y_lasso <- train_data_LASSO[, "y"]
### select important variables based on LASSO model
cvfit <- cv.glmnet(x_lasso, y_lasso, alpha = 1, standardize = TRUE)
coef_path <- predict(cvfit, type="coefficients", s=cvfit$lambda.min)
selected_vars <- row.names(coef_path)[which(coef_path != 0)[-1]] # exclude intercept
# Extracting important variables
imp_var_lasso <- x_train[, selected_vars]
# extract optimal lambda value
opt_lambda <- cvfit$lambda.min
# Step5 : Fitting of the GWR model on training data
coords_train<-cbind(train_data[,ncol(train_data)-1],train_data[,ncol(train_data)])
dists<- as.matrix(dist(coords_train))
# Define the Exponential kernel function
exponential_kernel <- function(dists, bw) {
exp(-(dists/bw))
}
weights <- exponential_kernel(dists, bw)
dim(weights)
Xt <- as.matrix(cbind(rep(1, nrow(x_train)),imp_var_lasso))
dim(Xt)
y_tr<- matrix(y_train)
dim(y_tr)
W.train<-list()
for (i in 1:ncol(weights)){
t <- diag(weights[,i],nrow=nrow(x_train), ncol=nrow(x_train))
W.train[[i]]<-t
}
W.train
Beta.train<-list()
for (i in 1:length(W.train)){
lm<- solve((t(Xt)%*%W.train[[i]]%*%Xt))%*%t(Xt)%*%W.train[[i]]%*%y_tr
Beta.train[[i]]<-lm
}
Beta.train
X.tr_row<-list()
for(i in 1:nrow(Xt)){
X.tr_row[[i]]<-(Xt[i,])
}
y_hat.train<-mapply("%*%", X.tr_row,Beta.train)
y_hat.train
# Step6 : Make predictions at the test locations
lasso_imp_X_test<-x_test[, selected_vars]
coords_test<-cbind(test_data[,ncol(test_data)-1],test_data[,ncol(train_data)])
dists_test <- as.matrix(dist(coords_test))
weights_test <- exponential_kernel(dists_test, bw)
dim(weights_test)
Xtest <- as.matrix(cbind(rep(1, nrow(x_test)),lasso_imp_X_test))
dim(Xtest)
ytest<- matrix(y_test)
dim(ytest)
W.test<-list()
for (i in 1:ncol(weights_test)){
test <- diag(weights_test[,i],nrow=nrow(x_test), ncol=nrow(x_test))
W.test[[i]]<-test
}
W.test
Beta.test<-list()
for (i in 1:length(W.test)){
lm_test<- solve((t(Xtest)%*%W.test[[i]]%*%Xtest))%*%t(Xtest)%*%W.test[[i]]%*%ytest
Beta.test[[i]]<-lm_test
}
Beta.test
X.test_row<-list()
for(i in 1:nrow(Xtest)){
X.test_row[[i]]<-(Xtest[i,])
}
y_hat.test<-mapply("%*%", X.test_row,Beta.test)
y_hat.test
# Step7 : Create summary output
# Compute accuracy measures
rrmse_test <- sqrt(mean((y_hat.test - y_test)^2)) / mean(y_test)
mae_test <- mean(abs(y_hat.test - y_test))
mse_test <- mean((y_hat.test - y_test)^2)
r2_test <- cor(y_hat.test, y_test)^2
# Return results
results_GWRLASSO <- list(Important_vars = selected_vars,
Optimum_lamda = opt_lambda,
GWR_y_pred_test = y_hat.test,
R_square = r2_test,
rrmse=rrmse_test,
mse = mse_test,
mae = mae_test)
return(results_GWRLASSO)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.