##########################################
### Function to derive smooth LUR model
##########################################
#' Function for deriving smooth land use regression (LUR) model
#'
#' \code{smoothLUR} fits a smooth land use regression (LUR) model
#' using the gam() function from the mgcv package. The procedure
#' is outlined in \insertCite{Fritsch2021smooth;textual}{smoothLUR}
#'
#' @aliases smoothLUR
#' @param data A data set which contains the dependent variable and
#' the potential predictors.
#' @param x A character vector stating the variable names of the
#' potential continuous predictors (names have to match the column
#' names of `data`).
#' @param x.discr A character vector stating the variable names of the
#' potential discrete predictors (names have to match the column
#' names of `data`).
#' @param spVar1 A character vector stating the variable name referring
#' to longitude (name has to match the column name of `data`).
#' @param spVar2 A character vector stating the variable name referring
#' to latitude (name has to match the column name of `data`).
#' @param y A character string indicating the name of the dependent
#' variable (name has to match the column name of `data`).
#' @param thresh A numeric value that indicates the maximum share of
#' zero values; if the share is exceeded, the corresponding potential
#' predictor is excluded.
#' @return An object of class `smoothLUR` with the following elements:
#'
#' \item{coefficients}{a vector containing the coefficient estimates}
#'
#' It has `...`, `...`, and `...` methods.
#'
#' @author Svenia Behm and Markus Fritsch
#' @export
#' @importFrom stats as.formula
#' @importFrom stats gaussian
#' @importFrom mgcv gam
#'
#' @seealso
#'
#' \code{\link{parLUR}} for parametric land use regression (LUR)
#' models.
#' \code{\link{kFoldCV}} for k-fold cross-validation for
#' parLUR and smoothLUR objects.
#'
#' @references
#' \insertAllCited{}
#'
#' @examples
#' \dontrun{
#' ## Load data set
#' data(monSitesDE, package="smoothLUR")
#' set.seed(42)
#'
#' ## Code example
#' dat <- monSitesDE[sample(1:nrow(monSitesDE), 40),]
#' m1 <- smoothLUR(data = dat
#' ,x = c("Lon", "Lat", "Alt", "HighDens"
#' ,"LowDens", "Ind", "Transp", "Seap", "Airp", "Constr"
#' ,"UrbGreen", "Agri", "Forest", "PopDens"
#' ,"PriRoad", "SecRoad", "FedAuto", "LocRoute")
#' ,spVar1 = "Lon"
#' ,spVar2 = "Lat"
#' ,y = "Y"
#' ,thresh = 0.95)
#'
#' summary(m1)
#' summary(m1)$adj.r.squared
#' BIC(m1)
#' AIC(m1)
#'
#' \donttest{
#' ## Load data set
#' data(monSitesDE, package="smoothLUR")
#' dat <- monSitesDE
#' m1 <- smoothLUR(data = dat,
#' ,x = c("Lon", "Lat", "Alt", "HighDens"
#' ,"LowDens", "Ind", "Transp", "Seap", "Airp", "Constr"
#' ,"UrbGreen", "Agri", "Forest", "PopDens"
#' ,"PriRoad", "SecRoad", "FedAuto", "LocRoute")
#' ,spVar1 = "Lon"
#' ,spVar2 = "Lat"
#' ,y = "Y"
#' ,thresh = 0.95)
#'
#' summary(m1)
#' summary(m1)$adj.r.squared
#' BIC(m1)
#' AIC(m1)
#'
#' }
#' }
smoothLUR <- function(
data
,x
,x.discr = NA
,spVar1
,spVar2
,y
,thresh = 0.95
){
if(!is.na(x.discr[1])){
names.dat <- c(y, x, x.discr)
} else{
names.dat <- c(y, x)
}
dat <- data.frame(subset(x = data, select = names.dat))
dat <- dat[, apply(X = dat, MARGIN = 2, FUN = function(x){ return(c(sum(x == 0)/length(x) < thresh))})]
dat.cont <- dat[, apply(X = dat, MARGIN = 2, FUN = function(x){ return(c(length(unique(x)) > 8))})]
# 9 parameters have to be estimated by default for each univariate thin plate regression spline
if(!is.na(x.discr[1])){
dat.discr <- dat[, x.discr]
dat <- cbind(dat.cont, dat.discr)
} else{
dat <- dat.cont
}
names.dat[!(names.dat %in% names(dat))]
predAdj <- x[x %in% names(dat)]
Y <- dat[, y]
if(!is.na(x.discr[1])){
X <- subset(x = dat, select = c(x[x %in% names(dat)], x.discr[x.discr %in% names(dat)]))
names.cont.tmp <- names(X)[!names(X) %in% c(spVar1, spVar2, x.discr)]
names.discr.tmp <- names(X)[!names(X) %in% c(spVar1, spVar2, x)]
form.tmp <- stats::as.formula(paste("Y ~ s(",spVar1, ",", spVar2,",k = -1, bs=\"tp\") + ", # bivariate spline for longitude and latitude always considered in the model
paste0("s(", names.cont.tmp, ",k=8, bs=\"tp\")", collapse = "+"), "+", # continuous predictor names
paste0(names.discr.tmp, collapse = "+"), # discrete predictor names
sep = ""))
} else{
X <- subset(x = dat, select = c(x[x %in% names(dat)]))
names.cont.tmp <- names(X)[!names(X) %in% c(spVar1, spVar2)]
form.tmp <- stats::as.formula(paste("Y ~ s(",spVar1, ",", spVar2,",k = -1, bs=\"tp\") + ", # bivariate spline for longitude and latitude always considered in the model
paste0("s(", names.cont.tmp, ",k=8, bs=\"tp\")", collapse = "+"), # continuous predictor names
sep = ""))
}
# to enable 10-fold CV for traffic/industrial sites the parameter k has to be reduced,
# otherwise the error message
# "Fehler in gam(formula = form.tmp, fit = TRUE, method = "P-ML", data = cbind(y, :
# Model has more coefficients than data"
# is returned
gam.tmp <- mgcv::gam(formula=form.tmp, fit=TRUE, method="P-ML", data=cbind(y,X), family=gaussian(),
weights=NULL, subset=NULL, offset=NULL, optimizer=c("outer", "newton"), scale=0,
select=TRUE, knots=NULL, sp=NULL, min.sp=NULL, H=NULL, gamma=1, paraPen=NULL, G=NULL)
# attr(gam.tmp, "class") <- "smoothLUR"
return(gam.tmp)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.