#' Prediction at new location using kriging and GAM trend.
#'
#' The method performs a universal kriging estimate where the trend is
#' described by a generalized additive model (GAM). The trend is not
#' regularized (penalty) and must be chosen parsimoniously.
#'
#' @param form Formula defining the trend of the model.
#' @param x Data for training the model.
#' @param xnew Data at new locations.
#' @param loc Formula defining the coordinates (eulidean distance).
#' @param ... Other arguments pass to \link{autoKrige}.
#'
#' @return
#'
#' \item{pred}{Prediction at new locations.}
#' \item{pred.se}{Standard deviation at new locations.}
#' \item{vgm}{Sample variogram.}
#' \item{model}{Fitted variogram model. See \link{vgm}.}
#'
#' @seealso \code{\link{nnBagging}}, \code{\link{roiKrige}}
#'
#' @export
#'
#' @examples
#'
#' ## Gather info in on data.frame
#' xd <- cbind( l1 = log(sapply(floodStream, mean)),
#' floodVars, lon = floodCoord[,1], lat = floodCoord[,2])
#' nsite <- nrow(xd)
#'
#' ## identify a validation and a training set
#' valid <- seq(nsite) %in% sample(seq(nsite), round(.2*nsite))
#' train <- !valid
#'
#' ## formula of GAM trend using spline
#' library(splines)
#' l1Form <- l1 ~ area + slope + elev + ns(map, df = 10) +
#' ns(len, df = 12) + ns(wb, df = 8)
#'
#' fit <- gamKrige(l1Form, x = xd[train,], xnew = xd[valid,], loc = ~lon+lat )
#' print(fit)
#' plot(fit)
#' plotVario(fit)
#' predict(fit)
#'
gamKrige <- function(form, x, xnew, loc,
model = c('Exp', 'Sph', 'Gau'), ...){
x <- as.data.frame(x)
xnew <- as.data.frame(xnew)
## Extract coordinate information
crd <- as.matrix(model.frame(loc,x))
colnames(crd) <- paste('c',seq(ncol(crd)), sep = '')
fl <- paste('~',paste(colnames(crd), collapse = ' + '))
fl <- as.formula(fl)
## Extract trend information
x <- as.matrix(model.frame(form, x))
colnames(x) <- c('y',paste('x',seq(ncol(x)-1),sep=''))
ff <- paste(colnames(x)[-1], collapse = ' + ')
ff <- paste(colnames(x)[1],'~', ff)
ff <- as.formula(ff)
## Extract new sites information
crdNew <- as.matrix(model.frame(loc,xnew))
colnames(crdNew) <- colnames(crd)
xnew <- as.matrix(model.frame(form, xnew))
colnames(xnew) <- colnames(x)
## Transform in spDataFrame
x <- data.frame(x, crd)
sp::coordinates(x) <- fl
xnew <- data.frame(xnew, crdNew)
sp::coordinates(xnew) <- fl
## Perform kriging
out <- automap::autoKrige(ff, input_data = x,
new_data = xnew, model = model,
debug.level = 0, ...)
ans <- list(vgm = out$exp_var,
model = out$var_model,
pred = out$krige_output$var1.pred,
pred.se = out$krige_output$var1.stdev,
call = list(trend = 'gam', spatial = 'kriging'),
fitted = NULL, fitted.se = NULL,
trend = NULL, trend.se = NULL,
resid.se = NULL)
class(ans) <- 'regKrige'
ans
}
#############################################################################
#' Prediction at new location using neural network and modeling of the residuals
#'
#' The method uses neural network and bagging to estimate a trend
#' and then correct for spatial correlation in the residual.
#' The method uses a single hidden layer network that is carried out by
#' \link{nnet}. Initially, the input
#' variables are transformed using a SVD decomposition and the trend
#' is obtained by aggregating (average) several bootstrap samples.
#' The residuals are further predicted using kriging or thin plate spline.
#'
#' @param form Formula defining the trend.
#' @param x Data for training the model
#' @param xnew Data at new locations.
#' @param n Number of units
#' @param k Number of bootstrap sample
#' @param model Model for the residuals. Either 'tps' or
#' variogram models (see \link{vgm}).
#' @param loc Formula providing the spatial coordinates
#' @param ... Other argument pass to \link{autoKrige}
#'
#' @return
#'
#' \item{pred}{Prediction at new locations.}
#' \item{vgm}{Sample variogram.}
#' \item{model}{Fitted variogram model. See \link{vgm}.}
#' \item{resid.se}{Standard deviations of the kriging.}
#'
#' @seealso \link{gamKrige}, \link{roiKrige}.
#'
#' @export
#'
#' @examples
#'
#' ## Gather info in on data.frame
#' xd <- cbind( l1 = log(sapply(floodStream, mean)),
#' floodVars, lon = floodCoord[,1], lat = floodCoord[,2])
#' nsite <- nrow(xd)
#'
#' ## identify a validation and a training set
#' valid <- seq(nsite) %in% sample(seq(nsite), round(.2*nsite))
#' train <- !valid
#'
#' l1Form <- l1 ~ area + slope + elev + map + len + wb
#'
#' fit <- nnKrige(l1Form, x = xd[train,], xnew = xd[valid,], n = 30,
#' model = c('Exp','Gau','Sph'), loc = ~lon+lat)
#' print(fit)
#' plot(fit)
#' predict(fit)
#'
#' fit <- nnKrige(l1Form, x = xd[train,], xnew = xd[valid,], n = 30,
#' model = 'tps', loc = ~lon+lat)
#'
#' fit <- nnKrige(l1Form, x = xd[train,], xnew = xd[valid,], n = 30)
#'
#'
nnKrige<- function(form, x, xnew, n, k = 20,
model = 'none', loc, ...){
## if kriging will be used
if(any(model != 'none')){
## dataFrame of all coordinates
crd <- rbind(model.frame(loc, x),
model.frame(loc, xnew))
}
## Reformat dataFrame
x <- model.frame(form,x)
nx <- nrow(x)
xname <- colnames(x)
u <- model.frame(delete.response(terms(form)),xnew)
u <- cbind(0,u)
colnames(u) <- xname
## Pre-treatement of all the variables by SVD decomposition
u <- rbind(x,u)
u <- data.frame(u[,1],svd(u[,-1])$u)
colnames(u) <- xname
train <- seq(nx)
valid <- seq(nx+1, nrow(u))
## Predict neural networks on valid set using bagging
pred <- matrix(NA, k,length(valid))
yhat <- matrix(NA, k,length(train))
for(ii in seq(k)){
sset <- sample(train, length(train), replace = TRUE)
fit <- nnet::nnet(form, u[sset,], linout = TRUE, size = n,
trace = FALSE)
pred[ii,] <- predict(fit, u[valid,])
yhat[ii,] <- predict(fit, u[train,])
}
## Aggregate baggings
pred <- colMeans(pred)
yhat <- colMeans(yhat)
## compute residuals
yres <- u[train,1] - yhat
ans <- list(fitted = yhat, fitted.se = NULL, resid = yres,
trend = pred, trend.se = NULL)
if(any(model == 'tps')){
## predict residual using Thin plate spline
crd <- data.frame(yres = c(yres, pred), crd)
fit <- fields::Tps(crd[train,-1],crd[train,1])
eHat <- predict(fit, crd[valid,-1])
ans$pred.se <- NULL
ans$pred <- pred + eHat
ans$call <- list(trend = 'nnet', spatial = 'tps')
} else if(any(model == 'none')){
## if residuals not predicted
ans$pred <- pred
ans$pred.se <- NULL
ans$call <- list(trend = 'nnet', spatial = 'none')
} else if(all(model != 'none')){
## predict residuals using kriging
crd <- data.frame(yres = c(yres, pred), crd)
sp::coordinates(crd) <- loc
out <- automap::autoKrige(yres~1, beta = 0, model = model,
input_data = crd[train,],
new_data = crd[valid,], debug.level = 0,
...)
ans$pred <- pred + out$krige_output$var1.pred
ans$resid.se <- out$krige_output$var1.stdev
ans$pred.se <- NULL
ans$vgm <- out$exp_var
ans$model <- out$var_model
ans$call <- list(trend = 'nnet', spatial = 'kriging')
}
class(ans) <- 'regKrige'
return(ans)
}
#############################################################################
#' Prediction at new location using local regression and modeling of the residuals
#'
#' Return the prediction of a local regression model using region of influence (ROI).
#' The residuals are further predicted using kriging or thin plate spline.
#'
#' @param form Formula defining the trend.
#'
#' @param x Data for trainging the model.
#'
#' @param xnew Data at new locations. Validation set.
#'
#' @param nk Size of the neighborhoods.
#'
#' @param loc Formula. Covariates of the similarity between sites
#' (euclidean distance). Used for delineation.
#'
#' @param grp Factor defining groups for calibrating \code{nk}.
#'
#' @param model Model for predicting the residuals. Could be 'tps' or
#' a vector of variogram models. See \link{vgm}.
#'
#' @param loc.spatial Formula defining the spatial covariates (euclidean distance).
#' Used for predicting residuals with spatial correlation.
#'
#' @param ker Logical, should weight be used to favor closer sites.
#' @param ...
#'
#' @return
#'
#' \item{pred}{Prediction at new sites.}
#' \item{pred.se}{Standard deviation at new sites.}
#' \item{trend}{Part of the prediction attributed to trend}
#' \item{trend.se}{Standard deviation associated with the trend}
#' \item{fitted}{Fitted values (training sites)}
#' \item{fitted.se}{Standard deviaton of the fitted values}
#' \item{vgm}{Sample variogram.}
#' \item{model}{Fitted variogram model. See \link{vgm}.}
#'
#' @seealso \link{gamKrige}, \link{nnKrige}
#'
#' @export
#'
#' @examples
#'
#' # Gather info in on data.frame
#' xd <- cbind( l1 = log(sapply(floodStream, mean)),
#' floodVars, lon = floodCoord[,1], lat = floodCoord[,2])
#' nsite <- nrow(xd)
#'
#' # identify a validation and a training set
#' valid <- seq(nsite) %in% sample(seq(nsite), round(.2*nsite))
#' train <- !valid
#'
#' l1Form <- l1 ~ area + slope + elev + map + len + wb
#' nform <- ~ area + slope + elev + map + len + wb
#'
#' # Using kriging
#' fit <- roiKrige(l1Form, x = xd[train,], xnew = xd[valid,],
#' n = 50, ker = TRUE,
#' model = c('Exp','Gau','Sph'),
#' loc = nform, loc.spatial = ~lon+lat)
#' print(fit)
#' plot(fit)
#' predict(fit)
#'
#' # Without spatial component
#' fit <- roiKrige(l1Form, x = xd[train,], xnew = xd[valid,],
#' n = 20, ker = FALSE, model = 'none', loc = nform)
#'
#' # Using Thin plate spline
#' fit <- roiKrige(l1Form, x = xd[train,], xnew = xd[valid,],
#' n = 20, loc = nform, model = 'tps',
#' loc.spatial = ~lon+lat)
#'
roiKrige <- function(form, x, xnew, nk, loc,
grp = NULL, model = 'none',
loc.spatial = NULL, ker = TRUE){
## Extract coordinate and Euclidean distance
crd <- rbind(model.frame(loc, x),
model.frame(loc, xnew))
h <- as.matrix(dist(crd))
if(is.null(model))
model <- 'none'
## Extract Coordinate of the spatial model
if(any(model != 'none'))
crd2 <- rbind(model.frame(loc.spatial,x),
model.frame(loc.spatial,xnew))
## Grouping for neigborhood size
if(!is.null(grp)){
grp <- rbind(model.frame(grp, x),
model.frame(grp, xnew))
}
if(!is.null(grp) & length(nk) > 1)
nk <- nk[grp[,1]]
else
nk <- rep(nk,nrow(h))
## Reformat the data
x <- data.frame(model.frame(form,x))
xnew <- data.frame(model.frame(delete.response(terms(form)), xnew))
train <- seq(nrow(x))
valid <- seq(nrow(x)+1,nrow(crd))
## compute mean at target
pred <- predSe <- rep(NA, length(valid))
for(ii in seq_along(valid)){
## ## Delineate a neighborhood
nn <- FindNearest(h[valid[ii],train], nk[valid[ii]])
## Fit local linear model. With weight if necessary.
if(ker){
ww <- 1-(h[valid[ii],nn]/max(h[valid[ii],nn]))^2
fit <- lm(form, data = cbind(x[nn,],ww = ww),weights = ww)
} else{
fit <- lm(form, data = x[nn,])
}
out <- predict(fit,xnew[ii,], se.fit = TRUE)
pred[ii] <- out$fit
predSe[ii] <- out$se.fit
}
## create a list for the output
ans <- list()
## compute fitting residuals if necessary
if(any(model != 'none')){
yhat <- yhatSe <- rep(NA, length(train))
for(ii in train){
## Delineate a neighborhood
nn <- FindNearest(h[ii,train], nk[ii])
## Fit local linear model. With weight if necessary.
if(ker){
ww <- 1-(h[ii,nn]/max(h[ii,nn]))^2
fit <- lm(form, data = cbind(x[nn,], ww = ww), weights = ww)
} else {
fit <- lm(form, data = x[nn,])
}
out <- predict(fit, x[ii,], se.fit = TRUE)
yhat[ii] <- out$fit
yhatSe[ii] <- out$se.fit
}
## compute residuals
yres <- x[,1]-yhat
ans$trend <- pred
ans$trend.se <- predSe
ans$fitted <- yhat
ans$fitted.se <- yhatSe
ans$resid <- yres
crd2 <- data.frame(yres = c(yres, rep(0,length(pred))), crd2)
}
## Perform the prediction of the residual
if(any(model == 'tps')){
suppressWarnings(fit <- fields::Tps(crd2[train,-1],crd2[train,1]))
eHat <- predict(fit, crd2[valid,-1])
ans$pred <- as.numeric(pred + eHat)
ans$call <- list(trend = 'roi', spatial = 'tps')
} else if(any(model == 'none')){
ans$pred <- pred
ans$pred.se <- predSe
ans$call <- list(trend = 'roi', spatial = 'none')
} else if(!any(model %in% c('none','tps'))){
sp::coordinates(crd2) <- loc.spatial
## Perform simple kriging on the residuals
out <- automap::autoKrige(yres~1, beta = 0, model = model,
input_data = crd2[train,],
new_data = crd2[valid,], debug.level = 0)
ans$pred <- pred + out$krige_output$var1.pred
ans$resid.se <- out$krige_output$var1.stdev
ans$pred.se <- sqrt(predSe^2 + ans$resid.se^2)
ans$vgm <- out$exp_var
ans$model <- out$var_model
ans$call <- list(trend = 'roi', spatial = 'kriging')
}
class(ans) <- c('regKrige','list')
ans
}
###############################################################################
## Utility functions
###############################################################################
#' @export
plot.regKrige <- function(obj, ...){
if(obj$call$trend == 'gam'){
plot(obj$pred, obj$se,
xlab = 'Predicted values',
ylab = 'Standard Deviation', ...)
} else {
sdev <- sd(obj$resid)
plot(obj$fitted, obj$resid,
xlab = 'Fitted values',
ylab = 'Residuals', ...)
abline(h=0)
abline( h = c(-2,2) * sdev, lty = 3 )
}
}
#' @export
plotVario <- function(x, ...) UseMethod('plotVario',x)
#' @export
plotVario.regKrige <- function(obj) plot(obj$vgm, obj$model)
#' @export
predict.regKrige <- function(obj){
if(is.null(obj$pred.se))
ans <- obj$pred
else
ans <- data.frame(pred = obj$pred, se = obj$pred.se)
return(ans)
}
#' @export
fitted.regKrige <- function(obj) obj$fitted
#' @export
residuals.regKrige <- function(obj) obj$fitted
#' @export
print.regKrige <- function(obj){
cat('\nNonparametric regression model using spatial component\n')
cat('\nTrend : ', obj$call$trend)
cat('\nSpatial : ', obj$call$spatial)
cat('\nNumber of sites', length(obj$pred))
}
#' @export
FindNearest <- function(h,n) order(h)[seq(n)]
#' @export
qqplot.regKrige <- function(obj){
qqnorm(scale(fit$resid))
abline(0,1)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.