########################################################################
#' Prediction at ungauged sites using region of influence and kriging
#'
#' Return the prediction of a local regression model using region of
#' influence (ROI) where the the residuals are further predicted
#' by kriging.
#'
#' @param x Data for trainging the model.
#'
#' @param xnew Data at new locations (Validation set).
#'
#' @param k Number of sites in the neighborhoods.
#'
#' @param trend Formula defining the trend.
#'
#' @param similarity Formula defining covariates used
#' for forming region of influence based on Euclidien distance.
#'
#' @param kriging Formula defining the spatial covariates .
#' used for predicting residuals with spatial correlation.
#'
#' @param model Variogram model. See \code{\link[gstat]{vgm}}
#'
#' @param ker Should a kernel be used in the local regression model.
#'
#' @param ... More arguments to pass to kriging function \code{\link{autoKrige}}
#'
#' @return
#'
#' \item{pred}{Prediction at new sites.}
#' \item{pred.se}{Standard deviation at new sites.}
#' \item{trend}{Part of the prediction attributed to trend}
#' \item{trend.se}{Standard deviation associated with the trend}
#' \item{fitted}{Fitted values (training sites)}
#' \item{fitted.se}{Standard deviaton of the fitted values}
#' \item{vgm}{Sample variogram.}
#' \item{model}{Fitted variogram model.}
#'
#' @export
#'
#' @examples
#' attach(canFlood)
#'
#' ## Transform data if necessary
#' xdf <- with(canFlood,
#' data.frame(y = log(L1),
#' area = scale(log(BASIN_AREA)),
#' wb = scale(log(WB_AREA_PC)),
#' stream = scale(log(STREAM_DEN)),
#' map = scale(log(PPTAVG_BAS)),
#' lon = ELON,
#' lat = ELAT))
#'
#' # Note that the coordinates were transform to a Euclidean space using
#' # multidimentional scaling
#'
#' ## select a validation and training set
#' vid <- runif(nrow(xdf)) > .8
#' tid <- !vid
#'
#' # formula of the relationship between flood quantile and descriptors
#' ftrend <- y ~ area + map + wb + stream
#' fsimilarity <- ~ area + map + wb + stream
#'
#' ## Fit a local regression model
#' fit <- FitRoi(x = xdf[tid,], xnew = xdf[vid,], k = 50,
#' ftrend, similarity = fsimilarity)
#' print(fit)
#' sd(xdf[vid,'y'] - fit$pred)
#'
#' ## Refit the model and perform the kriging of the residuals
#' fitk <- FitRoi(x = xdf[tid,], xnew = xdf[vid, ], k =50,
#' ftrend, loc = fsimilarity ,
#' kriging = ~ lon + lat)
#' print(fitk)
#' sd(xdf[vid,'y'] - fitk$trend)
#' sd(xdf[vid,'y'] - fitk$pred)
#'
#' ifold0 <- sample(floor((1:n)/(n+1)*fold)+1)
#'
#' ## Perform cross-validation. Long to compute
#' ## This speed up the computation as there is no search for a best model
#' f <- FitRoi.cv(x = xdf, k = seq(20,150, 10), fold = ifold0,
#' ftrend, similarity = ~ lon + lat,
#' verbose = TRUE, crit = 'mad')
#'
FitRoi <- function(x, xnew, k, trend, similarity, kriging = NULL, ker = TRUE, ...){
x <- as.data.frame(x)
xnew <- as.data.frame(xnew)
## Extract coordinate and compute euclidean distance
crd <- rbind(model.frame(similarity, x),
model.frame(similarity, xnew))
h <- as.matrix(dist(crd))
## Extract coordinate of the spatial model
if(!is.null(kriging))
crd2 <- rbind(model.frame(kriging,x),
model.frame(kriging,xnew))
## Create a vector of neighborhood size
if(length(k) == 1)
nk <- rep(k,nrow(h))
else
nk <- k
## Reformat the data
x <- data.frame(model.frame(trend,x))
xnew <- data.frame(model.frame(delete.response(terms(trend)), xnew))
train <- seq(nrow(x))
valid <- seq(nrow(x)+1,nrow(crd))
## compute mean at target
pred <- predSe <- rep(NA, length(valid))
for(ii in seq_along(valid)){
## ## Delineate a neighborhood
nn <- FindNearest(h[valid[ii],train], nk[valid[ii]])
## Fit local linear model. With weight if necessary.
if(ker){
ww <- 1-(h[valid[ii],nn]/max(h[valid[ii],nn]))^2
fit <- lm(trend, data = cbind(x[nn,],ww = ww),weights = ww)
} else{
fit <- lm(trend, data = x[nn,])
}
out <- predict(fit, xnew[ii,], se.fit = TRUE)
pred[ii] <- out$fit
predSe[ii] <- out$se.fit
}
ans <- list(call = list(k = k, npred = length(valid), nsite = length(train),
ker = ker, kriging = kriging, trend = trend,
similarity = similarity))
## create a list for the output
if(!is.null(kriging)){
## For all training set
yhat <- yhatSe <- rep(NA, length(train))
for(ii in train){
## Delineate a neighborhood
nn <- FindNearest(h[ii,train], nk[ii])
## Fit local linear model. With weight if necessary.
if(ker){
ww <- 1-(h[ii,nn]/max(h[ii,nn]))^2
fit <- lm(trend, data = cbind(x[nn,], ww = ww), weights = ww)
} else {
fit <- lm(trend, data = x[nn,])
}
out <- predict(fit, x[ii,], se.fit = TRUE)
yhat[ii] <- out$fit
yhatSe[ii] <- out$se.fit
}
## compute residuals
yres <- x[,1]-yhat
## Save known component
ans$trend <- pred
ans$trend.se <- predSe
ans$fitted <- yhat
ans$fitted.se <- yhatSe
ans$resid <- yres
## Extract the coordinates for the residuals
crd2 <- data.frame(yres = c(yres, rep(0,length(pred))), crd2)
sp::coordinates(crd2) <- kriging
## Perform simple kriging on the residuals
out <- automap::autoKrige(
yres~1, input_data = crd2[train,],
new_data = crd2[valid,], beta = 0, debug.level = 0,
...)
ans$pred <- pred + out$krige_output$var1.pred
ans$resid.se <- out$krige_output$var1.stdev
ans$pred.se <- sqrt(predSe^2 + ans$resid.se^2)
ans$vgm <- out$exp_var
ans$model <- out$var_model
} else {
ans$pred <- pred
ans$pred.se <- predSe
}
## return
class(ans) <- 'roi'
ans
}
FindNearest <- function(h,n) order(h)[seq(n)]
#' @export
print.roi <- function(obj){
cat('\n\nLocal regression\n')
cat('\nNumber of sites:', obj$call$nsite)
cat('\nNumber of targets:', obj$call$npred)
cat('\n\nROI:')
cat('\n Trend:', format(obj$call$trend))
cat('\n size:', sort(unique(obj$call$k)))
cat('\n Similarity', format(obj$call$similarity))
cat('\n Kernel:', format(obj$call$ker))
if(!is.null(obj$model)){
cat('\n\nKriging:')
cat('\n Coord:', format(obj$call$kriging),'\n\n')
print(as.data.frame(obj$model)[,1:4])
}
}
#' @export
FitRoi.cv <- function(x, k, trend, similarity,
kriging = NULL, ker = TRUE,
crit = 'mad', fold = 5,
verbose = TRUE, ...){
x <- as.data.frame(x)
## Get response variable
yhat <- x[,all.vars(trend)[1]]
n <- length(yhat)
## Create indexes of cross-validation group
if(length(fold) == 1){
ifold <- sample(floor((1:n)/(n+1)*fold)+1)
fold <- 1:nfold
} else {
ifold <- as.integer(factor(fold))
fold <- 1:length(unique(ifold))
}
## Function that compute the cross-validation criteria for a given
## neighborhood size
FunCv <- function(nk){
## Compute prediction by local regression
pred <- rep(NA,n)
for(jj in fold){
pred[ifold == jj] <-
FitRoi(x = x[ifold != jj,],
xnew = x[ifold == jj,],
k = nk,
trend = trend,
similarity = similarity,
kriging = kriging,
ker = ker, ...)$pred
}
## Compute cross-validation criteria
err <- yhat - pred
rerr <- yhat/pred - 1
list(pred = pred,
rmse = sqrt(mean(err^2)),
rrmse = sqrt(mean(rerr^2)),
nsh = 1 - sum(err^2)/sum((yhat-mean(yhat))^2),
mad = mean(abs(err)),
rmad = mean(abs(rerr)),
smad = 1 - sum(abs(err))/sum(abs(yhat-median(yhat))))
}
if(is.matrix(k))
ntry <- ncol(k)
else
ntry <- length(k)
cv <- data.frame(matrix(NA, ntry,6))
colnames(cv) <- c('rmse','rrmse','nsh','mad','rmad','smad')
best <- Inf
if(verbose)
bar <- txtProgressBar()
for(ii in seq_along(k)){
if(verbose)
setTxtProgressBar(bar, ii/ntry)
## Get the cross-validation criteria for every sites
if(is.matrix(k))
fii <- FunCv(k[ii,])
else
fii <- FunCv(k[ii])
cv[ii,] <- c(fii$rmse, fii$rrmse, fii$nsh,
fii$mad, fii$rmad, fii$smad)
## Keep the best model
if(crit == 'rmse')
cii <- fii$rmse
else if(crit == 'rrmse')
cii <- fii$rrmse
else if(crit == 'nsh')
cii <- fii$rrmse
else if(crit == 'mad')
cii <- fii$mad
else if(crit == 'rmad')
cii <- fii$rmad
else if(crit == 'smad')
cii <- fii$rmad
if(cii < best){
pred <- fii$pred
best <- cii
}
}
## return
ans <- list(cv = cbind(k,cv), pred = pred, crit = crit)
class(ans) <- 'roiCv'
ans
}
#' @export
print.roiCv <- function(obj){
lstCrit <- c('rmse','rrmse','nsh','mad','rmad','smad')
bid <- which(obj$crit == lstCrit)+1
best <- order(obj$cv[,bid])[1:3]
cat('\nCross-validation for local regression\n')
cat('\nBest 3 sizes of neighborhood\n')
print(obj$cv[best,])
}
#' @export
plot.roiCv <- function(obj,...){
lstCrit <- c('rmse','rrmse','nsh','mad','rmad','smad')
bid <- which(obj$crit == lstCrit)+1
best <- which.min(obj$cv[,bid])
form <- as.formula(paste(obj$crit,'~k'))
plot(form, obj$cv, type = 'l', ...)
points(obj$cv[best,1],obj$cv[best,bid], col = 2, pch = 16)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.