R/regKrige.R

#' Prediction at new location using kriging and GAM trend.
#'
#'   The method performs a universal kriging estimate where the trend is
#'   described by a generalized additive model (GAM). The trend is not
#'   regularized (penalty) and must be chosen parsimoniously.
#'
#' @param form Formula defining the trend of the model.
#' @param x   Data for training the model.
#' @param xnew Data at new locations.
#' @param loc Formula defining the coordinates (eulidean distance).
#' @param ... Other arguments pass to \link{autoKrige}.
#'
#' @return
#'
#' \item{pred}{Prediction at new locations.}
#' \item{pred.se}{Standard deviation at new locations.}
#' \item{vgm}{Sample variogram.}
#' \item{model}{Fitted variogram model. See \link{vgm}.}
#'
#' @seealso \code{\link{nnBagging}}, \code{\link{roiKrige}}
#'
#' @export
#'
#' @examples
#'
#'  ## Gather info in on data.frame
#'  xd <- cbind( l1 = log(sapply(floodStream, mean)),
#'              floodVars, lon = floodCoord[,1], lat = floodCoord[,2])
#'  nsite <- nrow(xd)
#'
#'  ## identify a validation and a training set
#'  valid <- seq(nsite) %in% sample(seq(nsite), round(.2*nsite))
#'  train <- !valid
#'
#'  ## formula of GAM trend using spline
#'  library(splines)
#'  l1Form <- l1 ~ area + slope + elev + ns(map, df = 10) +
#'            ns(len, df = 12) + ns(wb, df = 8)
#'
#'  fit <- gamKrige(l1Form, x = xd[train,], xnew = xd[valid,], loc = ~lon+lat )
#'  print(fit)
#'  plot(fit)
#'  plotVario(fit)
#'  predict(fit)
#'
gamKrige <- function(form, x, xnew, loc,
                     model = c('Exp', 'Sph', 'Gau'), ...){

	x <- as.data.frame(x)
	xnew <- as.data.frame(xnew)
	## Extract coordinate information
	crd <- as.matrix(model.frame(loc,x))
	colnames(crd) <- paste('c',seq(ncol(crd)), sep = '')
	fl <- paste('~',paste(colnames(crd), collapse = ' + '))
	fl <- as.formula(fl)

	## Extract trend information
	x <- as.matrix(model.frame(form, x))
	colnames(x) <- c('y',paste('x',seq(ncol(x)-1),sep=''))
	ff <- paste(colnames(x)[-1], collapse = ' + ')
	ff <- paste(colnames(x)[1],'~', ff)
	ff <- as.formula(ff)

	## Extract new sites information
	crdNew <- as.matrix(model.frame(loc,xnew))
	colnames(crdNew) <- colnames(crd)
	xnew <- as.matrix(model.frame(form, xnew))
	colnames(xnew) <- colnames(x)

	## Transform in spDataFrame
	x <- data.frame(x, crd)
	sp::coordinates(x) <- fl

	xnew <- data.frame(xnew, crdNew)
	sp::coordinates(xnew) <- fl

	## Perform kriging
	out <- automap::autoKrige(ff, input_data = x,
														new_data = xnew, model = model,
								            debug.level = 0, ...)

	ans <- 	list(vgm = out$exp_var,
							 model = out$var_model,
							 pred = out$krige_output$var1.pred,
							 pred.se = out$krige_output$var1.stdev,
							 call = list(trend = 'gam', spatial = 'kriging'),
							 fitted = NULL, fitted.se = NULL,
							 trend = NULL, trend.se = NULL,
							 resid.se = NULL)


	class(ans) <- 'regKrige'
	ans
}



#############################################################################
#' Prediction at new location using neural network and modeling of the residuals
#'
#'  The method uses neural network and bagging to estimate a trend
#'  and then correct for spatial correlation in the residual.
#'  The method uses a single hidden layer network that is carried out by
#'  \link{nnet}. Initially, the input
#'  variables are transformed using a SVD decomposition and the trend
#'  is obtained by aggregating (average) several bootstrap samples.
#'  The residuals are further predicted using kriging or thin plate spline.
#'
#' @param form Formula defining the trend.
#' @param x  Data for training the model
#' @param xnew Data at new locations.
#' @param n Number of units
#' @param k Number of bootstrap sample
#' @param model Model for the residuals. Either 'tps' or
#'              variogram models (see \link{vgm}).
#' @param loc Formula providing the spatial coordinates
#' @param ... Other argument pass to \link{autoKrige}
#'
#' @return
#'
#' \item{pred}{Prediction at new locations.}
#' \item{vgm}{Sample variogram.}
#' \item{model}{Fitted variogram model. See \link{vgm}.}
#' \item{resid.se}{Standard deviations of the kriging.}
#'
#' @seealso \link{gamKrige}, \link{roiKrige}.
#'
#' @export
#'
#' @examples
#'
#' ## Gather info in on data.frame
#'  xd <- cbind( l1 = log(sapply(floodStream, mean)),
#'              floodVars, lon = floodCoord[,1], lat = floodCoord[,2])
#'  nsite <- nrow(xd)
#'
#'  ## identify a validation and a training set
#'  valid <- seq(nsite) %in% sample(seq(nsite), round(.2*nsite))
#'  train <- !valid
#'
#'  l1Form <- l1 ~ area + slope + elev + map + len + wb
#'
#'  fit <- nnKrige(l1Form, x = xd[train,], xnew = xd[valid,], n = 30,
#'                 model = c('Exp','Gau','Sph'), loc = ~lon+lat)
#'  print(fit)
#'  plot(fit)
#'  predict(fit)
#'
#'  fit <- nnKrige(l1Form, x = xd[train,], xnew = xd[valid,], n = 30,
#'                 model = 'tps', loc = ~lon+lat)
#'
#'  fit <- nnKrige(l1Form, x = xd[train,], xnew = xd[valid,], n = 30)
#'
#'
nnKrige<- function(form, x, xnew, n, k = 20,
											model = 'none', loc, ...){

	## if kriging will be used
	if(any(model != 'none')){
		## dataFrame of all coordinates
		crd <- rbind(model.frame(loc, x),
								 model.frame(loc, xnew))

	}

	## Reformat dataFrame
	x <- model.frame(form,x)
	nx <- nrow(x)
	xname <- colnames(x)

	u <- model.frame(delete.response(terms(form)),xnew)
	u <- cbind(0,u)
	colnames(u) <- xname

	## Pre-treatement of all the variables by SVD decomposition
	u <- rbind(x,u)
	u <- data.frame(u[,1],svd(u[,-1])$u)
	colnames(u) <- xname

	train <- seq(nx)
	valid <- seq(nx+1, nrow(u))

	## Predict neural networks on valid set using bagging
  pred <- matrix(NA, k,length(valid))
  yhat <- matrix(NA, k,length(train))
  for(ii in seq(k)){

  	sset <- sample(train, length(train), replace = TRUE)
    fit <- nnet::nnet(form, u[sset,], linout = TRUE, size = n,
							trace = FALSE)

    pred[ii,] <- predict(fit, u[valid,])
    yhat[ii,] <- predict(fit, u[train,])
  }

  ## Aggregate baggings
  pred <- colMeans(pred)
  yhat <- colMeans(yhat)

  ## compute residuals
  yres <- u[train,1] - yhat
  ans <- list(fitted = yhat, fitted.se = NULL, resid = yres,
              trend = pred, trend.se = NULL)

  if(any(model == 'tps')){

    ## predict residual using Thin plate spline
  	crd <- data.frame(yres = c(yres, pred), crd)
  	fit <- fields::Tps(crd[train,-1],crd[train,1])
  	eHat <- predict(fit, crd[valid,-1])

  	ans$pred.se <- NULL
  	ans$pred <- pred + eHat
  	ans$call <- list(trend = 'nnet', spatial = 'tps')

  } else if(any(model == 'none')){

    ## if residuals not predicted
  	ans$pred <- pred
  	ans$pred.se <- NULL
  	ans$call <- list(trend = 'nnet', spatial = 'none')

  } else if(all(model != 'none')){

    ## predict residuals using kriging
    crd <- data.frame(yres = c(yres, pred), crd)
    sp::coordinates(crd) <- loc

    out <- automap::autoKrige(yres~1, beta = 0, model = model,
    													input_data = crd[train,],
    													new_data = crd[valid,], debug.level = 0,
    													...)

    ans$pred <- pred + out$krige_output$var1.pred
    ans$resid.se <- out$krige_output$var1.stdev
    ans$pred.se <- NULL

    ans$vgm <- out$exp_var
    ans$model <- out$var_model
    ans$call <- list(trend = 'nnet', spatial = 'kriging')

  }

  class(ans) <- 'regKrige'
  return(ans)
}

#############################################################################
#' Prediction at new location using local regression and modeling of the residuals
#'
#' Return the prediction of a local regression model using region of influence (ROI).
#' The residuals are further predicted using kriging or thin plate spline.
#'
#' @param form Formula defining the trend.
#'
#' @param x Data for trainging the model.
#'
#' @param xnew Data at new locations. Validation set.
#'
#' @param nk Size of the neighborhoods.
#'
#' @param loc Formula. Covariates of the similarity between sites
#'   (euclidean distance). Used for delineation.
#'
#' @param grp Factor defining groups for calibrating \code{nk}.
#'
#' @param model Model for predicting the residuals. Could be 'tps' or
#'   a vector of variogram models. See \link{vgm}.
#'
#' @param loc.spatial Formula defining the spatial covariates (euclidean distance).
#'   Used for predicting residuals with spatial correlation.
#'
#' @param ker Logical, should weight be used to favor closer sites.
#' @param ...
#'
#' @return
#'
#' \item{pred}{Prediction at new sites.}
#' \item{pred.se}{Standard deviation at new sites.}
#' \item{trend}{Part of the prediction attributed to trend}
#' \item{trend.se}{Standard deviation associated with the trend}
#' \item{fitted}{Fitted values (training sites)}
#' \item{fitted.se}{Standard deviaton of the fitted values}
#' \item{vgm}{Sample variogram.}
#' \item{model}{Fitted variogram model. See \link{vgm}.}
#'
#' @seealso \link{gamKrige}, \link{nnKrige}
#'
#' @export
#'
#' @examples
#'
#'  # Gather info in on data.frame
#'  xd <- cbind( l1 = log(sapply(floodStream, mean)),
#'              floodVars, lon = floodCoord[,1], lat = floodCoord[,2])
#'  nsite <- nrow(xd)
#'
#'  # identify a validation and a training set
#'  valid <- seq(nsite) %in% sample(seq(nsite), round(.2*nsite))
#'  train <- !valid
#'
#'  l1Form <- l1 ~ area + slope + elev + map + len + wb
#'  nform <- ~ area + slope + elev + map + len + wb
#'
#'  # Using kriging
#'  fit <- roiKrige(l1Form, x = xd[train,], xnew = xd[valid,],
#'                  n = 50, ker = TRUE,
#'                 model = c('Exp','Gau','Sph'),
#'                 loc = nform, loc.spatial = ~lon+lat)
#'  print(fit)
#'  plot(fit)
#'  predict(fit)
#'
#'  # Without spatial component
#'  fit <- roiKrige(l1Form, x = xd[train,], xnew = xd[valid,],
#'                  n = 20, ker = FALSE, model = 'none', loc = nform)
#'
#'  # Using Thin plate spline
#'  fit <- roiKrige(l1Form, x = xd[train,], xnew = xd[valid,],
#'                  n = 20, loc = nform, model = 'tps',
#'                 loc.spatial = ~lon+lat)
#'
roiKrige <- function(form, x, xnew, nk, loc,
										 grp = NULL, model = 'none',
										 loc.spatial = NULL, ker = TRUE){

	## Extract coordinate and Euclidean distance
	crd <- rbind(model.frame(loc, x),
								 model.frame(loc, xnew))

	h <- as.matrix(dist(crd))

	if(is.null(model))
		model <- 'none'

	## Extract Coordinate of the spatial model
	if(any(model != 'none'))
	  crd2 <- rbind(model.frame(loc.spatial,x),
								model.frame(loc.spatial,xnew))

	## Grouping for neigborhood size
	if(!is.null(grp)){
		grp <- rbind(model.frame(grp, x),
								 model.frame(grp, xnew))

	}

  if(!is.null(grp) & length(nk) > 1)
	  nk <- nk[grp[,1]]
  else
	  nk <- rep(nk,nrow(h))


  ## Reformat the data
	x <- data.frame(model.frame(form,x))
	xnew <- data.frame(model.frame(delete.response(terms(form)), xnew))

	train <- seq(nrow(x))
	valid <- seq(nrow(x)+1,nrow(crd))

  ## compute mean at target
	pred <- predSe <- rep(NA, length(valid))

	for(ii in seq_along(valid)){

	  ## ## Delineate a neighborhood
		nn <- FindNearest(h[valid[ii],train], nk[valid[ii]])

		## Fit local linear model. With weight if necessary.
		if(ker){
			ww <- 1-(h[valid[ii],nn]/max(h[valid[ii],nn]))^2
			fit <- lm(form, data = cbind(x[nn,],ww = ww),weights = ww)
		} else{
			fit <- lm(form, data = x[nn,])
		}

		out <- predict(fit,xnew[ii,], se.fit = TRUE)
		pred[ii] <- out$fit
		predSe[ii] <- out$se.fit
	}

	## create a list for the output
	ans <- list()

	## compute fitting residuals if necessary
	if(any(model != 'none')){

		yhat <- yhatSe <- rep(NA, length(train))
		for(ii in train){

		  ## Delineate a neighborhood
			nn <- FindNearest(h[ii,train], nk[ii])

			## Fit local linear model. With weight if necessary.
			if(ker){
				ww <- 1-(h[ii,nn]/max(h[ii,nn]))^2
				fit <- lm(form, data = cbind(x[nn,], ww = ww), weights = ww)
			} else {
				fit <- lm(form, data = x[nn,])
			}

			out <- predict(fit, x[ii,], se.fit = TRUE)
			yhat[ii] <- out$fit
			yhatSe[ii] <- out$se.fit
		}

		## compute residuals
		yres <- x[,1]-yhat

		ans$trend <- pred
		ans$trend.se <- predSe
		ans$fitted <- yhat
		ans$fitted.se <- yhatSe
		ans$resid <- yres

		crd2 <- data.frame(yres = c(yres, rep(0,length(pred))), crd2)
	}

	## Perform the prediction of the residual
	if(any(model == 'tps')){

		suppressWarnings(fit <- fields::Tps(crd2[train,-1],crd2[train,1]))
		eHat <- predict(fit, crd2[valid,-1])

		ans$pred <- as.numeric(pred + eHat)
		ans$call <- list(trend = 'roi', spatial = 'tps')

	} else if(any(model == 'none')){
		ans$pred <- pred
		ans$pred.se <- predSe
		ans$call <- list(trend = 'roi', spatial = 'none')

	} else if(!any(model %in% c('none','tps'))){

		sp::coordinates(crd2) <- loc.spatial

		## Perform simple kriging on the residuals

		out <- automap::autoKrige(yres~1, beta = 0, model = model,
															input_data = crd2[train,],
															new_data = crd2[valid,], debug.level = 0)

		ans$pred <- pred + out$krige_output$var1.pred
		ans$resid.se <- out$krige_output$var1.stdev
		ans$pred.se <- sqrt(predSe^2 + ans$resid.se^2)

		ans$vgm <- out$exp_var
		ans$model <- out$var_model
		ans$call <- list(trend = 'roi', spatial = 'kriging')
	}

	class(ans) <- c('regKrige','list')
	ans
}

###############################################################################
## Utility functions
###############################################################################

#' @export
plot.regKrige <- function(obj, ...){

  if(obj$call$trend == 'gam'){
    plot(obj$pred, obj$se,
         xlab = 'Predicted values',
         ylab = 'Standard Deviation', ...)

  } else {
    sdev <- sd(obj$resid)
    plot(obj$fitted, obj$resid,
         xlab = 'Fitted values',
         ylab = 'Residuals', ...)
    abline(h=0)
    abline( h = c(-2,2) * sdev, lty = 3 )

  }

}

#' @export
plotVario <- function(x, ...) UseMethod('plotVario',x)

#' @export
plotVario.regKrige <- function(obj) plot(obj$vgm, obj$model)

#' @export
predict.regKrige <- function(obj){

  if(is.null(obj$pred.se))
    ans <- obj$pred
  else
    ans <- data.frame(pred = obj$pred, se = obj$pred.se)


  return(ans)
}

#' @export
fitted.regKrige <- function(obj) obj$fitted

#' @export
residuals.regKrige <- function(obj) obj$fitted


#' @export
print.regKrige <- function(obj){
  cat('\nNonparametric regression model using spatial component\n')
  cat('\nTrend   : ',  obj$call$trend)
  cat('\nSpatial : ',  obj$call$spatial)
  cat('\nNumber of sites', length(obj$pred))

}
#' @export
FindNearest <- function(h,n) order(h)[seq(n)]

#' @export
qqplot.regKrige <- function(obj){
  qqnorm(scale(fit$resid))
  abline(0,1)

}
martindurocher/floodRFA documentation built on June 5, 2019, 8:44 p.m.