#######################################################################
#' Perform frequency analysis of a target site
#' using Region of influence (ROI) and Generalized least squares (GLS)
#'
#' Returns the fitting of the regression models at a target
#' site with the best calibration settings.
#'
#' @param form Formula or list of formulas to try.
#'
#' @param x Data frame containing all variables in the formulas.
#'
#' @param x0 Data.frame or list containing the input variables of
#' the target
#'
#' @param nk Scalar or vector providing a series of neighborhood sizes
#' to try.
#'
#' @param S Sampling covariance matrix for the output variable.
#'
#' @param distance Vector of distance between the donors and target
#'
#' @param criteria Criteria to select the best calibration. One of
#' 'gcv','vp0','avp', 'avpo'.
#'
#'
#' @param lambda Scalar that modified the penalty in the GCV criteria
#'
#' @param sigTol Lower limit for the model variance during the clalibration
#'
#' @param verbose If true a progress bar is printed.
#'
#' @details
#'
#' The GLS model was proposed by Stedinger and Takser (1985):
#' \deqn{y= X \beta + \eta + \epsilon}
#' where \eqn{\eta} and \eqn{\epsilon} are two independent terms of errors.
#' The total error is \eqn{\epsilon = \eta + \delta} and includes
#' respectively the error due to sampling, with known covariance
#' matrix \eqn{S}, and a iid term of error \eqn{\eta} with variance
#' \eqn{\sigma^2}.
#' Therefore, the covariance matrix of the total error is then given by
#' \deqn{ \Lambda = \sigma^2 I + S = \sigma^2 G.}
#'
#' For a given model error \eqn{\sigma^2}, the cholesky decomposition provide
#' the matrix decomposition \eqn{G^{-1} = UU'}.
#' Solving the initial GLS model with known model variance is then equivant
#' to solving the ordinary least-squares problem
#' \deqn{U'y = UX + \epsilon^\ast.}
#' A new estimation of the model error can be obtained by
#' \deqn{\hat\sigma^2 = (n-p)^{-1} \sum_{i =1 }^n \epsilon_i^\ast}
#' where \eqn{n} is the number of sites and \eqn{p} the number of
#' parameters.
#' Subsequent iterations are then performed to improved the global
#' fitting of the GLS model until \eqn{\sigma^2}
#' has converged.
#'
#'
#' @section References:
#'
#' Stedinger, J. R., & Tasker, G. D. (1985). Regional Hydrologic Analysis:
#' 1. Ordinary, Weighted, and Generalized Least Squares Compared.
#' Water Resources Research, 21(9), 1421–1432.
#' https://doi.org/10.1029/WR021i009p01421
#'
#' Reis, D. S., Stedinger, J. R., & Martins, E. S. (2005). Bayesian
#' generalized least squares regression with application to log
#' Pearson type 3 regional skew estimation. Water Resources Research,
#' 41(10), W10419. https://doi.org/10.1029/2004WR003445
#'
#' Kjeldsen, T. R., & Jones, D. A. (2009). An exploratory analysis of
#' error components in hydrological regression modeling. Water Resources
#' Research, 45(2), W02407. https://doi.org/10.1029/2007WR006283
#'
#' @export
#'
#' @examples
#' xd <- ungaugedDemoData()
#'
#' lform <- list(y ~ x,
#' y ~ x + I(x^2))
#'
#' fit <- roiGls( lform, xd$data[-1,], xd$data[1,],
#' S = xd$S[-1,-1],
#' distance = xd$distance[1,-1],
#' nk = seq(20,200,10))
#'
#' # Show best calibration settings
#' print(fit)
#'
#' # Summary of the model
#' summary(fit)
#' plot(fit)
roiGls <- function(form, ...) UseMethod('roiGls', form)
#' @export
#' @rdname roiGls
roiGls.formula <- function(form, x, x0, nk, S = NULL, distance = NULL,
criteria = 'vp0', lambda = 1, sigTol = 0,
verbose = FALSE){
## Extract data from formula
x <- model.frame(form,x)
xm <- model.matrix(form,x)
tt <- delete.response(terms(form))
xm0 <- as.numeric(model.matrix(tt, x0))
x0 <- model.frame(tt, x0)
names(x0) <- names(x)[-1]
nk <- sort(nk)
if(max(nk)> nrow(x))
stop('Some sizes are larger than the population')
## Default covariance is identity
if(is.null(S)) S <- diag(nrow(x))
else if( nrow(S) != nrow(x)) stop('Wrong size for S')
## Compute distance between output variables (if necessary)
if(is.null(distance)){
distance <- apply(x[,-1], 1, function(x1,x2){
sqrt(sum((x1-x2)^2))}, x2 = as.numeric(x0))
} else if(length(distance) != nrow(x)){
stop('Wrong size for distance')
}
nid <- order(distance)
## Create storage
y0 <- vp0 <- avpo <- avp <-
gcv <- sig2 <- matrix(NA,1,length(nk))
## Set monitoring bar
if(verbose) bar <- txtProgressBar(style = 3)
## Loop among ROI sizes
for(ii in 1:length(nk)){
sid <- nid[1:nk[ii]]
## Do not use a neigborhood with Infinite or NA distance
if(sum(!is.finite(distance[sid])) > 0) next
## Fit the GLS model
mdl <- lmFlood(form, x[sid,], S = S[sid,sid])
## Compute the criteria
y0[ii] <- sum(xm0 * coef(mdl))
sig2[ii] <- mdl$sig2
vp0[ii] <- mdl$sig2 +
crossprod(xm0, vcov(mdl,'beta') %*% xm0)
A <- tcrossprod(xm[sid,] %*% vcov(mdl,'beta'), xm[sid,] )
avp[ii] <- mdl$sig2 + mean(diag(A))
avpo[ii] <- avp[ii] - 2 * mdl$sig2 * mdl$rank/nk[ii]
gcv[ii] <- (nk[ii]/(nk[ii]- lambda * mdl$rank)^2 *
sum(residuals(mdl,'b')^2))
## Select a criteria
if(criteria == 'gcv')
crit = gcv[ii]
else if(criteria == 'avp')
crit = avp[ii]
else if(criteria == 'avpo')
crit = avpo[ii]
else if(criteria == 'vp0')
crit = vp0[ii]
## Keep the best model information
if(ii == 1){
bestMdl <- mdl
bestNeighbor <- sid
bestii <- ii
bestCrit <- crit
} else if(bestMdl$sig2 < sigTol & mdl$sig2 > sigTol){
bestMdl <- mdl
bestNeighbor <- sid
bestii <- ii
bestCrit <- crit
} else if(crit < bestCrit & mdl$sig2 > sigTol){
bestMdl <- mdl
bestNeighbor <- sid
bestii <- ii
bestCrit <- crit
}
## Monitoring
if(verbose) setTxtProgressBar(bar,ii/length(nk))
}
ans <- list(data = x, distance = distance, S = S,
mdl = bestMdl, ii = bestii, lambda = lambda,
critValue = bestCrit, criteria = criteria,
donor = bestNeighbor,
iiform = 1, form = list(form), sigTol = sigTol,
y0 = y0, x0 = x0, nk = nk, sig2 = sig2,
gcv = gcv, vp0 = vp0, avp = avp, avpo = avpo)
class(ans) <- 'roiGls'
return(ans)
}
#' @export
#' @rdname roiGls
roiGls.list <- function(form, x, x0, ...){
## Create storage
y0 <- sig2 <- gcv <- avp <- vp0 <- avpo <- list()
## Loop through formulas
for(ii in 1:length(form)){
## Fit a ROI-GLS
tmp <- roiGls(form[[ii]], x, x0, ...)
## Extract criteria
y0[[ii]] <- tmp$y0
sig2[[ii]] <- tmp$sig2
gcv[[ii]] <- tmp$gcv
avp[[ii]] <- tmp$avp
avpo[[ii]] <- tmp$avpo
vp0[[ii]] <- tmp$vp0
##update the best model
if(ii == 1){
ans <- tmp
iiform <- ii
}
else if(tmp$critValue < ans$critValue) {
ans <- tmp
iiform <- ii
}
}
## Keep info on best model
ans$form <- form
ans$iiform <- iiform
ans$data <- x
ans$x0 <- x0
## Reshape the list of criteria in matrix
nsize <- length(sig2[[1]])
if(nsize > 1){
ans$y0 <- matrix(unlist(y0), ncol = nsize, byrow = TRUE)
ans$sig2 <- matrix(unlist(sig2), ncol = nsize, byrow = TRUE)
ans$gcv <- matrix(unlist(gcv), ncol = nsize, byrow = TRUE)
ans$avp <- matrix(unlist(avp), ncol = nsize, byrow = TRUE)
ans$vp0 <- matrix(unlist(vp0), ncol = nsize, byrow = TRUE)
ans$avpo <- matrix(unlist(avpo), ncol = nsize, byrow = TRUE)
}
return(ans)
}
#################################################################
#' Update a ROI-GLS model
#'
#' Refit the ROI-GLS model with best calibration after changes.
#' Try to avoid to unnesssary fitting to identify the new best calibration
#' settings.
#'
#' @param obj A ROI-GLS model. Normally an output of function \code{roiGls}
#'
#' @param selectNk Neighborhood size to use. Must be in the list
#' of candidate sizes.
#'
#' @param selectForm Formula to use. Must be in the list of candidate formulas.
#' @param newCriteria Criteria to select the best calibration settings.
#'
#' @param newLambda New parameter \code{lambda} used for the GCV criteria.
#'
#' @param newSigTol New parameter \code{tol} for the model variance.
#'
#' @param newNk Vector of new neighborhood sizes to try.
#'
#' #' @param newForm List of New formulas to try.
#'
#' @param rmNk A ROI size to remove from the candidate sizes
#'
#' @param rmForm A formula to remove from the candidate formula
#'
#' @export
roiUpdate <- function(obj,...) UseMethod('roiUpdate',obj)
#' @export
#' @rdname roiUpdate
roiUpdate.list <- function(obj, ...){
bar <- txtProgressBar(style = 3)
setTxtProgressBar(bar,0)
for(kk in 1:length(obj)){
obj[[kk]] <- roiUpdate(obj[[kk]], ...)
setTxtProgressBar(bar,kk/length(obj))
}
return(obj)
}
#' @export
roiUpdate.roiGls <- function(obj, selectNk = NULL, selectForm = NULL,
newCriteria = NULL, newLambda = NULL, newSigTol = NULL,
newNk = NULL, newForm = NULL,
rmNk = NULL, rmForm = NULL)
{
## Update GCV with new lambda parameter
if(!is.null(newLambda)){
np <- sapply(obj$form, function(z) ncol(model.matrix(z,obj$data)))
for(jj in 1:nrow(obj$gcv)){
for(kk in 1:ncol(obj$gcv)){
obj$gcv[jj,kk] <- (obj$gcv[jj,kk] *
(obj$nk[kk] - obj$lambda * np[jj])^2 /
(obj$nk[kk] - newLambda * np[jj])^2)
}
}
obj$lambda <- newLambda
}#endif
## remove sizes form list
if(!is.null(rmNk)){
rmNkId <- which(rmNk == obj$nk)
obj$nk <- obj$nk[-rmNkId]
obj$y0 <- obj$y0[,-rmNkId]
obj$sig2 <- obj$sig2[,-rmNkId]
obj$gcv <- obj$gcv[,-rmNkId]
obj$avp <- obj$avp[,-rmNkId]
obj$vp0 <- obj$vp0[,-rmNkId]
obj$avpo <- obj$avpo[,-rmNkId]
}
## remove formulas from list
if(!is.null(rmForm)){
rmFormId <- which(sapply(obj$form, function(f) rmForm == f))
obj$form <- obj$form[-rmFormId]
obj$y0 <- obj$y0[-rmFormId,]
obj$sig2 <- obj$sig2[-rmFormId,]
obj$gcv <- obj$gcv[-rmFormId,]
obj$avp <- obj$avp[-rmFormId,]
obj$vp0 <- obj$vp0[-rmFormId,]
obj$avpo <- obj$avpo[-rmFormId,]
}
## Add new sizes to the list
if(!is.null(newNk)){
## Upgrade the storage for the selection criteria
obj$nk <- c(newNk,obj$nk)
newBlock <- matrix(NA,nrow(obj$sig2),length(newNk))
obj$y0 <- cbind(newBlock,obj$y0)
obj$sig2 <- cbind(newBlock,obj$sig2)
obj$gcv <- cbind(newBlock,obj$gcv)
obj$avp <- cbind(newBlock,obj$avp)
obj$vp0 <- cbind(newBlock,obj$vp0)
obj$avpo <- cbind(newBlock,obj$avpo)
}
## Add Formulas to the list
if(!is.null(newForm)){
if(!is.list(newForm)) newForm <- list(newForm)
obj$form <- append(newForm,obj$form)
newBlock <- matrix(NA,length(newForm),ncol(obj$sig2))
obj$y0 <- rbind(newBlock,obj$y0)
obj$sig2 <- rbind(newBlock,obj$sig2)
obj$gcv <- rbind(newBlock,obj$gcv)
obj$avp <- rbind(newBlock,obj$avp)
obj$vp0 <- rbind(newBlock,obj$vp0)
obj$avpo <- rbind(newBlock,obj$avpo)
}
## Fit models with new sizes and formulas
for(jj in 1:nrow(obj$sig2)){
for(kk in 1:ncol(obj$sig2)){
if(is.na(obj$sig2[jj,kk])){
new <- roiGls(obj$form[[jj]], x = obj$data, x0 = obj$x0,
nk = obj$nk[kk], sigTol = obj$sigTol,
distance = obj$distance, S = obj$S, verbose = FALSE,
criteria = obj$criteria, lambda = obj$lambda)
obj$y0[jj,kk] <- new$y0[1]
obj$sig2[jj,kk] <- new$sig2[1]
obj$gcv[jj,kk] <- new$gcv[1]
obj$avp[jj,kk] <- new$avp[1]
obj$vp0[jj,kk] <- new$vp0[1]
obj$avpo[jj,kk] <- new$avpo[1]
}# endif
}
}#endfor
## Extract the selection criteria
if(!is.null(newCriteria)) obj$criteria <- newCriteria
if(obj$criteria == 'gcv')
crit = obj$gcv
else if(obj$criteria == 'avp')
crit = obj$avp
else if(obj$criteria == 'vp0')
crit = obj$vp0
else if(obj$criteria == 'avpo')
crit = obj$avpo
## Avoid selecting a model with a near zero variance
if(!is.null(newSigTol)) obj$sigTol <- newSigTol
crit[obj$sig2 < obj$sigTol] <- Inf
## identify the new calibration setting
if(is.null(selectNk) & is.null(selectForm)){
## optimal
obj$iiform <- which.min(apply(crit,1,min))
obj$ii <- which.min(crit[obj$iiform,])
} else if(!is.null(selectNk) & !is.null(selectForm)){
## Both size and formula known
obj$iiform <- which(sapply(obj$form, function(f) selectForm == f))
obj$ii <- which(selectNk == obj$nk)
} else if(!is.null(selectNk)){
## Known ROI size
obj$ii <- which(selectNk == obj$nk)
obj$iiform <- which.min(crit[,obj$ii])
} else{
## Known formula
obj$iiform <- which(sapply(obj$form, function(f) selectForm == f))
obj$ii <- which.min(crit[obj$iiform,])
}
## refit the GLS model
new <- roiGls(obj$form[[obj$iiform]], x = obj$data, x0 = obj$x0,
nk = obj$nk[obj$ii], sigTol = obj$sigTol,
distance = obj$distance, S = obj$S, verbose = FALSE,
criteria = obj$criteria, lambda = obj$lambda)
## update the best model components
obj$mdl <- new$mdl
obj$critValue <- new$critValue
obj$donor <- new$donor
return(obj)
}
########################################################
#' Plot and summary diagnostic of ROI-GLS calibration
#'
#' The function \code{roiPlot} draw the prediction variance and
#' the GCV of the calibrated ROI-GLS model in respect of the number
#' of sites in the provided list \code{nk}.
#' Also, most of the function use to analyse the output of a \code{lm}
#' model avaible to analyse the GLS model in its OLS form.
#' See \code{\link{roiGLS}}.
#'
#' @param obj A ROI-GLS model. Normally output of function \code{roiGls}
#'
#' @param iiform Indices of the formula to plot. By default the
#' formula of the best calibration.
#'
#' @param ... Other parameter for the generic plot function
#'
#' @export
#'
#' @examples
#'
#' xd <- ungaugedDemoData()
#'
#' fit <- roiGls(y ~ x,
#' xd$data[-1,], xd$data[1,], S = xd$S[-1,-1],
#' distance = xd$distance[1,-1],
#' nk = seq(20,100,10))
#'
#' roiPlot(fit, type = 'l')
#'
#' print(fit)
#'
#' summary(fit)
#'
#' coef(fit)
#'
#' residuals(fit)
roiPlot <- function(obj, iiform = NULL, xlab = 'Number of sites',
ylab = 'Model error',
ylim = NULL, pos = 'topright',
type = NULL, ...){
if(is.null(iiform)) iiform <- obj$iiform
if(is.matrix(obj$gcv)){
obj$gcv <- obj$gcv[iiform,]
obj$vp0 <- obj$vp0[iiform,]
}
# find bounds for the graphic
if(is.null(ylim)){
yall <- c(obj$gcv, obj$vp0)
ylim <- c(0.95 * min(yall), 1.05 * max(yall))
}
## Plot the graphic
plot(obj$nk,obj$vp0, ylim = ylim, xlab = xlab, ylab = ylab ,
type = type, ...)
points(obj$nk,obj$gcv, col = 2, type = type)
abline(v = obj$nk[obj$ii], lty = 2, col = 3)
legend(pos, col = c(1,2), lty = rep(1,3),
legend = c('VP0', 'GCV'))
}
#' @export
#' @rdname roiPlot
summary.roiGls <- function(obj) {
print(summary(obj$mdl))
cat('\nSummary of the best ROI size\n')
## Extract the selection criteria
if(obj$criteria == 'gcv')
crit = obj$gcv
else if(obj$criteria == 'avp')
crit = obj$avp
else if(obj$criteria == 'vp0')
crit = obj$vp0
else if(obj$criteria == 'avpo')
crit = obj$avpo
oid <- order(crit[obj$iiform,])
if(length(oid) > 5) oid <- oid[1:5]
print(data.frame( Size = obj$nk[oid],
SD0 = sqrt(obj$vp0[obj$iiform,oid]),
GCV = obj$gcv[obj$iiform,oid],
AVP = obj$avp[obj$iiform,oid],
AVPO = obj$avpo[obj$iiform,oid]))
cat('\n Nearest sites\n')
print(obj$donor)
cat('\n Summary of the best formulas\n')
nid <- t(apply(crit,1,order))
fid <- order(apply(crit,1,min))
if(length(fid) > 5) fid <- fid[1:5]
nid <- nid[fid,1]
print(obj$form[fid])
if(length(fid) == 1){
print(data.frame( Size = obj$nk[nid],
SD0 = (sqrt(obj$vp0[fid,nid])),
GCV = (obj$gcv[fid,nid]),
AVP = (obj$avp[fid,nid]),
AVPO = (obj$avpo[fid,nid])))
} else{
print(data.frame( Size = obj$nk[nid],
SD0 = diag(sqrt(obj$vp0[fid,nid])),
GCV = diag(obj$gcv[fid,nid]),
AVP = diag(obj$avp[fid,nid]),
AVPO = diag(obj$avpo[fid,nid])))
}
}
##################################################################
#' Linear model with observational errors
#'
#' @export
#' @examples
#'
#' nn <- 1000
#' xx <- data.frame( x1 = runif(nn), x2 = runif(nn)) * 2
#'
#' S <- exp(-as.matrix(dist(xx)))
#'
#' sres <- mnormt::rmnorm(1, varcov = S)
#'
#' y <- 5 -2 * xx[,1] + 3 * xx[,2] + rnorm(nn) + sres
#'
#' fit <- lmFlood(y ~ 1+ x1 + x2, xx, S)
#'
#' summary(fit)
lmFlood <- function(form, x, S , tol = 1e-8){
## extract info from formula
x <- model.frame(form,x)
xm <- model.matrix(form,x)
n <- nrow(x)
vnames <- colnames(xm)
## Solve initial solution
fit <- lm(x[,1] ~ xm -1)
nedf <- n-fit$rank
## Fit an initial OLS model
sig2 <- sum(residuals(fit)^2)/nedf
sig2Old <- Inf
B <- NULL
## Iteration until model variance converge
while(abs(sig2-sig2Old) > tol & sig2 > tol){
B <- chol(solve(diag(nrow(S)) + S/sig2))
By <- as.numeric(B %*% x[,1])
Bx <- B %*% xm
## Fit OLS model
fit <- lm(By ~ Bx - 1)
## Update model variance
sig2Old <- sig2
sig2 <- sum(residuals(fit)^2)/nedf
}
## Keep model info
ans <- list(model = x,
formula = form,
rank = fit$rank,
df.residuals = nedf,
coefficients = coef(fit),
fitted.values = xm %*% coef(fit),
B = B,
qr = fit$qr,
residuals.b = residuals(fit),
fitted.b = fitted(fit),
sig2 = sig2,
S = S,
vcov = vcov(fit),
llik = as.numeric(logLik(fit)),
dev = deviance(fit))
ans$residuals <- x[,1] - ans$fitted.values
names(ans$coefficients) <- vnames
rownames(ans$vcov) <- colnames(ans$vcov) <- vnames
class(ans) <- 'lmFlood'
return(ans)
}
#' @export
fitted.lmFlood <- function(obj, type = 'working'){
if(type == 'working') ans <- obj$fitted.values
else if(type == 'b') ans <- obj$fitted.b
return(as.vector(ans))
}
#' @export
residuals.lmFlood <- function(obj, type = 'working'){
if(type == 'working') ans <- obj$residuals
else if(type == 'b') ans <- obj$residuals.b
else if(type == 'sb') ans <- obj$residuals.b/sqrt(obj$sig2)
else if(type == 'pearson'){
ans <- obj$residuals/sqrt(obj$sig2+ diag(obj$S))
}
return(as.vector(ans))
}
#' @export
vcov.lmFlood <- function(obj, type = 'total'){
if(type == 'total') ans <- obj$sig2 * diag(nrow(obj$S)) + obj$S
else if(type == 'sampling') ans <- obj$S
else if(type == 'model') ans <- obj$sig2 * diag(nrow(obj$S))
else if(type == 'beta') ans <- obj$vcov
return(ans)
}
predict.lmFlood <- function(obj, newData = NULL){
if(is.null(newData)){
ans <- fitted(obj,'working')
} else if(is.matrix(newData)) {
ans <- newData %*% coef(obj)
} else{
tt <- delete.response(terms(obj$form))
ans <- model.matrix(tt, as.list(newData)) %*% coef(obj)
}
return(as.vector(ans))
}
#' @export
print.lmFlood <- function(obj){
cat('\nGeneralized least squares fit of the flood model')
cat('\n\nModel:\n')
print(obj$formula)
n <- nrow(obj$model)
cat('\nStandard model error:', format(sqrt(obj$sig2), digits = 4))
cat('\nObs: ', n, ', DoF: ', n-obj$rank, '\n\n', sep = '')
}
#' @export
summary.lmFlood <- function(obj){
n <- nrow(obj$model)
p <- obj$rank
res <- summary(residuals(obj,'pearson'))
dev <- 1 - obj$dev /deviance(lm(I(obj$B %*% obj$model[,1]) ~ 1))
nhs <- 1 - var(residuals(obj))/var(obj$model[,1])
llik <- data.frame( AIC = 2 * (p - obj$llik) ,
BIC = log(n) * p - 2*obj$llik,
llik = obj$llik,
dev.Expl. = dev,
Nash = nhs)
beta <- coef(obj)
std <- sqrt(diag(obj$vcov))
tval <- beta / std
pval <- 1 - pt(abs(tval), nrow(obj$model) - obj$rank)
corr <- cov2cor(obj$vcov)
coef <- data.frame(Estimate = beta,
Std.Error = std,
t.value = tval,
p.value = pval)
shapiro <- shapiro.test(residuals(obj,'pearson'))
ans <- list(n = n, p = p,
res = res,
llik = llik,
coef = coef,
sig2 = obj$sig2,
corr = corr,
formula = obj$formula,
shapiro = shapiro)
class(ans) <- 'summary.lmFlood'
return(ans)
}
#' @export
print.summary.lmFlood <- function(obj){
cat('\nGeneralized least squares fit of the flood model')
cat('\n\nModel:\n')
print(obj$formula)
cat('\n')
print(obj$llik, digits = 4, row.names = FALSE)
cat('\n\nCoefficients:\n')
print(obj$coef, digits = 4)
cat('\n\nCorrelation:\n')
tmp <- format(round(obj$corr,2))
tmp[upper.tri(tmp)] <- ""
diag(tmp) <- ""
print(tmp[-1,-obj$p], quote = FALSE)
cat('\n\nResiduals:\n')
print(obj$res)
cat('\nStandard model error:', format(sqrt(obj$sig2), digits = 4))
cat('\nObs: ', obj$n, ', DoF: ', obj$n-obj$p, '\n\n', sep = '')
print(obj$shapiro)
}
#' @export
qqplot <- function(x, ...) UseMethod('qqplot', x)
#' @export
qqplot.default <- function(x) stats::qqplot(x, ...)
#' @export
qqplot.lmFlood <- function(x){
n <- length(x$residuals)
emp <- sort(scale(residuals(x)))
theo <- qnorm((1:n)/(n+1))
plot(theo, emp, ylab = 'Emprical quantiles', xlab = 'Theoritical quantiles')
abline(0,1, lty = 3)
}
#' @export
qqplot.roiGls <- function(x) qqplot(x$mdl)
#' @export
AIC.lmFlood <- function(obj, k = 2) k*p - 2 * obj$llik
#' @export
logLik.lmFlood <- function(obj) obj$llik
#' @export
plot.lmFlood <- function(obj , type = 'working',
ylab = 'Residuals', xlab = 'Fitted',
...) {
if(type %in% c('working','pearson')){
plot(fitted(obj,'working'), residuals(obj,'pearson'),
xlab = xlab, ylab = ylab,...)
abline(h=0, lty = 3)
} else {
plot(fitted(obj,'b'), residuals(obj,'sb'),
xlab = xlab, ylab = ylab,...)
abline(h=0, lty = 3)
}
}
#' @export
termplot.lmFlood <- function(obj, vid = 1, alpha = .05, ci = TRUE,
ylab = 'Partial residuals', xlab = NULL,
col = 'black', ...){
## Extract and sort the selected variable
xm <- model.matrix(obj$formula, obj$model)
oid <- order(xm[,vid])
x <- xm[oid,vid]
vname <- colnames(xm)[vid]
## Compute partial residuals and fitted values
beta <- coef(obj)[vid]
err <- residuals(obj)[oid]
hat <- beta * x
## Plot the results
if(is.null(xlab)) xlab <- vname
plot(x, hat + err, xlab = xlab, ylab = ylab, col = col)
lines(x, hat, col = col)
}
################################################################
#' Utility functions for roiGls
#'
#' Provide a link with the \code{\link{lmFlood}} model
#'
#' @export
plot.roiGls <- function(x, ...) plot(x$mdl,...)
#' @export
termplot <- function(model, ...) UseMethod('termplot',model)
#' @export
termplot.default <- function(model, ...) stats::termplot(model, ...)
#' @export
termplot.roiGls <- function(obj,...) termplot(obj$mdl, ...)
#' @export
coef.roiGls <- function(obj) coef(obj$mdl)
#' @export
residuals.roiGls <- function(obj,...) residuals(obj$mdl,...)
#' @export
fitted.roiGls <- function(obj,...) fitted(obj$mdl,...)
#' @export
AIC.roiGls <- function(obj,...) AIC(obj$mdl,...)
#' @export
predict.roiGls <- function(obj,...) predict(obj$mdl,...)
#' @export
confint.roiGls <- function(obj,...) confint(obj$mdl)
#' @export
vcov.roiGls <- function(obj,...) vcov(obj$mdl,...)
#' @export
print.roiGls <- function(obj, ...){
cat('\nROI model with', obj$nk[obj$ii], 'sites \n')
print(obj$form[[obj$iiform]])
cat('\nCenter at\n')
tt <- delete.response(terms(obj$form[[obj$iiform]]))
print(as.numeric(model.frame(tt,obj$x0)), digits = 4)
}
###############################################################
#' Performing ridge regression in GLS flood framework
#'
#' @export
ridge2Edf <- function(z, r) sum(z/(z+r))
#' @export
edf2Ridge <- function(z, edf){
## Fin and initial interval
l1 <- length(z); l0 <- 0
while(ridge2Edf(z,l1) > edf ){
l0 <- l1
l1 <- 2*l1
}
return(uniroot(function(r) edf - ridge2Edf(z,r), c(l0,l1) )$root)
}
#' @export
ridgeCoef <- function(beta, z, ridge) beta * z/(z+ridge)
#' @export
ridgeFlood <- function(form, x, S , criteria = 'gcv', ridge.val = NULL,
ridge.lim = NULL, edf.lim = NULL, ridge.n = 100,
tol = 1e-8){
## extract info from formula
x <- model.frame(form,x)
xm <- model.matrix(form,x)
n <- nrow(x)
p <- ncol(xm)
nedf <- n-p
ybar <- mean(x[,1])
y <- x[,1] - ybar
## Fit an initial OLS model
d <- svd(xm)
beta <- d$v %*% diag(1/d$d) %*% t(d$u) %*% x[,1]
w <- as.numeric(y - xm %*% beta)
sig2 <- sum(w*w)/nedf
sig2Old <- Inf
U <- NULL
## Loop throught iterative least-squares
while(abs(sig2-sig2Old) > tol & sig2 > tol){
U <- chol(solve(diag(nrow(S)) + S/sig2))
Uy <- as.numeric(U %*% y)
Ux <- U %*% xm
## Fit OLS model
d <- svd(Ux)
beta <- d$v %*% diag(1/d$d) %*% t(d$u) %*% Uy
w <- as.numeric(Uy - Ux %*% beta)
## Update model variance
sig2Old <- sig2
sig2 <- sum(w*w)/nedf
}
## Determine a list of ridge coefficients to evaluate
dsq <- d$d * d$d
if(criteria == 'hkb'){
ridge <- p * sig2 / sum(beta^2)
}
else if(criteria == 'gcv'){
## transform the edf in ridge coefficient
if(!is.null(ridge.val)){
ridge <- ridge.val
} else if(!is.null(ridge.lim)){
ridge <- seq(ridge.lim[1], ridge.lim[2], l = ridge.n)
} else if(!is.null(edf.lim)){
ridge <- seq(edf2Ridge(dsq, edf.lim[2]),
edf2Ridge(dsq, edf.lim[1]), l = ridge.n)
} else{
ridge <- seq(0, edf2Ridge(dsq,1), l = ridge.n)
}
}
## Seach for the best ridge coefficient
tab <- matrix(NA, length(ridge), 3 + p)
bid <- (1:p)+3
colnames(tab) <- c('ridge','edf','gcv', colnames(xm))
tab[,1] <- ridge
for(ii in 1:nrow(tab)){
beta0 <- ridgeCoef(beta, dsq, ridge[ii])
res0 <- Uy - Ux %*% beta0
edf0 <- ridge2Edf(dsq,ridge[ii])
gcv0 <- n/(n-edf0)^2 * sum(res0^2)
tab[ii,2] <- edf0
tab[ii,3] <- gcv0
tab[ii,bid] <- beta0
}
if(nrow(tab)== 1) oid <- 1
else oid <- which.min(tab[,'gcv'])
beta <- tab[oid,bid]
fitted <- ybar + xm %*% beta
resid <- x[,1] - fitted
edf <- tab[oid,'edf']
w <- as.numeric(Uy - Ux %*% beta)
sig2 <- sum(w*w)/(n-edf)
ans <- list(coefficients = beta,
fitted.values = fitted,
residuals = resid,
df.residuals = n - edf,
edf = edf,
rank = p,
model = x,
ridge.table = tab,
formula = form,
svd = d,
sig2 = sig2)
class(ans) <- c('list','ridgeFlood')
return(ans)
}
###############################################################
#' Managing list of roiGls object
#'
#' A list of \link{roiGLS} model output. The following functions
#' extract the common elements, the predicted values and the
#' coefficients.
#'
#' @param l List of roiGls output
#'
#' @param type String that provides the element to extract. One of
#' 'nk', 'sig2', 'gcv','avp','avpo','vp0' or 'formula.
#'
#' @param se Should the standard error be returned.
#'
#' @export
#' @examples
#'
#' xd <- ungaugedDemoData()
#'
#' mdls <- list()
#'
#' ## Perform leave-one-out cross validation
#' for(kk in 1:3){
#' mdls[[kk]] <- roiGls( y ~ x,
#' x = xd$data[-kk,], x0 = xd$data[kk,],
#' S = xd$S[-kk,-kk], distance = xd$distance[kk,-kk],
#' nk = seq(20,50,10))
#' }
#'
#' roiCoef(mdls)
#'
#' roiGet(mdls, 'vp0')
#' @export
roiGet <- function(l, type){
if( type == 'nk'){
ans <- sapply(l, function(z) z$nk[z$ii])
} else if(type == 'sig2'){
ans <- sapply(l, function(z) z$mdl$sig2[z$iiform,z$ii])
} else if(type == 'gcv'){
ans <- sapply(l, function(z) z$gcv[z$iiform,z$ii])
}else if(type == 'avp'){
ans <- sapply(l, function(z) z$avp[z$iiform,z$ii])
} else if(type == 'vp0'){
ans <- sapply(l, function(z) z$vp0[z$iiform,z$ii])
} else if(type == 'avpo'){
ans <- sapply(l, function(z) z$avpo[z$iiform,z$ii])
}else if(type == 'formula'){
ans <- sapply(l, function(z) z$form[[iiform]])
}else if(type == 'edf'){
ans <- sapply(l, function(z) z$mdl$edf)
}
return(ans)
}
#' @export
#' @rdname roiGet
roiPred <- function(l, se = FALSE){
ans <- sapply(l, function(z) z$y0[z$iiform,z$ii])
if(se == FALSE) return(ans)
spred <- sapply(l, function(z) sqrt(z$vp0[z$iiform,z$ii]))
return(data.frame(yhat = ans, se = spred))
}
#' @export
list2mat <- function(l, empty = NA){
lnames <- sapply(l, function(z) names(z))
unames <- unique(unlist(lnames))
ans <- matrix(empty,length(l),length(unames))
colnames(ans) <- unames
for(ii in 1:length(l)){
ans[ii,names(l[[ii]])] <- as.numeric(l[[ii]])
}
return(ans)
}
#' @export
#' @rdname roiGet
roiCoef <- function(l){
return(t(sapply(l, function(z) coef(z$mdl))))
}
######################################################################
#' Demo data for ungauged analysis
#'
#' Returns a list containing the at-site means and drainage area
#' at the logarithm scale; a matrix of distance between sites and
#' a matrix of sampling error assuming correlation from
#' exponential model. For illustration purpose only.
#'
#' @export
#'
ungaugedDemoData <- function(){
## Extract the site with descriptors
info <- canadaFlood$info
ams <- split(canadaFlood$ams$flow, canadaFlood$ams$id)
avg <- sapply(ams, mean)
## Approximation of the standard error at logarithm scale (delta method)
sigma <- sapply(ams,sd) / sqrt(sapply(ams,length))/ avg
## Compute geographical distance
coord <- info[,c('lon','lat')]
h <- geosphere::distm(coord,coord, geosphere::distHaversine)/1000
## Covariance matrix
S <- diag(sigma) %*% corModel(c(900,.12), h) %*% diag(sigma)
return(list(data = data.frame( y = log(avg), x = log(info$area)),
S = S,
distance = h))
}
###############################################################
#' Performing ridge regression in GLS flood framework
#'
#' #Experimental
#'
#' @export
ridgeFlood <- function(form, x, S , criteria = 'gcv', ridge.val = NULL,
ridge.lim = NULL, edf.lim = NULL, ridge.n = 100,
tol = 1e-8){
## extract info from formula
x <- model.frame(form,x)
xm <- model.matrix(form,x)
n <- nrow(x)
p <- ncol(xm)
nedf <- n-p
ybar <- mean(x[,1])
y <- x[,1] - ybar
## Fit an initial OLS model
d <- svd(xm)
beta <- d$v %*% diag(1/d$d) %*% t(d$u) %*% x[,1]
w <- as.numeric(y - xm %*% beta)
sig2 <- sum(w*w)/nedf
sig2Old <- Inf
U <- NULL
## Loop throught iterative least-squares
while(abs(sig2-sig2Old) > tol & sig2 > tol){
U <- chol(solve(diag(nrow(S)) + S/sig2))
Uy <- as.numeric(U %*% y)
Ux <- U %*% xm
## Fit OLS model
d <- svd(Ux)
beta <- d$v %*% diag(1/d$d) %*% t(d$u) %*% Uy
w <- as.numeric(Uy - Ux %*% beta)
## Update model variance
sig2Old <- sig2
sig2 <- sum(w*w)/nedf
}
## Determine a list of ridge coefficients to evaluate
dsq <- d$d * d$d
if(criteria == 'hkb'){
ridge <- p * sig2 / sum(beta^2)
}
else if(criteria == 'gcv'){
## transform the edf in ridge coefficient
if(!is.null(ridge.val)){
ridge <- ridge.val
} else if(!is.null(ridge.lim)){
ridge <- seq(ridge.lim[1], ridge.lim[2], l = ridge.n)
} else if(!is.null(edf.lim)){
ridge <- seq(edf2Ridge(dsq, edf.lim[2]),
edf2Ridge(dsq, edf.lim[1]), l = ridge.n)
} else{
ridge <- seq(0, edf2Ridge(dsq,1), l = ridge.n)
}
}
## Seach for the best ridge coefficient
tab <- matrix(NA, length(ridge), 3 + p)
bid <- (1:p)+3
colnames(tab) <- c('ridge','edf','gcv', colnames(xm))
tab[,1] <- ridge
for(ii in 1:nrow(tab)){
beta0 <- ridgeCoef(beta, dsq, ridge[ii])
res0 <- Uy - Ux %*% beta0
edf0 <- ridge2Edf(dsq,ridge[ii])
gcv0 <- n/(n-edf0)^2 * sum(res0^2)
tab[ii,2] <- edf0
tab[ii,3] <- gcv0
tab[ii,bid] <- beta0
}
if(nrow(tab)== 1) oid <- 1
else oid <- which.min(tab[,'gcv'])
beta <- tab[oid,bid]
fitted <- ybar + xm %*% beta
resid <- x[,1] - fitted
edf <- tab[oid,'edf']
w <- as.numeric(Uy - Ux %*% beta)
sig2 <- sum(w*w)/(n-edf)
ans <- list(coefficients = beta,
fitted.values = fitted,
residuals = resid,
df.residuals = n - edf,
edf = edf,
rank = p,
model = x,
ridge.table = tab,
formula = form,
svd = d,
sig2 = sig2)
class(ans) <- c('list','ridgeFlood')
return(ans)
}
#' @export
#' @rdname ridgeFlood
ridge2Edf <- function(z, r) sum(z/(z+r))
#' @export
#' @rdname ridgeFlood
edf2Ridge <- function(z, edf){
## Fin and initial interval
l1 <- length(z); l0 <- 0
while(ridge2Edf(z,l1) > edf ){
l0 <- l1
l1 <- 2*l1
}
return(uniroot(function(r) edf - ridge2Edf(z,r), c(l0,l1) )$root)
}
#' @export
#' @rdname ridgeFlood
ridgeCoef <- function(beta, z, ridge)
beta * z/(z+ridge)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.