#############################################################################
#' Simulation of a regional dataset with intersite correlation.
#'
#' Returns a dataset containing multiple time series in the form of
#' a matrix where the sites are in columns.
#' Different record lengths can be specified for each site
#' and missing values are filled accordingly at the beginning.
#' The rows (time) are independent and the intersite correlation model
#' is based on a multivariate Normal distribution.
#'
#' @param x,obj Matrix (in rows) of parameters or L-moments for all sites to simulate.
#' Can also be a pooling group or intersite model, where key arguments are extracted
#' from the input object.
#'
#' @param distr Marginal distribution of each site.
#' If only one value is passed as argument, the same is used for all sites.
#'
#' @param nrec Record lengths of the sites.
#' If only one value is passed in argument, the same is used for all sites.
#'
#' @param corr Correlation matrix for the dependence between site.
#' If only one value is passed, the correlation is assumed
#' the for every pair of sites.
#'
#' @param corr.sqrt Squared correlation matrix. Can be passed to speed up
#' multiple calls.
#'
#' @param lmom Logical. Is the argument `x` a matrix of L-moments or
#' distribution parameters
#'
#' @param lscale Logical. Is the second L-moments the scale (`TRUE`)
#' or the LCV (`FALSE`).
#'
#' @param long Logical. Should the output be returned in a long format.
#'
#' @export
#'
#' @examples
#'
#' ## Extract data
#' data(canFlood)
#' lmom <- canFlood[1:5, c('L1','LCV','LSK')]
#'
#' ## Simulate data base on at-site L-moments
#' sim <- RegSim(lmom, distr = 'gev', nrec = 11:15, corr = .4)
#'
#' head(sim)
#'
#' sim <- RegSim(lmom,
#' distr = c(rep('gev',4),'gno'),
#' nrec = 10:15,
#' corr = .4, long = TRUE)
#' head(sim)
#'
#' ############################################################################
RegSim <- function(x, ...) UseMethod('RegSim', x)
#' @export
RegSim.data.frame <- function(x, ...)
RegSim.matrix(as.matrix(x), ...)
#' @export
#' @rdname RegSim
RegSim.matrix <-
function(
x,
distr,
nrec,
corr = 0,
corr.sqrt = FALSE,
lmom = TRUE,
lscale = FALSE,
long = FALSE)
{
## Extract info
nsite <- nrow(x)
nrec.max <- max(nrec)
## Expend distr if constant
if(length(distr) == 1)
distr <- rep(distr, nsite)
## Create a matrix if only a scalar is passed as correlation
if (length(corr) == 1){
corr <- matrix(corr, nsite, nsite)
diag(corr) <- 1
}
## Create a matrix if only a scalar is passed as correlation
if (length(nrec) == 1){
nrec <- rep(nrec,nsite)
}
## Decomposed the correlation matrix if necessary
if( any(is.na(corr)))
stop('Missing value in the correlation matrix')
if (corr.sqrt){
corr.L <- corr
} else {
corr.L <- chol(corr)
}
## Simulate a normal copula
u <- matrix(rnorm(nrec.max * nsite), nrec.max, nsite)
u <- pnorm(u %*% corr.L)
## ------------------------------------------ ##
## Convert L-moment to parameter if necessary
## ------------------------------------------ ##
if (lmom){
## Extract list of functions that convert L-moments to parameter
ffunc <- lapply(distr,
function(d) getFromNamespace(paste0('pel',d),'lmom'))
## If LCV is passed correct the 2nd L-moments
if(lscale == FALSE)
x[,2] <- x[,2] * x[,1]
## Estimate parameters for every site
para <- lapply(1:nrow(x), function(kk) ffunc[[kk]](x[kk, ]))
} else {
## If parameter were passed as matrix, transform to list
para <- lapply(split(x,1:nrow(x)), na.omit)
}
## Extract a list of quantile functions for each site
qfunc <- lapply(distr, function(d){
getFromNamespace(paste0('qua',d),'lmom')
})
## Transform uniform data to proper marginal distribution
ans <- sapply(1:nsite, function(kk){
qfunc[[kk]](u[,kk], para[[kk]])
})
## Adjust record length by adding NA to early data
Rlen <- function(x,n){
nrm <- length(x)-n
if(nrm > 0)
x[1:nrm] <- NA
return(x)
}
ans <- sapply(1:nsite, function(kk) Rlen(ans[,kk], nrec[kk]))
## ------------------------------------- ##
## Transform the data in the long format
## ------------------------------------- ##
if(long){
ans <- cbind(expand.grid(time = 1:nrow(ans), site = 1:ncol(ans)),
value = as.numeric(ans))
ans <- ans[!is.na(ans$value), ]
}
return(ans)
}
#' @export
#' @rdname RegSim
RegSim.intersite <- function(obj, distr, n = 1)
{
## Extract dimension
nsite <- nrow(obj$lmom)
nmax <- max(obj$nrec)
## Pre decompose the correlation matrix for faster simulation
corr.L <- chol(obj$corr)
## -------------------- ##
## Perform simulation
## -------------------- ##
if(n == 1){
## Return a matrix if only one dataset is simulated
ans<- RegSim(obj$lmom,
distr = distr,
nrec = obj$nrec,
corr = corr.L,
corr.sqrt = TRUE)
} else {
ans <- replicate(n, RegSim(obj$lmom,
distr = distr,
nrec = obj$nrec,
corr = corr.L,
corr.sqrt = TRUE))
}
return(ans)
}
#' @export
#' @rdname RegSim
RegSim.poolgrp <- function(obj, n = 1, long = FALSE, ...)
{
nsite <- nrow(obj$lmom)
nmax <- max(obj$nrec)
## ---------------------------------------------------------- ##
## Pre decompose the correlation matrix for faster simulation
## ---------------------------------------------------------- ##
if (obj$inter == 'avg'){
## Create a matrix with constant correlation coefficients
cor0 <- matrix(obj$inter.avg, nsite, nsite)
diag(cor0) <- 1
cor0 <-as.matrix(Matrix::nearPD(cor0, corr = TRUE)$mat)
} else{
cor0 <- obj$inter.corr
}
corr.L <- chol(cor0)
## ---------------------------------------------------------- ##
## Perform simulation
## ---------------------------------------------------------- ##
lmom0 <- t(replicate(nsite,obj$rlmom))
Sim1 <- function(){
out <- RegSim(lmom0, distr = obj$distr, nrec = obj$nrec,
corr = corr.L, corr.sqrt = TRUE, long = FALSE)
mapply('*', as.data.frame(out), obj$lmom[,1])
}
ans <- replicate(n, Sim1())
colnames(ans) <- obj$id
## Case there is one simulation
if(n == 1)
ans <- ans[ , , 1]
if (n == 1 & long){
ids <- expand.grid(1:nrow(ans),
1:ncol(ans))[,2:1]
ans <- cbind(ids, as.numeric(ans))
colnames(ans) <- c('site','time','value')
}
## Case
if (n >1 & long) {
ids <- expand.grid(1:nrow(ans),
1:ncol(ans),
1:n)[,3:1]
ans <- cbind(ids, as.numeric(ans))
ans <- na.omit(ans)
colnames(ans) <- c('rep','site','time','value')
}
return(ans)
}
############################################################################
#' Modeling intersite correlation
#'
#' Returns an intersite correlation model.
#' The function computes the sample L-moments and estimates the empirical
#' intersite correlation matrix between sites.
#' A power exponential model can be fitted to smooth the intersite correlations
#' in respect with the distance.
#'
#' @param form Formula that describes the response value, sites and time variables.
#' Must have the form \code{value ~ site + time}.
#'
#' @param x Data. If not specify by a formula, the columns must be respectively :
#' value, site and time.
#'
#' @param distance Matrix of distances. Must be of the same order as the sites
#' appear in x.
#'
#' @param nmin Minimal number of paired observations for computing
#' the empirical correlation, otherwise no correlation is assumed.
#'
#' @param smooth Logical. Should the intersite correlation matrix be smoothed by
#' a power exponential model.
#'
#' @param smooth.p Exponent of the power exponential model.
#'
#' @param smooth.hmax Maximal distance used for fitting the power exponential model.
#'
#' @param nmom Number of L-moments to evaluate.
#'
#' @return
#'
#' \item{lmom}{Matrix of L-moments.}
#' \item{nrec}{Record lengths.}
#' \item{distance}{Distance matrix.}
#' \item{corr}{Intersite correlation matrix.}
#' \item{para}{Parameters of the power exponential model.}
#'
#' @export
#'
#' @examples
#'
#' h <- flowAtlantic$distance
#' a <- flowAtlantic$ams
#'
#' Intersite(ams ~ id + year, a)
#'
#' Intersite(ams ~ id + year, a, distance = h,
#' smooth = TRUE, smooth.hmax = 500)
#'
############################################################################
Intersite <- function(x, ...) UseMethod('Intersite',x)
#' @export
#' @rdname Intersite
Intersite.data.frame <-function(
x,
distance = NULL,
nmin = 20,
smooth = FALSE,
smooth.p = 1,
smooth.hmax = Inf,
nmom = 4)
{
## Convert data to wide format
xmat <- DataWide(x)
##---------------------------------##
## Compute the correlation matrix
##---------------------------------##
## compute correlation matrix using spearman rho
corr <- cor(xmat, use = 'pairwise.complete.obs', method = 'spearman')
corr <- 2*sin(pi/6*corr)
## compute the length of each pair and remove the correlation coefficient
## that were evaluated with too few pairs
nmat <- crossprod(!is.na(xmat))
nid <- (nmat < nmin)
if (any(nid))
corr[nid] <- NA
#impute zero to missing values
nid <- is.na(corr)
if (any(nid))
corr[nid] <- 0
## force the diagonal to 1. Error could happen if nmin > nsite
diag(corr) <- 1
##-----------------------------------------------------------##
## Fit a power exponential model on the pairwise correlation
##-----------------------------------------------------------##
if (smooth){
if (is.null(distance))
stop('Distance must be specified for smoothing correlation')
## extract pairs
pcor <- corr[lower.tri(corr)]
ph <- distance[lower.tri(distance)]
w <- nmat[lower.tri(nmat)]
## remove pairs too far apart
pid <- (ph <= smooth.hmax)
pcor <- pcor[pid]
ph <- ph[pid]
w <- w[pid]
## Define power exponential model
Fpow <- function(p,h){
## ensure positiveness
p[1] <- exp(p[1])/(1+exp(p[1]))
p[2] <- exp(p[2])
return(p[1] * exp(-3*( h / p[2])^smooth.p))
}
## Define MSE of the correlation fit
Fobj <- function(p,h) sum(w*(pcor - Fpow(p,h))^2)
## Optimize the MSE
sol <- optim(c(0,log(mean(ph))),
Fobj, h = ph)
## Create the new smooth correlation matrix base on fitted model
corr <- Fpow(sol$par,distance)
diag(corr) <- 1
## Extract the parameters of the correlation model
para <- c(1-exp(sol$par[1])/(1+exp(sol$par[1])),
exp(sol$par[2]),
smooth.p)
names(para) <- c('nugget','range','smooth')
} else {
para <- NULL
}
## Empirical correlation matrix
## Correct for positive definitness
corr <- as.matrix(Matrix::nearPD(corr, corr = TRUE)$mat)
## Compute the record lengths
nrec <- apply(xmat,2,function(z) length(na.omit(z)))
## There should be at least 4 L-moments
nmom <- max(2,nmom)
## Compute the Lmoments
lmom <- t(apply(xmat, 2, function(z) lmom::samlmu(na.omit(z),nmom)))
colnames(lmom) <- c('L1','LCV','LSK','LKUR','TAU5','TAU6')[1:nmom]
lmom[,2] <- lmom[,2]/lmom[,1]
## Return results
ans <- list(lmom = lmom,
nrec = nrec,
distance = distance,
corr = corr,
para = para)
class(ans) <- 'intersite'
return(ans)
}
#' @export
#' @rdname Intersite
Intersite.formula <- function(form, x, ...){
x <- model.frame(form, as.data.frame(x))
Intersite(x, ...)
}
#' @export
Intersite.matrix <- function(x, ...){
Intersite(as.data.frame(x), ...)
}
#' @export
print.intersite <- function(obj)
{
## number of site
n <- nrow(obj$corr)
pcor <- obj$corr[lower.tri(obj$corr)]
cat('\nIntersite correlation\n')
cat('\nNumber of site: ',n,'\n')
## Correlation model
if (all(obj$para == -1)){
cat('Model: emp\n')
cat('Average:', round(mean(pcor),3))
mx <- pcor[which.max(abs(pcor))]
cat('\nMax (abs):', round(mx,3))
} else{
cat('Model: exp\nParameters:\n')
print(round(obj$para,3))
}
}
####################
#' Transform streamflow data to wide format
#'
#' Return a matrix where every columns correspond to a specific site.
#' Missing value are added.
#'
#' @param x Data in long format.
#'
#' @param form Formula that specify the variable.
#' Must have the form: `value ~ site + time`.
#'
#'
#' @export
#'
#' @examples
#'
#' DataWide(flowAtlantic$ams, ams ~ id + year)
#'
DataWide <- function(x, form = NULL){
## Force data to be a data.frame
x <- as.data.frame(x)
## If a formula is passed, extract the variable
if(!is.null(form))
x <- model.frame(form,x)
## Verify that there is no missing value in the data
x <- na.omit(x)
## verify that there is no duplicate
if(max(table(x[,2],x[,3]))>1)
stop('There is some duplicates')
##---------------------------------##
## reshape data in matrix wide format
##---------------------------------##
## Extract level for the site and time id
site.name <- unique(as.character(x[,2]))
time.name <- unique(as.character(x[,3]))
nsite <- length(site.name)
if(nsite < 2)
stop('Must have more than one site')
## Transform site and time id in integer
## note that factor sort the label
x[,2] <- as.integer(factor(as.character(x[,2])))
x[,3] <- as.integer(factor(as.character(x[,3])))
## split the data in list
xlst <- split(x, x[,2])
nc <- length(xlst)
nr <- max(x[,3])
## Sort site in the original order
site.order <- unique(x[,2])
xlst <- xlst[site.order]
## allocate memory
xmat <- matrix(NA, nr, nc)
## Affect the value to the wide format
for(jj in 1:nc)
xmat[xlst[[jj]][,3],jj] <- xlst[[jj]][,1]
colnames(xmat) <- site.name
rownames(xmat) <- sort(time.name)
return(xmat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.