#'Weighing Matrix for ROI-WLS (WREG)
#'
#'@description \code{Omega.WLS.ROImatchMatLab} calculates the weighting function
#'for a weighted least-squares regression using regions-of-influence. This is
#'largely legacy code to match WREG v. 1.05 idiosyncrasies.
#'
#'@param Y.all The dependent variable of interest at all sites in the network,
#' with any transformations already applied.
#'@param X.all The independent variables at all sites in the network, with any
#' transformations already applied. Each row represents a site and each column
#' represents a particular independent variable. The rows must be in the same
#' order as the dependent variables in \code{Y}.
#'@param LP3.all A dataframe containing the fitted Log-Pearson Type III standard
#' deviate, standard deviation and skew for all sites in the network. The
#' names of this data frame are \code{S}, \code{K} and \code{G}. The order of
#' the rows must be the same as \code{Y}.
#'@param RecordLengths.all \code{RecordLengths.all} may be a matrix whose rows
#' and columns are in the same order as \code{Y}. Each \code{(r,c)} element
#' represents the length of concurrent record between sites \code{r} and
#' \code{c}. The diagonal elements therefore represent each site's full record
#' length. For \dQuote{WLS}, only the at-site record lengths are needed. In
#' this case, \code{RecordLengths} can be a vector or a matrix. Here, this
#' should only include sites in the current region of influence.
#'@param NDX A vector listing the indices of the sites that comprise the region
#' of influence.
#'
#'@details This is a legacy function that matches the idiosyncrasies of WREG v.
#' 1.05. This includes using all sites to implement the sigma regression,
#' averaging across all record lengths, using arbitrary record lengths to
#' estimate weights and using a new function for estimating the MSE of the
#' basic OLS model.
#'
#' This function will become obsolete once all idiosyncrasies are assessed.
#'
#'@return \code{Omega.WLS.ROImatchMatLab} returns a list with three elements:
#' \item{Omega}{The estimated weighting matrix.} \item{var.modelerror.0}{The
#' estimated model error variance for a constant-value model.}
#' \item{var.modelerror.k}{The estimated model error variance for a k-variable
#' model.}
#'
#'
#' @examples
#' # Import some example data
#' peakFQdir <- paste0(
#' file.path(system.file("exampleDirectory", package = "WREG"),
#' "pfqImport"))
#' gisFilePath <- file.path(peakFQdir, "pfqSiteInfo.txt")
#' importedData <- importPeakFQ(pfqPath = peakFQdir, gisFile = gisFilePath)
#'
#' # Organizing input data
#' lp3Data <- importedData$LP3f
#' lp3Data$K <- importedData$LP3k$AEP_0.5
#' Y <- importedData$Y$AEP_0.5
#' X <- importedData$X[c("Sand", "OutletElev", "Slope")]
#'
#' #### Geographic Region-of-Influence
#' i <- 1 # Site of interest
#' n <- 10 # size of region of influence
#' Gdist <- vector(length=length(Y)) # Empty vector for geographic distances
#' for (j in 1:length(Y)) {
#' if (i!=j) {
#' #### Geographic distance
#' Gdist[j] <- Dist.WREG(Lat1 = importedData$BasChars$Lat[i],
#' Long1 = importedData$BasChars$Long[i],
#' Lat2 = importedData$BasChars$Lat[j],
#' Long2 = importedData$BasChars$Long[j]) # Intersite distance, miles
#' } else {
#' Gdist[j] <- Inf # To block self identification.
#' }
#' }
#' temp <- sort.int(Gdist,index.return=TRUE)
#' NDX <- temp$ix[1:n] # Sites to use in this regression
#'
#' # Compute weighting matrix
#' weightingResult <- Omega.WLS.ROImatchMatLab(Y.all = Y, X.all = X,
#' LP3.all = lp3Data, RecordLengths.all = importedData$recLen, NDX = NDX)
#'
#'@export
Omega.WLS.ROImatchMatLab <- function(Y.all,X.all,LP3.all,RecordLengths.all,NDX) {
# William Farmer, USGS, January 26, 2015
#
# WREG v 1.05 (MatLab) is inconsistent in which components are used to develop equations in RoI-WLS
# Eq. 15 is implemented with all sites
# The mean inverse record length is implemented with all sites
# Some upfront error handling
wregValidation((!missing(X.all)&!missing(Y.all))&&
(length(Y.all)!=nrow(X.all)), "eq", FALSE,
paste0("The length of Y.all must be the same as ",
"the number of rows in X.all."), warnFlag = TRUE)
if (!wregValidation(missing(Y.all), "eq", FALSE,
"Dependent variable (Y.all) must be provided.", warnFlag = TRUE)) {
if (!wregValidation(Y.all, "numeric", message =
"Dependent variable (Y.all) must be provided as class numeric.",
warnFlag = TRUE)) {
wregValidation(sum(is.na(Y.all)), "eq", 0 ,
paste0("The depedent variable (Y.all) contains missing ",
"values. These must be removed."),
warnFlag = TRUE)
wregValidation(sum(is.infinite(Y.all)), "eq", 0 ,
paste0("The depedent variable (Y.all) contains infinite ",
"values. These must be removed."),
warnFlag = TRUE)
}
}
if (!wregValidation(missing(X.all), "eq", FALSE,
"Dependent variable (X.all) must be provided.", warnFlag = TRUE)) {
if (!wregValidation(X.all, "numeric", message =
"Dependent variable (X.all) must be provided as class numeric.",
warnFlag = TRUE)) {
wregValidation(sum(is.na(X.all)), "eq", 0 ,
paste0("The depedent variable (X.all) contains missing ",
"values. These must be removed."),
warnFlag = TRUE)
wregValidation(X.all, "infinite", message=
paste0("The depedent variable (X.all) contains infinite ",
"values. These must be removed."),
warnFlag = TRUE)
}
}
wregValidation(missing(NDX) | !is.numeric(NDX) | !is.vector(NDX), "eq", FALSE,
paste0("NDX must be provided as a numeric vector."),
warnFlag = TRUE)
wregValidation(!sum(is.element(NDX, 1:length(Y))) == length(NDX), "eq", FALSE,
paste0("NDX must be valid indices of inputs like Y.all"),
warnFlag = TRUE)
# Error checking LP3
if (!wregValidation(missing(LP3.all), "eq", FALSE, "LP3.all must be provided as a data frame with elements named
'S', 'K' and 'G' for standard deivation, deviate and skew, respectively.",
warnFlag = TRUE)){
if (!wregValidation(!is.data.frame(LP3.all), "eq", FALSE,
paste("LP3.all must be provided as a data frame with elements named",
"'S', 'K' and 'G' for standard deivation, deviate and skew,",
"respectively."), warnFlag = TRUE)){
wregValidation(sum(is.element(c("S","K","G"),names(LP3.all))), "eq", 3,
paste("In valid elements: The names of the elements in LP3.all are",
names(LP3.all),". LP3.all must be provided as a data frame with elements named",
"'S', 'K' and 'G' for standard deivation, deviate and skew,",
"respectively."), warnFlag = TRUE)
if(wregValidation((length(unique(apply(cbind(LP3.all$S,LP3.all$K,LP3.all$G),FUN=class,MARGIN=2)))!=1)|
(unique(apply(cbind(LP3.all$S,LP3.all$K,LP3.all$G),FUN=class,MARGIN=2))!="numeric"), "eq", FALSE,
"LP3.all must be provided as a numeric array", warnFlag = TRUE)){
wregValidation(sum(is.infinite(LP3.all$S),is.infinite(LP3.all$K),is.infinite(LP3.all$G)), "eq", 0,
"LP3.all must be provided as a numeric array", warnFlag = TRUE)
wregValidation(sum(is.na(LP3.all$S),is.na(LP3.all$K),is.na(LP3.all$G)), "eq", 0,
paste0("Some elements of LP3.all$S, LP3.all$K, and LP3.all$G contain missing ",
"values. These must be removed."), warnFlag = TRUE)
}
}
}
if(!wregValidation(missing(RecordLengths.all), "eq", FALSE,
"A matrix of RecordLengths.all must be provided as input.", warnFlag = TRUE)){
wregValidation(ncol(RecordLengths.all), "eq", nrow(RecordLengths.all),
"RecordLengths.all must be provided as a square array", warnFlag = TRUE)
wregValidation(RecordLengths.all, "numeric", message =
"RecordLengths.all must be provided as a numeric array", warnFlag = TRUE)
}
if (warn("check")) {
stop("Invalid inputs were provided. See warnings().", warn("get"))
}
## Get the subset parameters
Y<-Y.all[NDX] # Subset of dependent variables in region of influence.
X<-X.all[NDX,] # Subset of independent variables in region of influence.
LP3<-data.frame(LP3.all)[NDX,] # Subset of LP3 parameters for region of influence.
if (is.matrix(RecordLengths.all)) {
RecordLengths.all <- diag(RecordLengths.all)
}
RecordLengths <- RecordLengths.all[NDX]
## Initial OLS (basis for others)
Omega <- diag(nrow(X)) # Temporary weighting matrix (identity) for initial OLS
B_hat <- solve(t(X)%*%solve(Omega)%*%as.matrix(X))%*%t(X)%*%solve(Omega)%*%Y # OLS estimated coefficients
Y_hat <- as.matrix(X)%*%B_hat # OLS model estimates
e <- Y-Y_hat # OLS model residuals
MSE.OLS <- sum(e^2)/(nrow(X)-ncol(X)) # OLS mean square-error (k-variable)
MSE.OLS <- sd(e)^2 # BUG: MatLab code uses this expression in RoI WLS. See line 1450 of WREG v 1.05
MSE.OLS.0 <- sum((Y-matrix(1,ncol=1,nrow=length(Y))%*%solve(t(matrix(1,ncol=1,nrow=length(Y)))%*%solve(Omega)%*%matrix(1,ncol=1,nrow=length(Y)))%*%t(matrix(1,ncol=1,nrow=length(Y)))%*%solve(Omega)%*%Y)^2)/(nrow(X)-1) # OLS mean square-error (constant model)
## Estimate k-variable model-error variance
Omega <- diag(nrow(X.all)) # Temporary weighting matrix (identity) for k-variable sigma regression. Eq 15.
B.SigReg <- solve(t(X.all)%*%solve(Omega)%*%as.matrix(X.all))%*%t(X.all)%*%solve(Omega)%*%LP3.all$S # OLS estimated coefficients for model of LP3 standard deviations
# BUG: MatLab fits beta regression (Eq 15) using all sites. See lines 1305-1316 and 1413-1419 of WREG v 1.05
Yhat.SigReg <- as.matrix(X.all)%*%B.SigReg # Estimates of sigma regression
S_bar <- mean(Yhat.SigReg[NDX]) # Average sigma of limited set
G_bar <- mean(LP3$G) # Average skew of limited-set LP3
K_bar <- mean(LP3$K) # Average standard deviate of limited-set LP3
c1 <- max(0,(1+K_bar^2*(1+0.75*G_bar^2)/2+K_bar*G_bar)*S_bar^2) # Coefficeint for calculation of the variance of at-site skew. Eq 29.
var.modelerror.k <- max(0,MSE.OLS-c1*mean(1/RecordLengths.all)) # k-variable model-error variance. Eq 14.
# BUG: MatLab code uses average of all inverse record lengths. See lines 1453-1461.
## Estimate 0-order model-error variance
B.SigReg <- solve(t(matrix(1,ncol=1,nrow=nrow(X.all)))%*%solve(Omega)%*%matrix(1,ncol=1,nrow=nrow(X.all)))%*%t(matrix(1,ncol=1,nrow=nrow(X.all)))%*%solve(Omega)%*%LP3.all$S # Temporary weighting matrix (identity) for constant-model sigma regression. Eq 15.
Yhat.SigReg <- matrix(1,ncol=1,nrow=nrow(X.all))%*%B.SigReg # Estimates of sigma regression
S_bar <- mean(Yhat.SigReg[NDX]) # Average sigma of limited set
c1.0 <- max(0,(1+K_bar^2*(1+0.75*G_bar^2)/2+K_bar*G_bar)*S_bar^2) # Coefficeint for calculation of the variance of at-site skew. Eq 29.
var.modelerror.0 <- max(0,MSE.OLS.0-c1.0*mean(1/RecordLengths.all)) # Constant-model model-error variance. Eq 14.
## Final weighting matrix
Omega <- diag((var.modelerror.k+c1/RecordLengths.all[1:length(NDX)])) # WLS weighting matrix. Eq 12
# BUG: MatLab code mis-shuffles the record lengths. See lines 1457-1459 of WREG v. 1.05
## Output
Output <- list(Omega=Omega,var.modelerror.0=var.modelerror.0,var.modelerror.k=var.modelerror.k)
return(Output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.