#' Maintenance of Variance Extension, Type 2
#'
#' Calculates the Maintenance Of Variance Extension, Type 2 (MOVE.2) for record
#' extension by fitting a Line of Organic Correlation (LOC) regression model.
#'
#' @details MOVE.2 has a necessarily predefined method for missing values---the
#'response variable is assumed to contain missing values and they are the values
#'to be estimated by the model equation. For the function \code{move.2}, missing
#'values in the explanatory variable are excluded from the computations.
#'
#'If \code{distribution} is "normal," then the data in the explanatory variable
#'and the response variable are assumed to have a bivariate normal distribution.
#'Otherwise, they are assumed to have a bivariate log-normal distribution and a
#'logarithmic transform is applied to both the explanatory variable and the
#'response variable before analysis. The natural logarithm is used if
#'\code{distribution} is "lognormal" and the commmon logarithm is used if
#'\code{distribution} is "commonlog." If either the response or the explanatory
#'has zero values, then the "log1p" option can be used. That option adds 1 to
#'each value and then computes the natural logarithm.
#'Alternatively, the output from \code{optimBoxCox}
#'that contains both the response and explanatory variables can be supplied to
#'transform those variables by other than a logarithmic transform.
#'
#' @param formula a formula object with the response variable on the left of
#'the ~ operator and a single explantory variable on the right.
#' @param data the data frame containing the variables named in \code{formula}.
#' @param subset an optional vector specifying a subset of observations to be
#'used in the fitting process.
#' @param distribution either "normal," "lognormal," "commonlog," "log1p,"
#'or an object of class "optimBoxCox" indicating the nature of the bivariate
#'distribution, see \bold{Details}.
#' @param lag the number of days to account for time of travel between the explanatory
#'and response sites. If the explanatory site is upstream, then lag can be positive,
#'otherwise, lag can be negative to account for the travel time between the sites.
#' @return An object of class "move.2" having these components:
#'\item{coefficients}{ the intercept and slope of the line describing the fit.}
#'\item{R}{ Pearson's correlation coefficient.}
#'\item{p.value}{ the p-value from the correlation test, given a two-sided alternate hypothesis.}
#'\item{call}{ the matched call to \code{move.1}.}
#'\item{fitted.values}{ the fitted LOC values for the response.}
#'\item{residuals}{ a 2-column matrix containing the signed distance from the
#'predicted to to the corresponding the response variable and the explanatory variable values.}
#'\item{x}{ the (possibly transformed and lagged) values for the explanatory variable.}
#'\item{y}{ the (possibly transformed) values for the response variable. }
#'\item{lag}{ the value of the \code{lag} argument. }
#'\item{xstats}{ the mean and standard deviation of the (possibly
#'transformed) values for the explanatory variable.}
#'\item{ystats}{ the mean and standard deviation of the (possibly
#'transformed) values for the response variable.}
#'\item{var.names}{ the names of the response variable and the explanatory variable.}
#'\item{model}{ the model frame.}
#'\item{data}{ the data frame supplied in \code{data}.}
#'\item{distribution}{ the value supplied in \code{distribution}.}
#' @note Objects of class "move.2" have \code{print}, \code{predict}, and
#' \code{plot} methods.
#' @seealso \code{\link{predict.move.2}}, \code{\link{plot.move.2}},
#' \code{\link{optimBoxCox}}
#' @references Hirsch, R.M., 1982, A comparison of four streamflow record
#'extension techniques: Water Resources Research, v. 18, p. 1081--1088.\cr
#'Moog, D.B., Whiting, P.J., and Thomas, R.B., 1999, Streamflow record extension using
#'power transformations and applicaitons to sediment transport: Water Resources
#'Research, v. 35, p 243--254.
#' @keywords models regression
#' @examples
#'\dontrun{
#'# See the vignette:
#'vignette("RecordExtension", package="smwrStats")
#'}
#' @export move.2
move.2 <- function(formula, data, subset, distribution="normal", lag=0) {
##
## Process formula as with regular linear regression!
call <- match.call()
m <- match.call(expand.dots = FALSE)
m$distribution <- m$lag <- NULL
m$drop.unused.levels <- TRUE
m$na.action <- "na.pass"
m[[1L]] <- as.name("model.frame")
m <- eval(m, sys.parent())
if(ncol(m) != 2L)
stop("Must specify a single response and a single explanatory variable in the formula")
Yall <- m[, 1L]
yname <- names(m)[1L]
Xall <- shiftData(m[, 2L], lag)
xname <- names(m)[2L]
# Extract the matching data and the extended x data (subscript 2 in Hirsch)
good <- complete.cases(Xall, Yall)
X <- Xall[good]
Y <- Yall[good]
X2 <- Xall[!is.na(Xall) & !good]
fit <- list(coefficients=double(2L))
## Extract missing value info
fit$na.action <- attr(m, "na.action")
## Adjust for transforms
if(inherits(distribution, "character")) {
distribution <- match.arg(distribution, c("normal", "lognormal", "commonlog", "log1p"))
addTrans <- FALSE
# Check for 0s and force to log1p
if(min(c(Y,X, X2)) == 0 && distribution != "normal" && distribution != "log1p") {
warning("Zero values in data, changing distribution to \"log1p\"")
distribution <- "log1p"
call$distribution <- "log1p"
}
if(distribution == "lognormal") {
Y <- log(Y)
X <- log(X)
X2 <- log(X2)
xname <- paste("log(", xname, ")", sep='')
yname <- paste("log(", yname, ")", sep='')
} else if(distribution == "commonlog") {
Y <- log10(Y)
X <- log10(X)
X2 <- log10(X2)
xname <- paste("log10(", xname, ")", sep='')
yname <- paste("log10(", yname, ")", sep='')
} else if(distribution == "log1p") {
Y <- log1p(Y)
X <- log1p(X)
X2 <- log1p(X2)
xname <- paste("log1p(", xname, ")", sep='')
yname <- paste("log1p(", yname, ")", sep='')
}
## Otherwise normal
} else { # Must be the output from optimBoxCox
## Transform according to boxCox
## Find the corresponding name
xpick <- which(distribution$names == xname)
ypick <- which(distribution$names == yname)
xtrans <- round(distribution$lambda[xpick], 1L)
xgm <- signif(distribution$gm[xpick], 4)
xname <- paste("boxCox(", xname, ",", xtrans, ",", xgm, ")", sep='')
X <- as.numeric(boxCox(X, xtrans, xgm))
X2 <- as.numeric(boxCox(X2, xtrans, xgm))
ytrans <- round(distribution$lambda[ypick], 1L)
ygm <- signif(distribution$gm[ypick], 4)
yname <- paste("boxCox(", yname, ",", ytrans, ",", ygm, ")", sep='')
Y <- as.numeric(boxCox(Y, ytrans, ygm))
addTrans <- TRUE
}
ybar <- mean(Y)
yvar <- var(Y)
ystats.orig <- c(concurrent.mean=ybar, concurrent.sd=sqrt(yvar))
xbar <- mean(X)
xvar <- var(X)
xstats.orig <- c(concurrent.mean=xbar, concurrent.sd=sqrt(xvar))
retcor <- cor.test(X, Y)
fit$R <- retcor$estimate
fit$p.value <- retcor$p.value
# MOVE.2 code here
N1 <- length(X)
if(!(N2 <- length(X2))) {
warning("There is no extended period for the explanatory variable (N2=0)!")
} else {
ybar1 <- ybar
yvar1 <- yvar
xbar1 <- xbar
xvar1 <- xvar
xbar2 <- mean(X2)
xvar2 <- var(X2)
xbar <- mean(c(X, X2))
xvar <- var(c(X, X2))
# Compute the adjusted mean of ybar
ybar <- ybar1 + N2/(N1 + N2) * fit$R * sqrt(yvar1/xvar2) * (xbar2 - xbar1)
alpha2 <- N2*(N1 - 4)*(N1 - 1)/((N2 - 1) * (N1 - 3) * (N1 -2))
R2 <- fit$R^2
yvar <- ((N1 - 1)*yvar1 +
(N2 - 1)*R2*yvar1/xvar1*xvar2 +
(N2 - 1)*alpha2*(1 - R2)*yvar1 +
N1*N2/(N1 + N2)*R2*yvar1/xvar1*(xbar2 - xbar1)^2)/(N1 + N2 - 1)
}
fit$coefficients[2L] <- sqrt(yvar/xvar)
if(fit$R < 0)
fit$coefficients[2L] <- - fit$coefficients[2L]
fit$coefficients[1L] <- ybar - fit$coefficients[2L] * xbar
names(fit$coefficients) <- c("(Intercept)", xname)
## Complete steps for the model
fit$call <- call
fit$fitted.values <- cbind(1, X) %*% as.matrix(fit$coefficients)
yresiduals <- Y - fit$fitted.values
## Now reverse the sense of the fit to get X-residuals
junk <- double(2L)
junk[2L] <- 1/fit$coefficients[2L]
junk[1L] <- xbar - junk[2L] * ybar
xfitted <- cbind(1, Y) %*% as.matrix(junk)
xresiduals <- X - xfitted
fit$residuals <- cbind(yresiduals, xresiduals)
colnames(fit$residuals) <- c(yname, xname)
## Pack the object with other necessary information
fit$x <- X
fit$y <- Y
fit$lag <- lag
fit$xstats <- c(mean=xbar, sd=sqrt(xvar))
fit$xstats.orig <- xstats.orig
fit$ystats <- c(corrected.mean=as.vector(ybar),
corrected.sd=as.vector(sqrt(yvar)))
fit$ystats.orig <- ystats.orig
fit$var.names <- c(yname, xname)
fit$N1 <- N1
fit$N2 <- N2
fit$model <- m
fit$data <- data
fit$distribution <- distribution # Required to evaluate if not character
if(addTrans) {
fit$xtrans <- xtrans
fit$xgm <- xgm
fit$ytrans <- ytrans
fit$ygm <- ygm
}
## set class to move.2
oldClass(fit) <- "move.2"
return(fit)
}
## The default extraction methods: coef, residuals, fitted work
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.