R/loadReg.R

#' Actions & checks for when the user [indirectly] calls loadReg()
#' 
#' Require that the user has loaded rloadest. Also give the user a message about
#' rloadest if appropriate.
#' 
#' @keywords internal
checkRloadestStatus <- function() {
  rloadest_loaded <- "package:rloadest" %in% search()
  if(!rloadest_loaded) {
    stop("to use loadReg2(), please call library(rloadest) first")
  } else if(!pkg_env$rloadest_msg_given) {
    message("You are fitting an rloadest model (loadReg). ",
            "Please remember to cite both citation('loadflex') and citation('rloadest').")
    pkg_env$rloadest_msg_given <- TRUE
  }
}

#' Extracts and imports metadata from an rloadest loadReg model into an object of class 
#' "metadata"
#' #' 
#' @inheritParams getMetadata 
#' @importFrom rloadest loadReg
#' @param load.model a loadReg object
#' @export
#' @return Object of class "metadata" with slots modified according to the
#'   metadata contained in load.model
#' @family getMetadata
getMetadata.loadReg <- function(load.model) {

  metadata(
    constituent=load.model$constituent,
    flow=load.model$flow,
    dates=load.model$dates,
    conc.units=load.model$conc.units, 
    flow.units=load.model$flow.units, 
    load.units=load.model$load.units, 
    load.rate.units=paste0(load.model$load.units, "/day"),
    site.name=load.model$station)

}


#' Resample the coefficients from a loadReg model.
#' 
#' Dives deep into loadReg objects to replace the coefficients for the purpose
#' of simulating new predictions.
#' 
#' @importFrom stats rchisq
#' @importFrom MASS mvrnorm
#' @param fit a loadReg object
#' @param flux.or.conc Should the resampling be done for the coefficients 
#'   appropriate to flux or those for concentration?
#' @return A new loadReg object with resampled coefficients such that one of 
#'   predConc or predLoad (corresponding to the value of \code{flux.or.conc}) 
#'   will make predictions reflecting those new coefficients. No other 
#'   properties of the returned model are guaranteed.
#' @export
resampleCoefficients.loadReg <- function(fit, flux.or.conc) {
  
  # Validate arguments
  match.arg.loadflex(flux.or.conc)
  
  # We only need to adjust the one model fit we're going to use
  onefit <- switch(
    flux.or.conc,
    flux=fit$lfit,
    conc=fit$cfit)
  
  ## Resample Coefficients ##
  
  # Like simple linear models, the vector of fitted coefficients for an AMLE 
  # model (called omega-hat in Cohn 2005) is distributed according to a 
  # multivariate normal, according to paragraph 28 of Cohn 2005. The parameters 
  # are mean M = Beta + Aarrow * sigma, variance = SIGMA = Carrow*sigma^2 (eq. 
  # 32) where Carrow = (Varrow - gamma(gamma')(Vomegahatomegahat))/N. These have
  # non-obvious names in the LOADEST code, but they're present.
  
  
  ## Extract useful info from the model

  # Calculate S2_U, C, and M (Cohn 2005 section 4.4) as done in TAC_LOAD.f
  PARMLE <- onefit$PARMLE # PARMLE[1:NPAR] == Beta_hat and PARMLE[NPAR+1] == sigma_hat^2, as used in Eq. 29.
  NPAR <- onefit$NPAR # The number of actual parameters, K in Eq. 29
  CV <- onefit$CV # "covar. of std normals censored at XSI's w.r.t. S**2"
  SCV <- onefit$SCV # "covar. of standard normals censored at XSI's w.r.t. S". somethign special is stored in SCV[NPAR+1,NPAR+1]. this is the version used to calculate C, and is Varrow in eq. 33.
  NOBSC <- onefit$NOBSC # equal to the N used in Cohn 2005 - number of calibration observations
  # P24: B_Betaihat = Bias_1[Betaihat]/sigma and B_sigmahat = Bias_1[sigma_hat]/sigma
  BIAS <- onefit$BIAS
  SBIAS <- onefit$SBIAS # maybe SBIAS = fit$BIAS/NOBSC, where BIAS == the B_arrow of text after Eq 31?
  S2 <- PARMLE[NPAR+1]
  # DO 10 I=1,NPAR
  #   GAMMA(I) = SCV(I,NPAR+1)/SCV(NPAR+1,NPAR+1)    # Text between Eqs 29 & 30
  GAMMA <- SCV[1:NPAR,NPAR+1]/SCV[NPAR+1,NPAR+1]
  #   OMEGA(I) = PARMLE(I)-GAMMA(I)*SQRT(S2)         # Eq 29
  OMEGA <- PARMLE[1:NPAR]-GAMMA*sqrt(S2)
  #   B(I) = SBIAS(I)-GAMMA(I)*(1.D0+SBIAS(NPAR+1))  # B(I) == the A_arrow of text after Eq 31?
  # 10   CONTINUE
  # DO 30 I=1,NPAR
  #   DO 20 K=1,NPAR
  #     C(I,K) = SCV(I,K)-SCV(NPAR+1,NPAR+1)*GAMMA(I)*GAMMA(K)
  #   20      CONTINUE
  # 30   CONTINUE
  C <- SCV[1:NPAR,1:NPAR] - SCV[NPAR+1,NPAR+1]*(GAMMA %*% t(GAMMA))
  # S2_U = S2/(1.D0+BIAS(NPAR+1))
  S2_U <- S2/(1+BIAS[NPAR+1]) # S2_U (the AMLE estimate) is used for sigma^2 in eq. 59. should work for eq. 32, too.
  
  
  ## This part parallels the resampling of a regular linear model (lm) and is 
  ## adapted from 
  ## http://www.clayford.net/statistics/simulation-to-represent-uncertainty-in-regression-coefficients/
  
  coefs <- OMEGA
  cov.scaled <- C * S2_U # scaled covariance matrix
  cov.unscaled <- C # unscaled covariance matrix
  s.hat <- sqrt(S2_U) # residual standard error, AMLE estimate
  # n - k is supposed to be degrees of freedom. eq. 36 has df = v = 
  # 2N*((1+Bs2/N)^2/Vs2s2). If I understand right that the calculation of S2_U 
  # (S2_U <- S2/(1+BIAS[NPAR+1]), above) is the same equation as in paragraph 
  # 29, then Bs2/N is stored in BIAS[NPAR+1]. Vs2s2 is probably 
  # CV[NPAR+1,NPAR+1], though I'd like more evidence to be sure. To be like an 
  # lm, n.minus.k should be close to 23 for the app2 data (less if there were 
  # censored data). What I get with 2*NOBSC*((1+BIAS[NPAR+1])^2 / 
  # CV[NPAR+1,NPAR+1]) happens to be exactly the square of 23...so...take the 
  # sqrt? I guess I will...Ultimately, though, we could head for sigma^2=v*sA^2 
  # / chi_v^2 which chi_v^2 is a random variable with the given chi-square 
  # distribution, v degrees of freedom. Alternatively, Eqn. 41 gives a second relation 
  # between sA^2 and sigma^2, solveable for sigma using the Pythagorean theorem
  # because the LHS, b1, and b2 are known. Still, surely this is close enough for a
  # first cut.
  n.minus.k <- sqrt(2*NOBSC*((1+BIAS[NPAR+1])^2 / CV[NPAR+1,NPAR+1]))
  
  # Simulate residual standard deviation. Is it still the case, even with the 
  # altered distribution of s and sigma, that sigma.hat.sim (==s.hat.sim) is chi
  # square with df=n.minus.k? Could be close but not perfect. rloadest::EXPON.f 
  # assumes "S2 is a GAMMA(ALPHA,SIGMA^2*KAPPA) random variable", but then, S2
  # is not SIGMA2, and SIGMA2 is the s.hat^2 we really want.
  s.hat.sim <- s.hat*sqrt(n.minus.k/rchisq(1, n.minus.k))
  
  # Simulate regression coefficients
  omega.sim <- mvrnorm(n=1, mu=coefs, Sigma=s.hat.sim^2*cov.unscaled)
  
  
  ## Now work the coefficients back into their places in the loadReg$Xfit (onefit) object
  
  # To adjust the model enough that new predictions reflect the resampled 
  # coefficients, we need to make sure that PARMLE, BIAS, CV, SBIAS, and SCV are
  # up to date (because these are the variables used by 
  # predLoad/estlday/TAC_LOAD to calculate MVUE, which are the point load 
  # estimates). The other arguments to TAC_LOAD are NPAR and XLPRED; of those,
  # NPAR should stay the same, and XLPRED will be computed by the predLoad or
  # predConc call from the new predictor data, so we don't need to worry about
  # them.
  
  # Put PARMLE back together with new coefficients and S2
  PARMLE <- omega.sim - GAMMA*sqrt(S2)
  S2 <- s.hat.sim * (1+BIAS[NPAR+1])
  onefit$PARMLE <- c(PARMLE, S2)
  # Even though TAC_LOAD, etc. use PARMLE, coef.loadReg ~= coef.censReg uses
  # PARAML. Within rloadest, PARAML is also used for plot.loadReg and
  # print.loadReg. It's quick enough, I guess...let's replace it here.
  onefit$PARAML <- onefit$PARMLE
  
  # Leave BIAS, CV, SBIAS, and SCV untouched for prediction of new points. 
  # Prediction of intervals doesn't matter; simulateSolute offers no uncertainty
  # information. But TAC_LOAD also uses these for reprediction (in particular, 
  # for the g_v(...) term in eq. 42) - BIAS and CV go into the calculation of 
  # ALPHA and KAPPA, and SBIAS and SCV go into the calculation of GAMMA, A1, and
  # B1. All that said, I think their original values are the ones we want: bias 
  # and the covariance structure of the coefficients shouldn't change just
  # because we resample the coefficients.
  
  # Put the updated onefit back into a loadReg object
  if(flux.or.conc=="flux") {
    fit$lfit <- onefit
  } else {
    fit$cfit <- onefit
  }
  
  # Return the loadReg object
  fit
  
}

#' Extract model summary statistics from an [rloadest::loadReg()] model
#' 
#' Produce a 1-row data.frame of model metrics. The relevant metrics for loadReg
#' models are largely the same as those reported by the `rloadest` package, 
#' though reported in this streamlined data.frame format for bulk reporting. 
#' `summarizeModel.loadReg` should rarely be accessed directly; instead, call 
#' `summarizeModel()` on a [loadReg] object.
#' 
#' @md
#' @inheritParams summarizeModel
#' @param flux.or.conc character. Which internal model (the flux model or the 
#'   concentration model) should be summarized? An [rloadest::loadReg] model is 
#'   actually two different models for (1) flux and (2) concentration, each 
#'   fitted to the same data and with the same model structure except for 
#'   whether the left-hand side of the model formula is flux or concentration. 
#'   Some of the model metrics differ between these two internal models.
#' @return Returns a 1-row data frame with the following columns:
#'   
#'   * `eqn` - the regression equation, possibly in the form `const ~ model(x)`
#'   where `x` is the `Number` of a pre-defined equation in [rloadest::Models]
#'   
#'   * `RMSE` - the square root of the mean squared error. Errors will be 
#'   computed from either fluxes or concentrations, as determined by the value 
#'   of `pred.format` that was passed to [loadReg2()] when this model was 
#'   created
#'   
#'   * `r.squared` - the r-squared value, generalized for censored data, 
#'   describing the amount of observed variation explained by the model
#'   
#'   * `p.value` - the p-value for the overall model fit
#'   
#'   * `cor.resid` - the serial correlation of the model residuals
#'   
#'   * `PPCC` - the probability plot correlation coefficient measuring the 
#'   normality of the residuals
#'   
#'   * `Intercept`, `lnQ`, `lnQ2`, `DECTIME`, `DECTIME2`, `sin.DECTIME`, 
#'   `cos.DECTIME`, etc. - the fitted value of the intercept and other terms 
#'   included in this model (list differs by model equation)
#'   
#'   * `Intercept.SE`, `lnQ.SE`, `lnQ2.SE`, `DECTIME.SE`, `DECTIME2.SE`, 
#'   `sin.DECTIME.SE`, `cos.DECTIME.SE`, etc. - the standard error of the fitted
#'   estimates of these terms
#'   
#'   * `Intercept.p.value`, `lnQ.p.value`, `lnQ2.p.value`, `DECTIME.p.value`, 
#'   `DECTIME2.p.value`, `sin.DECTIME.p.value`, `cos.DECTIME.p.value` - the 
#'   p-values for each of these model terms
#' @importFrom smwrStats rmse
#' @importFrom utils capture.output
#' @importFrom stats coef
#' @export
summarizeModel.loadReg <- function(load.model, flux.or.conc=c("flux", "conc"), ...) {
  
  flux.or.conc <- match.arg.loadflex(flux.or.conc)
  loadReg.model <- c("flux"="load","conc"="concentration")[[flux.or.conc]]
  loadReg.fit <- load.model[[c(flux='lfit',conc='cfit')[[flux.or.conc]]]]
  
  # summarize the coefficient estimates, standard errors, and p-values
  coefs <- coef(load.model, summary=TRUE, which=loadReg.model)
  coefsDF <- setNames(
    data.frame(
      t(coefs[,'Estimate']),
      SE=t(coefs[,'Std. Error']),
      pval=t(coefs[,'p-value'])
    ),
    nm = paste0(
      rep(gsub('(Intercept)', 'Intercept', row.names(coefs), fixed=TRUE), 3), # coefficient names
      rep(c('', '.SE', '.p.value'), each=nrow(coefs))) # aspect of coefficient being described
  )
  
  # package coefs and other overall statistics into a single 1-row data.frame
  retDF <- data.frame(
    eqn = capture.output(loadReg.fit$call[[2]]),
    RMSE = rmse(load.model, model=loadReg.model),
    r.squared = rlmetricRsq(loadReg.fit),
    p.value = rlmetricPVal(loadReg.fit),
    cor.resid = loadReg.fit$SCORR,
    PPCC = rlmetricPPCC(loadReg.fit),
    coefsDF,
    stringsAsFactors=FALSE
  )
  return(retDF)
}

#' Compute the [generalized] R squared of a loadReg fit
#' 
#' Helper function to compute the R squared like rloadest does in print.loadReg
#' @param fit the loadReg lfit or cfit object
#' @keywords internal
rlmetricRsq <- function(fit) {
  if(length(which(fit$CENSFLAG)) == 0) {
    fit$RSQ/100 # R squared for no censored data
  } else {
    1 - exp((fit$LLR1 - fit$LLR)*2/fit$NOBSC) # Generalized R squared for censored data
  }
}

#' Compute the p-value of a loadReg fit
#' 
#' Helper function to compute the p-value like rloadest does in print.loadReg
#' @importFrom stats pchisq
#' @param fit the loadReg lfit or cfit object
#' @keywords internal
rlmetricPVal <- function(fit) {
  G2 <- signif(2*(fit$LLR - fit$LLR1), 4)
  pval <- 1 - pchisq(G2, fit$NPAR - 1)
  return(pval)
}

#' Compute the probability plot correlation coefficient of a loadReg fit
#' 
#' Helper function to compute the probability plot correlation coefficient as 
#' rloadest does in print.loadReg, lines 141-144
#' @importFrom smwrQW censPPCC.test as.lcens
#' @importFrom stats residuals
#' @param fit the loadReg lfit or cfit object
#' @keywords internal
rlmetricPPCC <- function(fit) {
  if(fit$method == 'AMLE') {
    Res <- residuals(fit, type="working", suppress.na.action=TRUE)
    ppcc <- censPPCC.test(as.lcens(Res, censor.codes=fit$CENSFLAG))
    return(unname(ppcc$statistic))
  } else {
    NA # PPCC cannot be computed for method MLE
  }
}
McDowellLab/loadflex documentation built on May 8, 2019, 9:48 a.m.