# ####
#' Compute an estimate of difference based on GAM results
#'
#' @param gamRslt output from gam model
#' @param iSpec data set specifications (see details for required content)
#' @param analySpec analytical specifications
#' @param base.yr.set vector of years used for baseline period
#' @param test.yr.set vector of years used for test period
#' @param doy.set vector of days used to establish sub-annual analyses
#' (see details)
#' @param alpha alpha level for computing confidence intervals
#' @param flow.detrended data generated by detrended.flow.
#'Default = flow.detrended.
#' @param salinity.detrended data generated by detrended.salinity.
#' Default = detrended.salinity.
#'
#' @details
#' iSpec is a list containing information about the date range and
#' transformations. Specifically, iSpec must include iSpec$yearBegin,
#' iSpec$yearEnd, iSpec$centerYear corresponding to beginning year of record,
#' ending year of record and centering year. Also, iSpec must include
#' iSpec$transform and iSpec$logConst. (See online help for selectData for
#' more information on these values.)
#'
#' base.yr.set and test.yr.set represent two time periods used to compare
#' differences. For example, base.yr.set=c(1999,2000) and test.yr.set=c(2013,
#' 2014) would compare GAM predictions from 1999-2000 versus 2013-2014. There
#' is no particular limit to the number of years included in the specification
#' for base.yr.set and test.yr.set. For example, a user could specify
#' c(2001:2002,2004) to use the years 2001, 2002, and 2004, skipping 2003
#' because 2003 was an abnormal year (particularly wet, particularly dry,
#' hurricanes, etc.).
#'
#' base.yr.set and test.yr.set must be within the years specified by
#' the range from iSpec$yearBegin to iSpec$yearEnd (inclusive). If not, this
#' function defaults to using the first two years (or last two years) of
#' record. If base.yr.set and test.yr.set are left to their default values of
#' NA, then the first two and last two years will be used
#'
#' doy.set represents the days of year for which GAM predictions are made and
#' used to compute base.yr and test.yr means. For example doy.set= c(15, 46,
#' 75, 106, 136, 167, 197, 228, 259, 289, 320, 350) would result in the 15th
#' of each month being used in the analysis; whereas doy.set= c(15, 46, 75)
#' would just use Jan-15, Feb-15, and Mar-15. (Keep in mind that this package
#' uses a 366 day calendar every year such that Mar-1 is always day 61,
#' regardless of leap year.) If doy.set is left to the default value of NA,
#' then c(15, 46, 75, 106, 136, 167, 197, 228, 259, 289, 320, 350) is used.
#'
#' The baseDay function has been added to this package from the smwrBase
#' package.
#'
#' @examples
#' # run analysisOrganizeData function to create the list analySpec
#' dfr <- analysisOrganizeData (dataCensored, report=NA)
#' df <- dfr[["df"]]
#' analySpec <- dfr[["analySpec"]]
#'
#' # set GAM models to just one model
#' analySpec$gamModels <- list(
#' list(option=2
#' , name= "Non-linear trend with Seasonality (plus Interactions)"
#' , model= "~ cyear + s(cyear) + s(doy,bs='cc') +
#' ti(cyear,doy,bs=c('tp','cc'))"
#' , deriv=FALSE))
#'
#' # run GAM for a single water quality variable, station and layer
#' gamResult <- gamTest(df, 'tn', 'CB5.4', 'S', analySpec=analySpec)
#'
#' # use gamDiff to replicate estimates of change calculated in the above
#' gamDiff(gamRslt=gamResult[["gamOutput2"]]$gamRslt,
#' iSpec=gamResult$iSpec, analySpec=analySpec,
#' base.yr.set = NA, test.yr.set = NA,
#' doy.set = NA, alpha = 0.05)
#'
#' # use gamDiff to calculate changes from 2005/06 to 2013/14
#' gamDiff(gamRslt=gamResult[["gamOutput2"]]$gamRslt,
#' iSpec=gamResult$iSpec, analySpec=analySpec,
#' base.yr.set = c(2004:2005), test.yr.set = c(2013:2014),
#' doy.set = NA, alpha = 0.05)
#'
#' @return Returns a nest list that includes the base and test years, doys,
#' period means in analyzed units, period means in observed units, percent
#' change, difference estimate, difference estimate in observed units,
#' standard error, confidence intervals, t statistic, p value, and alpha
#' level. The alpha level corresponds to the confidence intervals. The first
#' list (gamDiff.regular) uses the computed model to estimate differences and
#' is applicable for GAM formulas that do not involve an intervention term.
#' The second list (gamDiff.adjusted) performs computations by projecting the
#' most recent intervention (e.g., the current lab method) to all time
#' periods.
#' @export
#'
# ####
gamDiff <- function(gamRslt
, iSpec
, analySpec
, base.yr.set=NA
, test.yr.set=NA
, doy.set=NA
, alpha=0.05
, flow.detrended=NA
, salinity.detrended=NA) {
# -----< Change history >--------------------------------------------
# 24Mar2021: JBH: add parameter to of gamDiffNumChgYrs as 3 years
# 11Jul2018: JBH: add additional independent variables to pdat excluding dependent
# variable and "gamK*"
# 30Sep2017: JBH: added salinity interpolation to merge up with pdat
# 11Aug2017: JBH: added analySpec to function input parameters; renamed
#pct.chg.dta to pdat
# updated setting for flw_sal to be observed flow
# 29Jul2017: JBH: enhance for flw_sal
# 09NOv2016: JBH: update documentation
# 04Nov2016: JBH: update to accomodate models with intervention terms.
# specifically
# update returned list to have gamDiff.regular (baseline mean
# computed as
# per the prediction model) and gamDiff.adjusted (baseline meam
# computed
# assuming last intervention method used throughout the record)
# 20Oct2016: JBH: add intervention term to prediction data set (pdat)
# 09Jun2016: JBH: migrated from .gamDiff to gamDiff; revised help file
# 04Jun2016: JBH: migrated name convention from gam1 to gamRslt; migrated
# percent change calculation
# from function gamDiffPORtbl to this function; other misc. code
# formatting
# 24May2016: ESP: modified gamDiffPOR to create gamDiffYrs that allows user to
# specify
# a base year period and a test year period for the difference
# computation
# the user may also specify as set of days of year (doy.set) for
# computing this difference.
# If base and test years are not specified, default if first 2 and
# last 2 years of POR
# If yr.set not specified, default is once a month on the 15th.
# 27Apr2016: JBH: depricated porBegin, porEnd, porLength; switched to
# dyear*, see por.rng assignment
# unpack ####
# base.yr.set=NA; test.yr.set=NA; doy.set=NA; alpha=alpha
# extract selected values from iSpec
por.rng <- c(iSpec$yearBegin, iSpec$yearEnd)
centerYear <- iSpec$centerYear
transform <- iSpec$transform
logConst <- iSpec$logConst
gamDiffNumChgYrs <- analySpec$gamDiffNumChgYrs
# does model include intervention term 11Aug2017 [added from gamPlotCalc]
intervention <- ifelse (length(grep('intervention',gamRslt$formula )) == 0
, FALSE, TRUE)
# does model include flw_sal term 11Aug2017 [added from gamPlotCalc]
has.flw_sal <- ifelse (length(grep('flw_sal',gamRslt$formula )) == 0
, FALSE, TRUE)
if(has.flw_sal) {
pdatWgt <- data.frame(normPct = analySpec$gamFlw_Sal.Wgt.Perc,
Z = stats::qnorm(analySpec$gamFlw_Sal.Wgt.Perc),
normD = stats::dnorm(stats::qnorm(analySpec$gamFlw_Sal.Wgt.Perc)))
} else {
pdatWgt <- data.frame(normPct = 0.5,
Z = stats::qnorm(0.5),
normD = stats::dnorm(stats::qnorm(0.5)))
}
pdatWgt$flw.wgt <- pdatWgt$normD/sum(pdatWgt$normD)
# matrix set up ####
# diffType="regular"; diffType="adjusted" #(04Nov2016)
for(diffType in c("regular","adjusted")) {
# matrix: set up prediction table pdat for regular calculation ####
if(diffType=="regular") { #(04Nov2016)
# if doy.set is NA, then assume doy.set = 15th of each month
if(is.na(doy.set[1])){
doy.set <- baseDay(as.POSIXct(paste(2000,1:12,15,sep='-')))
}
# set up base and test years to first "gamDiffNumChgYrs" and last
# "gamDiffNumChgYrs" years if base.yr.set
# and/or test.yr.set not specified; otherwise concatenate the values
if(is.na(base.yr.set[1])) {base.yr.set <- c(por.rng[1]
,por.rng[1]+gamDiffNumChgYrs-1)}
if(is.na(test.yr.set[1])) {test.yr.set <- c(por.rng[2]-
gamDiffNumChgYrs+1,por.rng[2])}
Nbase.yr <- length(base.yr.set) # count years in each period
Ntest.yr <- length(test.yr.set)
yr.set <- c(base.yr.set,test.yr.set) # combine base years and test years
Ndoy <- length(doy.set) # count predictions per year
Nbase <- Ndoy*Nbase.yr # calculate total predictions in base period
Ntest <- Ndoy*Ntest.yr # calculate total predictions in test period
# create a data frame with Nrow rows where Nrow= (Nbase*Ndoy) +
# (Ntest*Ndoy)...
# Nbase yrs of Ndoy baseline dates and Ntest-yrs
# of Ndoy current dates. Include: doy, year, logical field (bl) indicating
# baseline
# and current, and centered decimal year (cyear) using same centering
# value
# computed from data set (centerYear)
pdat <- expand.grid(doy.set,yr.set) # make df with all comb.
# of doy.set and yr.set
names(pdat) <- c('doy','year') # rename variables
pdat$bl <- pdat$year <= base.yr.set[Nbase.yr] # create logical field
# indicating baseline
pdat$cyear <- (pdat$year + (pdat$doy-1)/366) - centerYear # compute cyear
# calculate date based on doy to set up adding intervention term
# (added 20Oct2016)
pdat$month <- lubridate::month(as.Date(pdat$doy, origin = "1999-12-31"))
pdat$day <- lubridate::day (as.Date(pdat$doy, origin = "1999-12-31"))
pdat$date <- as.POSIXct(paste(pdat$year,pdat$month,pdat$day,sep='-'))
# add intervention term to prediction data set (added 20Oct2016)
intervenList <- iSpec$intervenList
intervenList$intervention <- as.character(intervenList$intervention)
tmp <- lapply(1:nrow(pdat), function(x)
list(intervenList[ intervenList$beginDate <= pdat$date[x] &
intervenList$endDate +(24*3600)-1 >= pdat$date[x]
, c("intervention")]))
tmp[sapply(tmp, is.null)] <- NA
pdat$intervention <- sapply(1:nrow(pdat)
, function(x) unname(unlist(tmp[x])[1]))
pdat$intervention <- factor(pdat$intervention
, levels = intervenList$intervention)
# add flw_sal and flw_sal.sd term for models with flw_sal # 11Aug2017
if(has.flw_sal) {
# populate flw_sal
pdat$flw_sal <- 0
# populate flw_sal.sd
if(iSpec$hydroTermSel=="flow") {
# for "flow", iSpec$hydroTermSel.var will be d* indicating the
# averaging period
# associated with the best correlation (i.e., max(abs(cor))) between
# the dependent variable
# and the detrended log flow, e.g., d120 refers to a 120-day averaging
# window
tmp <- flow.detrended[[paste0("q"
,iSpec$usgsGageID
,".sum")]][["lowess.sd"]]
tmp <- tmp[,c("doy",iSpec$hydroTermSel.var )]
names(tmp)[names(tmp) == iSpec$hydroTermSel.var] <- 'flw_sal.sd'
} else if (iSpec$hydroTermSel=="salinity") {
# for "salinity", iSpec$hydroTermSel.var will be SAP or BBP indicating
# the
# average detrended salinity for "surface and above pycnocline" [SAP]
# or "bottom
# and below pycnocline [BBP]. Selection is BBP for dependent variable
# from B, BP,
# or BBP; and SAP otherwise
tmp <- salinity.detrended[[paste0(iSpec$stat,".sum")]][["lowess.sd"]]
tmp <- tmp[,c("doy",iSpec$hydroTermSel.var )]
names(tmp)[names(tmp) == iSpec$hydroTermSel.var] <- 'flw_sal.sd'
} else {
return('ERROR in lowess.sd pickup')
}
pdat <- merge( pdat,tmp, by="doy", all.x=TRUE)
pdat <- pdat[with( pdat, order(date)), ]
} else {
pdat$flw_sal <- NA_real_
pdat$flw_sal.sd <- NA_real_
}
# add a row id number and sort columns # 11Aug2017
pdat$rowID <- seq.int(nrow( pdat))
tmp <- c("rowID","date","year","cyear","doy")
pdat<-pdat[c(tmp, setdiff(names(pdat), tmp))]
# identify additional independent variables #11Jul2018
# excluding dependent variable and "gamK*"
indVar <- setdiff (all.vars(gamRslt$formula), c(iSpec$dep, names(pdat)))
indVar <- indVar[-grep("^gamK", indVar)]
pdat[,indVar] <- 0
# matrix: "adjust" prediction table pdat to use last intervention value for all
# calculations ####
#(04Nov2016)
} else if (diffType=="adjusted") {
pdat$intervention <- pdat$intervention[length(pdat$intervention)]
}
# create pdatLong from pdat with a downselected number of columns for
# merging
# with pdatWgt; in the merge of pdatWgt and pdatLong (each row of pdat is
# repeated
# for each record in pdatWgt, then goes to next row of pdat); flw_sal
# based on
# z stat and sd
pdatLong <- pdat[,c("rowID", "cyear", "doy", "bl", "intervention"
, "flw_sal", "flw_sal.sd", indVar)]
pdatLong <- merge( pdatWgt[,c("Z","flw.wgt")], pdat[,c("rowID", "cyear"
, "doy", "bl", "intervention"
, "flw_sal", "flw_sal.sd", indVar )])
pdatLong$flw_sal <- pdatLong$Z * pdatLong$flw_sal.sd
# JBH(24Nov2017): original construction of xa and avg.per.mat
# construct 2xNrow matrix (for averaging baseline and current periods).
# Since pdat is constructed
# with Nrow rows (see above)
# the weighting here is set to 1/Nbase for base period and 1/Ntest for
# test period.
# xa <- c(rep(1/Nbase,Nbase),rep(0,Ntest),rep(0,Nbase),rep(1/Ntest,Ntest))
# avg.per.mat <- matrix(xa,nrow=2,ncol=Nbase+Ntest, byrow=TRUE)
# JBH(24Nov2017): extension of above xa and avg.per.mat
# keeping weight the same--just extending number of values by
# "*nrow(pdatWgt)"
xa <- c(rep(1/Nbase,Nbase*nrow(pdatWgt)),
rep(0,Ntest*nrow(pdatWgt)),
rep(0,Nbase*nrow(pdatWgt)),
rep(1/Ntest,Ntest*nrow(pdatWgt))) # construct a matrix to average
# baseline and current periods
avg.per.mat <- matrix(xa,nrow=2
,ncol=(Nbase+Ntest)*nrow(pdatWgt), byrow=TRUE)
# now apply flow weighting to the averaging matrix (probably could have
# combined into above
# but this is ok such that each row of avg.per.mat is == 1)
avg.per.mat[1,] <- avg.per.mat[1,] * t(pdatLong$flw.wgt)
avg.per.mat[2,] <- avg.per.mat[2,] * t(pdatLong$flw.wgt)
# construct matrix to get difference of current minus baseline
diff.mat <- c(-1,1)
# Extract coefficients (number of terms depends on complexity of GAM
# formula)
beta <- gamRslt$coefficients # coefficients vector
VCmat <- gamRslt$Vp # variance-covariance matrix of
# coefficents
# Begin calculations to compute differences ####
# extract matrix of linear predicters
Xpc <- predict(gamRslt,newdata=pdatLong,type="lpmatrix")
# Compute predictions based on linear predictors (Nrow x 1 matrix)
pdep <- Xpc%*%beta # equivalent to
# "predict(gamRslt,newdata=pdatLong)"
# Calc. average baseline and average current; stores as a 2x1 matrix
period.avg <- avg.per.mat %*% pdep
# Calc. average current - average baseline; stores as a 1x1 matrix
diff.avg <- diff.mat %*% period.avg # pre-multiply by differencing matrix
# to check results
# Calc standard errors and confidence intervals on difference predictions
xpd <- diff.mat%*%avg.per.mat%*%Xpc # premultiply linear predictors by
# averaging and differencing matrices.
diff.est <- xpd%*%beta # compute estimate of difference
diff.se <- sqrt(xpd%*%VCmat%*%t(xpd)) # compute Std. Err. by usual rules
diff.t <- diff.est / diff.se
diff.pval <- 2*pt(abs(diff.t), gamRslt$df.null, 0, lower.tail = FALSE)
#compute CI for differnce
halpha <- alpha/2
diff.ci <- c(diff.est - qnorm(1-halpha) * diff.se,diff.est +
qnorm(1-halpha) *diff.se)
# back transform mean (03Nov)
per.mn.obs <- if(transform) exp(period.avg) - logConst else period.avg
# calculate percent change (03Nov)
pct.chg <- 100*((per.mn.obs[2] - per.mn.obs[1])/per.mn.obs[1])
# difference in obs units (03Nov)
diff.est.obs <- per.mn.obs[2] - per.mn.obs[1]
# pack up and return results ####
gamDiff.tmp <- list(base.yr = base.yr.set,
test.yr = test.yr.set,
doys = doy.set,
per.mn = as.vector(period.avg),
per.mn.obs = as.vector(per.mn.obs),
pct.chg = pct.chg,
diff.est = diff.est,
diff.est.obs = diff.est.obs,
diff.se = diff.se,
diff.ci = diff.ci,
diff.t = diff.t,
diff.pval = diff.pval,
alpha = alpha)
#(04Nov2016)
if(diffType=="regular") {
gamDiff.regular <- gamDiff.tmp
} else if(diffType=="adjusted") {
gamDiff.adjusted <- gamDiff.tmp
}
}
gamDiff.return <- list(gamDiff.regular=gamDiff.regular
, gamDiff.adjusted=gamDiff.adjusted)
return(gamDiff.return)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.