Nothing
#' @export
customPSE <- function(db,
x,
xVars,
xGrpBy = NULL,
y = NULL,
yVars = NULL,
yGrpBy = NULL,
method = 'TI',
lambda = .5,
totals = TRUE,
variance = TRUE) {
## Warnings upfront ----------------------------------------------------------
## Must have an FIA.Database or a remote one
if (!c(class(db) %in% c('FIA.Database', 'Remote.FIA.Database'))) {
stop('Must provide an `FIA.Database` or `Remote.FIA.Database`. See `readFIA` and/or `getFIA` to read and load your FIA data.')
}
## PLT_CN must be present in both x and y
if (!c('PLT_CN' %in% names(x))) {
stop('`PLT_CN` must be included in `x`. See our website for an example use case of `customPSE`.')
}
if (!is.null(y)) {
if (!c('PLT_CN' %in% names(y))) {
stop('`PLT_CN` must be included in `y`. See our website for an example use case of `customPSE`.')
}
}
## EVAL_TYP must be present in x
if (!c('EVAL_TYP' %in% names(x))) {
stop('`EVAL_TYP` must be included in `x`. See our website for an example use case of `customPSE`.')
}
## Only one of TREE_BASIS or AREA_BASIS may exist in each dataset
if (all(c('TREE_BASIS', 'AREA_BASIS') %in% names(x))) {
stop('Both `TREE_BASIS` and `AREA_BASIS` found in `x`, but only one is allowed. See our website for an example use case of `customPSE`.')
} else if (sum(c('TREE_BASIS', 'AREA_BASIS') %in% names(x)) != 1 ) {
stop('Neither `TREE_BASIS` or `AREA_BASIS` found in `x`, but one is required. See our website for an example use case of `customPSE`.')
}
if (!is.null(y)) {
if (all(c('TREE_BASIS', 'AREA_BASIS') %in% names(y))) {
stop('Both `TREE_BASIS` and `AREA_BASIS` found in `y`, but only one is allowed. See our website for an example use case of `customPSE`.')
} else if (sum(c('TREE_BASIS', 'AREA_BASIS') %in% names(y)) != 1) {
stop('Neither `TREE_BASIS` or `AREA_BASIS` found in `y`, but one is required. See our website for an example use case of `customPSE`.')
}
}
## If using tree variables, SUBP and TREE are required
## CONDID required for condition variables
if ( 'TREE_BASIS' %in% names(x) & !all(c('SUBP', 'TREE') %in% names(x) )) {
stop('Both `SUBP` and `TREE` required for estimation of tree variables, but are missing from `x`. See our website for an example use case of `customPSE`.')
} else if ( 'AREA_BASIS' %in% names(x) & !c('CONDID' %in% names(x))) {
stop('`CONDID` required for estimation of condition variables, but is missing from `x`. See our website for an example use case of `customPSE`.')
}
if (!is.null(y)) {
if ( 'TREE_BASIS' %in% names(y) & !all(c('SUBP', 'TREE') %in% names(y) )) {
stop('Both `SUBP` and `TREE` required for estimation of tree variables, but are missing from `y`. See our website for an example use case of `customPSE`.')
} else if ( 'AREA_BASIS' %in% names(y) & !c('CONDID' %in% names(y))) {
stop('`CONDID` required for estimation of condition variables, but is missing from `y`. See our website for an example use case of `customPSE`.')
}
}
## Pull design info from db --------------------------------------------------
## Make the sure the necessary tables are present in db
req.tables <- c('PLOT', 'POP_EVAL', 'POP_EVAL_TYP', 'POP_ESTN_UNIT', 'POP_STRATUM', 'POP_PLOT_STRATUM_ASSGN')
## If remote, read in state by state. Otherwise, drop all unnecessary tables
remote <- ifelse(class(db) == 'Remote.FIA.Database', 1, 0)
db <- readRemoteHelper(db$states, db, remote, req.tables, nCores = 1)
# if (class(db) == 'FIA.Database') {
# if (sum(req.tables %in% names(db)) < length(req.tables)) {
# missing.tables <- req.tables[!c(req.tables %in% names(db))]
# stop(paste(paste (as.character(missing.tables), collapse = ', '), 'tables not found in object db.'))
# }
# } else {
# ## Read the tables we need, readFIA will throw a warning if they are missing
# db <- readFIA(dir = db$dir,
# common = db$common,
# tables = c('PLOT', 'POP_EVAL', 'POP_EVAL_TYP', 'POP_ESTN_UNIT', 'POP_STRATUM', 'POP_PLOT_STRATUM_ASSGN'),
# states = db$states)
# }
## Pull the appropriate population tables
mr <- checkMR(db, remote = ifelse(class(db) == 'Remote.FIA.Database', 1, 0))
pops <- handlePops(db, evalType = x$EVAL_TYP[[1]], method, mr)
## Prep tables ---------------------------------------------------------------
## Make sure all variables specified exist in their respective dataset, and we
## have no duplicates
if ('TREE_BASIS' %in% names(x)) {
x.id.vars <- c('SUBP', 'TREE')
} else {
x.id.vars <- 'CONDID'
}
xVars <- rlang::enquos(xVars)
xGrpBy <- rlang::enquos(xGrpBy)
x <- dplyr::select(x, PLT_CN,
dplyr::all_of(x.id.vars),
dplyr::any_of(c('TREE_BASIS', 'AREA_BASIS')),
## Extras for vitalRates
dplyr::any_of(c('ONEORTWO')),
!!!xVars, !!!xGrpBy) %>%
dplyr::distinct()
if (!is.null(y)) {
if ('TREE_BASIS' %in% names(y)) {
y.id.vars <- c('SUBP', 'TREE')
} else {
y.id.vars <- 'CONDID'
}
yVars <- rlang::enquo(yVars)
yGrpBy <- rlang::enquos(yGrpBy)
y <- dplyr::select(y, PLT_CN,
dplyr::all_of(y.id.vars),
dplyr::any_of(c('TREE_BASIS', 'AREA_BASIS')),
!!yVars, !!!yGrpBy) %>%
dplyr::distinct()
}
## Sum variable(s) up to plot-level ------------------------------------------
# convert to character
xGrpBy <- names(dplyr::select(x, !!!xGrpBy))
if (!is.null(y)) {
yGrpBy <- names(dplyr::select(y, !!!yGrpBy))
# Make sure all denominator groups are a subset of numerator groups
if (!all(yGrpBy %in% xGrpBy)) {
stop('All grouping variables listed in `yGrpBy` must be included in `xGrpBy`. More specifically, `yGrpBy` must be equal to or a subset of `xGrpBy`. ')
}
}
# Sum to plot
xPlt <- sumToPlot(x, pops, xGrpBy)
if (!is.null(y)) yPlt <- sumToPlot(y, pops, yGrpBy)
## Sum up to estimation unit -------------------------------------------------
if (!is.null(y)) {
## Adding YEAR to groups
xGrpBy <- c('YEAR', xGrpBy)
yGrpBy <- c('YEAR', yGrpBy)
## Sum variable(s) up to strata then estimation unit level
eu.sums <- sumToEU(db, xPlt, yPlt, pops, xGrpBy, yGrpBy, method = method)
xEst <- eu.sums$x
yEst <- eu.sums$y
} else {
## Adding YEAR to groups
xGrpBy <- c('YEAR', xGrpBy)
## Sum variable(s) up to strata then estimation unit level
eu.sums <- sumToEU(db,
x = xPlt,
y = NULL,
pops = pops,
x.grpBy = xGrpBy,
y.grpBy = NULL,
method = method)
xEst <- eu.sums$x
}
## List of names for each output variable
## Combine most-recent population estimates across states with potentially
## different reporting schedules, i.e., if 2016 is most recent in MI and 2017 is
## most recent in WI, combine them and label as 2017
if (mr) {
xEst <- combineMR(xEst, xGrpBy)
if (!is.null(y)) yEst <- combineMR(yEst, yGrpBy)
}
## Totals and ratios -------------------------------------------------------
if (!is.null(y)) {
# print(names(yEst))
# print(stringr::str_sub(names(yEst), -5, -1))
# print(names(yEst)[stringr::str_sub(names(yEst), -5, -1) == '_mean'])
# return(yEst)
## For variable scoping w/ dplyr
xGrpSyms <- dplyr::syms(xGrpBy)
yGrpSyms <- dplyr::syms(yGrpBy)
xTotalSyms <- dplyr::syms(names(xEst)[stringr::str_sub(names(xEst), -5, -1) == '_mean'])
xVarSyms <- dplyr::syms(names(xEst)[stringr::str_sub(names(xEst), -4, -1) == '_var'])
xCovSyms <- dplyr::syms(names(xEst)[stringr::str_sub(names(xEst), -3, -1) == '_cv'])
yTotalSyms <- dplyr::sym(names(yEst)[stringr::str_sub(names(yEst), -5, -1) == '_mean'])
yVarSyms <- dplyr::sym(names(yEst)[stringr::str_sub(names(yEst), -4, -1) == '_var'])
ratioSyms <- dplyr::syms(stringr::str_c(names(xEst)[stringr::str_sub(names(xEst), -5, -1) == '_mean'], '_RATIO'))
ratioVarSyms <- dplyr::syms(stringr::str_c(names(xEst)[stringr::str_sub(names(xEst), -5, -1) == '_mean'], '_RATIO_VAR'))
## Sum over estimation units
xEst <- xEst %>%
dplyr::select(-c(ESTN_UNIT_CN, AREA_USED)) %>%
dplyr::group_by(!!!xGrpSyms) %>%
dplyr::summarize(dplyr::across(dplyr::everything(), sum, na.rm = TRUE))
yEst <- yEst %>%
dplyr::select(-c(ESTN_UNIT_CN, AREA_USED, P2PNTCNT_EU)) %>%
dplyr::group_by( !!!yGrpSyms) %>%
dplyr::summarize(dplyr::across(dplyr::everything(), sum, na.rm = TRUE))
## Join numerator/denominator, compute ratios
out <- left_join(xEst, yEst, by = yGrpBy) %>%
## Compute ratio point estimates
dplyr::mutate(dplyr::across(c(!!!xTotalSyms),
.fns = ~ .x / !!yTotalSyms,
.names = "{.col}_RATIO")) %>%
## Now ratio variances
dplyr::mutate(dplyr::across(c(!!!xTotalSyms),
.fns = ~ (1 / ((!!yTotalSyms)^2)) * (get(stringr::str_c(stringr::str_sub(dplyr::cur_column(), 1, -6), '_var')) + ((.x/!!yTotalSyms)^2 * !!yVarSyms) - (2 * (.x/!!yTotalSyms) * get(stringr::str_c(stringr::str_sub(dplyr::cur_column(), 1, -6), '_cv'))) ),
.names = "{.col}_RATIO_VAR")) %>%
## When we only have one non-zero plot, we'll sometimes get extremely
## small negative values from rounding errors. Make these zero
dplyr::mutate(dplyr::across(c(!!!ratioVarSyms),
.fns = ~ case_when(.x < 0 ~ 0,
TRUE ~ .x))) %>%
## Sampling error for all variables
dplyr::mutate(dplyr::across(c(!!!xTotalSyms),
.fns = ~ sqrt(get(stringr::str_c(stringr::str_sub(dplyr::cur_column(), 1, -6), '_var'))) / abs(.x) * 100,
.names = "{.col}_SE")) %>%
dplyr::mutate(dplyr::across(c(!!!ratioSyms),
.fns = ~ sqrt(get(stringr::str_c(dplyr::cur_column(), '_VAR'))) / abs(.x) * 100,
.names = "{.col}_SE"))
## Format output logically
out <- formatNames(out, xGrpBy)
} else {
xGrpSyms <- dplyr::syms(xGrpBy)
out <- xEst %>%
dplyr::select(-c(ESTN_UNIT_CN, AREA_USED)) %>%
dplyr::group_by(!!!xGrpSyms) %>%
dplyr::summarize(dplyr::across(dplyr::everything(), sum, na.rm = TRUE))
out <- formatNames(out, xGrpBy)
}
## Drop totals unless told not to
if (!totals) {
out <- out[,!stringr::str_detect(names(out), '_TOTAL')]
}
## Select either variance or SE, depending on input
if (variance) {
out <- out[,!stringr::str_detect(names(out), '_SE')]
} else {
out <- out[,!stringr::str_detect(names(out), '_VAR')]
}
## Pretty output
out <- out %>%
dplyr::ungroup() %>%
dplyr::mutate_if(is.factor, as.character) %>%
tidyr::drop_na(!!!xGrpSyms) %>%
dplyr::arrange(YEAR) %>%
as_tibble()
return(out)
}
formatNames <- function(x, grpBy) {
## x is the dataframe that we want to format the names of, linked to sumToEU
nms <- names(x)
## totals
total.slots <- stringr::str_sub(nms, -5, -1) == '_mean'
names(x)[total.slots] <- stringr::str_c(stringr::str_sub(nms[total.slots], 1, -6), '_TOTAL')
total.syms <- dplyr::syms(names(x)[total.slots])
## total variances
total.var.slots <- stringr::str_sub(nms, -4, -1) == '_var'
names(x)[total.var.slots] <- stringr::str_c(stringr::str_sub(nms[total.var.slots], 1, -5), '_TOTAL_VAR')
total.var.syms <- dplyr::syms(names(x)[total.var.slots])
## total sampling errors
total.se.slots <- stringr::str_sub(nms, -8, -1) == '_mean_SE'
names(x)[total.se.slots] <- stringr::str_remove(nms[total.se.slots], '_mean')
total.se.syms <- dplyr::syms(names(x)[total.se.slots])
## ratios
ratio.slots <- stringr::str_sub(nms, -11, -1) == '_mean_RATIO'
names(x)[ratio.slots] <- stringr::str_c(stringr::str_sub(nms[ratio.slots], 1, -12), '_RATIO')
ratio.syms <- dplyr::syms(names(x)[ratio.slots])
## ratio variances
ratio.var.slots <- stringr::str_sub(nms, -15, -1) == '_mean_RATIO_VAR'
names(x)[ratio.var.slots] <- stringr::str_c(stringr::str_sub(nms[ratio.var.slots], 1, -16), '_RATIO_VAR')
ratio.var.syms <- dplyr::syms(names(x)[ratio.var.slots])
## ratio sampling errors
ratio.se.slots <- stringr::str_sub(nms, -14, -1) == '_mean_RATIO_SE'
names(x)[ratio.se.slots] <- stringr::str_remove(nms[ratio.se.slots], '_mean')
ratio.se.syms <- dplyr::syms(names(x)[ratio.se.slots])
## Design information
names(x)[names(x) == 'P2PNTCNT_EU'] <- 'N'
names(x)[names(x) == 'nPlots.x'] <- 'nPlots_x'
names(x)[names(x) == 'nPlots.y'] <- 'nPlots_y'
## Select and order
grpSyms <- dplyr::syms(grpBy)
x <- x %>%
dplyr::select(!!!grpSyms,
!!!ratio.syms,
!!!total.syms,
!!!ratio.se.syms,
!!!total.se.syms,
!!!ratio.var.syms,
!!!total.var.syms,
dplyr::any_of(c('nPlots_x', 'nPlots_y')),
N)
return(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.