R/functionsFIA.R

Defines functions sumToEU sumToPlot ratioVar findEVALID getFHM readFHM str.Remote.FIA.Database str.FIA.Database print.Remote.FIA.Database summary.Remote.FIA.Database print.FIA.Database summary.FIA.Database stratVar vrAttHelper unitMean rVar unitVarNew unitVar unitVarDT grmAdj adjHelper structHelper basalArea makeClasses ema areal_par divIndex matchColClasses projectPoints projectPnts skewness filterAnnual combineMR maWeights annualStrataHelper mergeSmallStrata mergeSmallStrata_old handlePops handlePops_old udSeedDomain udTreeDomain udAreaDomain typeDomain_grow treeTypeDomain landTypeDomain handleWY handleTX grpByToChar readRemoteHelper dropStatesOutsidePolys arealSumPrep2 arealSumPrep1 checkMR remoteIter .onAttach

Documented in findEVALID makeClasses

#### A Start up Message ------------
.onAttach <- function(lib, pkg) {
  if(interactive() || getOption("verbose"))
    packageStartupMessage(sprintf("Package %s (%s) loaded. Check out our website at https://rfia.netlify.app/.\nType citation(\"%s\") for examples of how to cite rFIA.\n", pkg,
                                  packageDescription(pkg)$Version, pkg))
}




#' @import dtplyr
#' @import dplyr
#' @import methods
#' @import sf
#' @import stringr
#' @import bit64
#' @import tidyselect
#' @import rlang
#' @importFrom data.table fread fwrite rbindlist fifelse
#' @importFrom parallel makeCluster detectCores mclapply parLapply stopCluster clusterEvalQ
#' @import tidyr
#' @import ggplot2
#' @importFrom stats cov var coef lm na.omit quantile
#' @importFrom utils adist object.size read.csv tail globalVariables type.convert download.file unzip packageDescription
NULL


## Iterator if db is remote
remoteIter <- function(db, remote){
  if (remote){

    iter <- db$states

    ## In memory
  } else {
    ## Some warnings
    if (class(db) != "FIA.Database"){
      stop('db must be of class "FIA.Database". Use readFIA() to load your FIA data.')
    }

    ## an iterator for remote
    iter <- 1
  }

  return(iter)
}

## Check most recent and handle remote dbs
checkMR <- function(db, remote){
  if (remote){
    if ('mostRecent' %in% names(db)){
      mr = db$mostRecent # logical
    } else {
      mr = FALSE
    }
    ## In-memory
  } else {
    if ('mostRecent' %in% names(db)){
      mr = TRUE
    } else {
      mr = FALSE
    }
  }
  return(mr)
}

## Prep database for areal summary
arealSumPrep1 <- function(polys){

  if(!is.null(polys)) {
    # Convert polygons to an sf object
    polys <- polys %>%
      as('sf')%>%
      dplyr::mutate_if(is.factor,
                       as.character)
    ## A unique ID
    polys$polyID <- 1:nrow(polys)
  }

  return(polys)
}

## Do the spatial intersection of plots w/ polgyons
arealSumPrep2 <- function(db, grpBy, polys, nCores, remote){

  ## Make plot data spatial, projected same as polygon layer
  pltSF <- dplyr::select(db$PLOT, c('LON', 'LAT', pltID)) %>%
    dplyr::filter(!is.na(LAT) & !is.na(LON)) %>%
    dplyr::distinct(pltID, .keep_all = TRUE) %>%
    sf::st_as_sf(coords = c('LON', 'LAT'))
  sf::st_crs(pltSF) <- 4326
  pltSF <- pltSF %>%
    sf::st_transform(crs = sf::st_crs(polys))

  ## Crop the polygon to the bounding box of the FIA plots first
  suppressWarnings({suppressMessages({
    polys <- sf::st_crop(polys, sf::st_bbox(pltSF))
  })})

  ## Split up polys
  polyList <- split(polys, as.factor(polys$polyID))
  suppressWarnings({suppressMessages({
    ## Compute estimates in parallel -- Clusters in windows, forking otherwise
    if (Sys.info()['sysname'] == 'Windows'){
      cl <- parallel::makeCluster(nCores)
      parallel::clusterEvalQ(cl, {
        library(dplyr)
        library(stringr)
        library(rFIA)
        library(sf)
      })
      out <- parallel::parLapply(cl, X = names(polyList), fun = areal_par, pltSF, polyList)
      #stopCluster(cl) # Keep the cluster active for the next run
    } else { # Unix systems
      out <- parallel::mclapply(names(polyList), FUN = areal_par, pltSF, polyList, mc.cores = nCores)
    }
  })})
  pltSF <- dplyr::bind_rows(out)

  # A warning
  if (length(unique(pltSF$pltID)) < 1){
    if (!remote) {
      stop('No plots in db overlap with polys.')
    } else {
      return()
    }
  }

  ## Add polygon names to PLOT
  db$PLOT <- db$PLOT %>%
    dplyr::select(-c(any_of(names(polys)))) %>%
    dplyr::left_join(pltSF, by = 'pltID')

  return(db)

}

## If a state missing a ROI completely, skip it when remote
## Primary purpose is to throw a warning when no overlap is present
dropStatesOutsidePolys <- function(x) {
  x <- x[x != 'no plots in polys']

  if (length(x) < 1) {
    stop('No plots in db overlap with polys.')
  }

  return(x)

}

## Helper function to read remote database by state within
## estimator functions
readRemoteHelper <- function(x, db, remote, reqTables, nCores){
  if (remote){
    ## Store the original parameters here
    params <- db

    ## Read in one state at a time
    db <- readFIA(dir = db$dir, common = db$common,
                  tables = reqTables, states = x, ## x is the vector of state names
                  nCores = nCores)

    ## If a clip was specified, run it now
    if ('mostRecent' %in% names(params)){
      db <- clipFIA(db, mostRecent = params$mostRecent,
                    mask = params$mask, matchEval = params$matchEval,
                    evalid = params$evalid, designCD = params$designCD,
                    nCores = nCores)
    }

  } else {
    if (sum(reqTables %in% names(db)) < length(reqTables)) {
      missing.tables <- reqTables[!c(reqTables %in% names(db))]
      stop(paste(paste (as.character(missing.tables), collapse = ', '), 'tables not found in object `db`.'))
    }
    ## Really only want the required tables
    db <- db[names(db) %in% reqTables]
  }

  return(db)
}


## Convert grpBy from quos to character and make sure columns exist
grpByToChar <- function(db, grpBy_quo){

  ## If tree table exists
  if ('TREE' %in% names(db)){
    # Probably cheating, but it works
    if (dplyr::quo_name(grpBy_quo) != 'NULL'){
      ## Have to join tables to run select with this object type
      plt_quo <- dplyr::filter(db$PLOT, !is.na(PLT_CN))
      ## We want a unique error message here to tell us when columns are not present in data
      d_quo <- tryCatch(
        error = function(cnd) {
          return(0)
        },
        plt_quo[10,] %>% # Just the first row
          dplyr::left_join(dplyr::select(db$COND, PLT_CN, names(db$COND)[names(db$COND) %in% names(db$PLOT) == FALSE]), by = 'PLT_CN') %>%
          dplyr::inner_join(dplyr::select(db$TREE, PLT_CN, names(db$TREE)[names(db$TREE) %in% c(names(db$PLOT), names(db$COND)) == FALSE]), by = 'PLT_CN') %>%
          dplyr::select(!!grpBy_quo)
      )

      # If column doesnt exist, just returns 0, not a dataframe
      if (is.null(nrow(d_quo))){
        grpName <- dplyr::quo_name(grpBy_quo)
        stop(paste('Columns', grpName, 'not found in PLOT, TREE, or COND tables. Did you accidentally quote the variables names? e.g. use grpBy = ECOSUBCD (correct) instead of grpBy = "ECOSUBCD". ', collapse = ', '))
      } else {
        # Convert to character
        grpBy <- names(d_quo)
      }
    } else {
      grpBy <- NULL
    }
  } else if ('SEEDLING' %in% names(db)) {
    # Probably cheating, but it works
    if (dplyr::quo_name(grpBy_quo) != 'NULL'){
      ## Have to join tables to run select with this object type
      plt_quo <- dplyr::filter(db$PLOT, !is.na(PLT_CN))
      ## We want a unique error message here to tell us when columns are not present in data
      d_quo <- tryCatch(
        error = function(cnd) {
          return(0)
        },
        plt_quo[10,] %>% # Just the first row
          dplyr::left_join(dplyr::select(db$COND, PLT_CN, names(db$COND)[names(db$COND) %in% names(db$PLOT) == FALSE]), by = 'PLT_CN') %>%
          dplyr::inner_join(dplyr::select(db$SEEDLING, PLT_CN, names(db$SEEDLING)[names(db$SEEDLING) %in% c(names(db$PLOT), names(db$COND)) == FALSE]), by = 'PLT_CN') %>%
          dplyr::select(!!grpBy_quo)
      )

      # If column doesnt exist, just returns 0, not a dataframe
      if (is.null(nrow(d_quo))){
        grpName <- dplyr::quo_name(grpBy_quo)
        stop(paste('Columns', grpName, 'not found in PLOT, SEEDLING, or COND tables. Did you accidentally quote the variables names? e.g. use grpBy = ECOSUBCD (correct) instead of grpBy = "ECOSUBCD". ', collapse = ', '))
      } else {
        # Convert to character
        grpBy <- names(d_quo)
      }
    } else {
      grpBy <- NULL
    }
  } else {
    # Probably cheating, but it works
    if (dplyr::quo_name(grpBy_quo) != 'NULL'){
      ## Have to join tables to run select with this object type
      plt_quo <- dplyr::filter(db$PLOT, !is.na(PLT_CN))
      ## We want a unique error message here to tell us when columns are not present in data
      d_quo <- tryCatch(
        error = function(cnd) {
          return(0)
        },
        plt_quo[10,] %>% # Just the first row
          dplyr::left_join(dplyr::select(db$COND, PLT_CN, names(db$COND)[names(db$COND) %in% names(db$PLOT) == FALSE]), by = 'PLT_CN') %>%
          dplyr::select(!!grpBy_quo)
      )

      # If column doesnt exist, just returns 0, not a dataframe
      if (is.null(nrow(d_quo))){
        grpName <- dplyr::quo_name(grpBy_quo)
        stop(paste('Columns', grpName, 'not found in PLOT or COND tables. Did you accidentally quote the variables names? e.g. use grpBy = ECOSUBCD (correct) instead of grpBy = "ECOSUBCD". ', collapse = ', '))
      } else {
        # Convert to character
        grpBy <- names(d_quo)
      }
    } else {
      grpBy <- NULL
    }
  }

  return(grpBy)

}

## Drop all inventories that are specific to east or west tx. Only retain evals
## that span the entire state
handleTX <- function(db){

  if (any(db$POP_EVAL$STATECD %in% 48)){

    badIDS <- db$POP_PLOT_STRATUM_ASSGN %>%
      dplyr::filter(STATECD %in% 48) %>%
      dplyr::distinct(EVALID, UNITCD) %>%
      dplyr::group_by(EVALID) %>%
      dplyr::summarise(n = dplyr::n()) %>%
      dplyr::ungroup() %>%
      dplyr::filter(n != 7) ## i.e., only keep EVALIDs w/out all 7 units

    db$POP_EVAL <- dplyr::filter(db$POP_EVAL, !c(EVALID %in% badIDS$EVALID))

  }

  return(db)
}

## As of Apr 2021, WY labels 2018 and 2019 inventories as 2020. This breaks rFIA,
## so we manually reset these labels to their appropriate values here
handleWY <- function(db){
  if ('POP_EVAL' %in% names(db)) {
    db$POP_EVAL <- db$POP_EVAL %>%
      dplyr::mutate(END_INVYR = dplyr::case_when(EVALID %in% c(561800, 561801, 561803, 561807) ~ as.numeric(2018),
                                                 EVALID %in% c(561900, 561901, 561903, 561907, 561909, 561910) ~ as.numeric(2019),
                                                 TRUE ~ as.numeric(END_INVYR)))
  }
  return(db)
}

## Land type domain indicator
landTypeDomain <- function(landType, COND_STATUS_CD, SITECLCD, RESERVCD) {
  if (tolower(landType) == 'forest'){
    landD <- ifelse(COND_STATUS_CD == 1, 1, 0)
  } else if (tolower(landType) == 'timber'){
    landD <- ifelse(COND_STATUS_CD == 1 & SITECLCD %in% c(1, 2, 3, 4, 5, 6) & RESERVCD == 0, 1, 0)
  } else if (tolower(landType) == 'non-forest'){
    landD <- ifelse(COND_STATUS_CD == 2, 1, 0)
  } else if (tolower(landType) == 'water'){
    landD <- ifelse(COND_STATUS_CD == 3 | COND_STATUS_CD == 4, 1, 0)
  } else if (tolower(landType) == 'census water'){
    landD <- ifelse(COND_STATUS_CD == 3, 1, 0)
  } else if (tolower(landType) == 'non-census water'){
    landD <- ifelse(COND_STATUS_CD == 4, 1, 0)
  } else if (tolower(landType) == 'all') {
    landD <- 1
  }

  return(landD)
}

## Tree type domain indicator
treeTypeDomain <- function(treeType, STATUSCD, DIA, TREECLCD) {
  if (tolower(treeType) == 'live'){
    typeD <- data.table::fifelse(STATUSCD == 1, 1, 0)
  } else if (tolower(treeType) == 'dead'){
    typeD <- data.table::fifelse(STATUSCD == 2, 1, 0)
  } else if (tolower(treeType) == 'gs'){
    typeD <- data.table::fifelse(STATUSCD == 1 & DIA >= 5 & TREECLCD == 2, 1, 0)
  } else if (tolower(treeType) == 'all'){
    typeD <- 1
  }
  return(typeD)
}

typeDomain_grow <- function(db, treeType, landType, type, stateVar = NULL) {

  if (type == 'vr'){
    if (tolower(landType) == 'forest'){
      db$COND$landD <- ifelse(db$COND$COND_STATUS_CD == 1, 1, 0)
      # Tree Type domain indicator
      if (tolower(treeType) %in% c('live', 'all')){
        db$TREE$typeD <- 1
        ## Rename some variables in grm
        db$TREE_GRM_COMPONENT <- dplyr::rename(db$TREE_GRM_COMPONENT,
                                               TPAMORT_UNADJ = SUBP_TPAMORT_UNADJ_AL_FOREST,
                                               TPAREMV_UNADJ = SUBP_TPAREMV_UNADJ_AL_FOREST,
                                               TPAGROW_UNADJ = SUBP_TPAGROW_UNADJ_AL_FOREST,
                                               SUBPTYP_GRM = SUBP_SUBPTYP_GRM_AL_FOREST,
                                               COMPONENT = SUBP_COMPONENT_AL_FOREST)

      } else if (tolower(treeType) == 'gs'){
        db$TREE$typeD <- 1 #ifelse(db$TREE$DIA >= 5, 1, 0)
        db$TREE_GRM_COMPONENT <- dplyr::rename(db$TREE_GRM_COMPONENT,
                                               TPAMORT_UNADJ = SUBP_TPAMORT_UNADJ_GS_FOREST,
                                               TPAREMV_UNADJ = SUBP_TPAREMV_UNADJ_GS_FOREST,
                                               TPAGROW_UNADJ = SUBP_TPAGROW_UNADJ_GS_FOREST,
                                               SUBPTYP_GRM = SUBP_SUBPTYP_GRM_GS_FOREST,
                                               COMPONENT = SUBP_COMPONENT_GS_FOREST)
      }
    } else if (tolower(landType) == 'timber'){
      db$COND$landD <- ifelse(db$COND$COND_STATUS_CD == 1 & db$COND$SITECLCD %in% c(1, 2, 3, 4, 5, 6) & db$COND$RESERVCD == 0, 1, 0)
      # Tree Type domain indicator
      if (tolower(treeType)  %in% c('live', 'all')){
        db$TREE$typeD <- 1
        ## Rename some variables in grm
        db$TREE_GRM_COMPONENT <- dplyr::rename(db$TREE_GRM_COMPONENT,
                                               TPAMORT_UNADJ = SUBP_TPAMORT_UNADJ_AL_TIMBER,
                                               TPAREMV_UNADJ = SUBP_TPAREMV_UNADJ_AL_TIMBER,
                                               TPAGROW_UNADJ = SUBP_TPAGROW_UNADJ_AL_TIMBER,
                                               SUBPTYP_GRM = SUBP_SUBPTYP_GRM_AL_TIMBER,
                                               COMPONENT = SUBP_COMPONENT_AL_TIMBER)

      } else if (tolower(treeType) == 'gs'){
        db$TREE$typeD <- 1 #ifelse(db$TREE$DIA >= 5, 1, 0)
        db$TREE_GRM_COMPONENT <- dplyr::rename(db$TREE_GRM_COMPONENT,
                                               TPAMORT_UNADJ = SUBP_TPAMORT_UNADJ_GS_TIMBER,
                                               TPAREMV_UNADJ = SUBP_TPAREMV_UNADJ_GS_TIMBER,
                                               TPAGROW_UNADJ = SUBP_TPAGROW_UNADJ_GS_TIMBER,
                                               SUBPTYP_GRM = SUBP_SUBPTYP_GRM_GS_TIMBER,
                                               COMPONENT = SUBP_COMPONENT_GS_TIMBER)
      }
    }


    # ## If 'live' then we exclude trees that have died or recruited during remeasurement
    # if (tolower(treeType) == 'live') {
    #   status <- db$TREE %>%
    #     select(CN, PREV_TRE_CN, STATUSCD) %>%
    #     left_join(select(db$TREE, CN, STATUSCD),
    #               by = c('PREV_TRE_CN' = 'CN'),
    #               suffix = c('1', '2')) %>%
    #     ## If statuscd is ever anything but 1,
    #     ## exclude it from analysis
    #     mutate(typeD = case_when(STATUSCD1 == 1 & STATUSCD2 == 1 ~ 1,
    #                              TRUE ~ 0))
    #   db$TREE$typeD <- status$typeD
    # }

  } else if (type == 'gm') {
    ## Build domain indicator function which is 1 if observation meets criteria, and 0 otherwise
    # Land type domain indicator
    if (tolower(landType) == 'forest'){
      db$COND$landD <- ifelse(db$COND$COND_STATUS_CD == 1, 1, 0)

      # Tree Type domain indicator
      if (toupper(stateVar) %in% c('SAWVOL', 'SAWVOL_BF')) {
        db$TREE$typeD <- 1
        ## Rename some variables in grm
        db$TREE_GRM_COMPONENT <- dplyr::rename(db$TREE_GRM_COMPONENT,
                                               TPAMORT_UNADJ = SUBP_TPAMORT_UNADJ_SL_FOREST,
                                               TPAREMV_UNADJ = SUBP_TPAREMV_UNADJ_SL_FOREST,
                                               TPAGROW_UNADJ = SUBP_TPAGROW_UNADJ_SL_FOREST,
                                               SUBPTYP_GRM = SUBP_SUBPTYP_GRM_SL_FOREST,
                                               COMPONENT = SUBP_COMPONENT_SL_FOREST) %>%
          dplyr::mutate(TPARECR_UNADJ = dplyr::case_when(
            is.na(COMPONENT) ~ NA_real_,
            COMPONENT %in% c('INGROWTH', 'CUT2', 'MORTALITY2') ~ TPAGROW_UNADJ,
            TRUE ~ 0))


      } else if (tolower(treeType) == 'all'){
        db$TREE$typeD <- 1
        ## Rename some variables in grm
        db$TREE_GRM_COMPONENT <- dplyr::rename(db$TREE_GRM_COMPONENT,
                                               TPAMORT_UNADJ = SUBP_TPAMORT_UNADJ_AL_FOREST,
                                               TPAREMV_UNADJ = SUBP_TPAREMV_UNADJ_AL_FOREST,
                                               TPAGROW_UNADJ = SUBP_TPAGROW_UNADJ_AL_FOREST,
                                               SUBPTYP_GRM = SUBP_SUBPTYP_GRM_AL_FOREST,
                                               COMPONENT = SUBP_COMPONENT_AL_FOREST) %>%
          dplyr::mutate(TPARECR_UNADJ = dplyr::case_when(
            is.na(COMPONENT) ~ NA_real_,
            COMPONENT %in% c('INGROWTH', 'CUT2', 'MORTALITY2') ~ TPAGROW_UNADJ,
            TRUE ~ 0))

      } else if (tolower(treeType) == 'gs') {
        # db$TREE <- db$TREE %>%
        #   mutate(typeD = case_when(
        #     STATUSCD %in% 1:2 & DIA >=5 ~ 1,
        #     STATUSCD == 3 & PREVDIA >=5 ~ 1,
        #     TRUE ~ 0))
        db$TREE$typeD <- 1
        db$TREE_GRM_COMPONENT <- dplyr::rename(db$TREE_GRM_COMPONENT,
                                               TPAMORT_UNADJ = SUBP_TPAMORT_UNADJ_GS_FOREST,
                                               TPAREMV_UNADJ = SUBP_TPAREMV_UNADJ_GS_FOREST,
                                               TPAGROW_UNADJ = SUBP_TPAGROW_UNADJ_GS_FOREST,
                                               SUBPTYP_GRM = SUBP_SUBPTYP_GRM_GS_FOREST,
                                               COMPONENT = SUBP_COMPONENT_GS_FOREST)%>%
          dplyr::mutate(TPARECR_UNADJ = dplyr::case_when(
            is.na(COMPONENT) ~ NA_real_,
            COMPONENT %in% c('INGROWTH', 'CUT2', 'MORTALITY2') ~ TPAGROW_UNADJ,
            TRUE ~ 0))
      }

    } else if (tolower(landType) == 'timber'){
      db$COND$landD <- ifelse(db$COND$COND_STATUS_CD == 1 & db$COND$SITECLCD %in% c(1, 2, 3, 4, 5, 6) & db$COND$RESERVCD == 0, 1, 0)

      ## A general fix for land domain issues on timberland, i.e., trees on non-timberland being included
      db$TREE <- db$TREE %>%
        dplyr::left_join(dplyr::select(db$COND, PLT_CN, CONDID, landD), by = c('PLT_CN', 'CONDID')) %>%
        dplyr::mutate(typeD = landD) %>%
        dplyr::select(-c('landD'))

      # Tree Type domain indicator
      if (toupper(stateVar) %in% c('SAWVOL', 'SAWVOL_BF')) {
        ## Rename some variables in grm
        db$TREE_GRM_COMPONENT <- dplyr::rename(db$TREE_GRM_COMPONENT,
                                               TPAMORT_UNADJ = SUBP_TPAMORT_UNADJ_SL_TIMBER,
                                               TPAREMV_UNADJ = SUBP_TPAREMV_UNADJ_SL_TIMBER,
                                               TPAGROW_UNADJ = SUBP_TPAGROW_UNADJ_SL_TIMBER,
                                               SUBPTYP_GRM = SUBP_SUBPTYP_GRM_SL_TIMBER,
                                               COMPONENT = SUBP_COMPONENT_SL_TIMBER) %>%
          dplyr::mutate(TPARECR_UNADJ = dplyr::case_when(
            is.na(COMPONENT) ~ NA_real_,
            COMPONENT %in% c('INGROWTH', 'CUT2', 'MORTALITY2') ~ TPAGROW_UNADJ,
            TRUE ~ 0))


      } else if (tolower(treeType) == 'all'){
        ## Rename some variables in grm
        db$TREE_GRM_COMPONENT <- dplyr::rename(db$TREE_GRM_COMPONENT,
                                               TPAMORT_UNADJ = SUBP_TPAMORT_UNADJ_AL_TIMBER,
                                               TPAREMV_UNADJ = SUBP_TPAREMV_UNADJ_AL_TIMBER,
                                               TPAGROW_UNADJ = SUBP_TPAGROW_UNADJ_AL_TIMBER,
                                               SUBPTYP_GRM = SUBP_SUBPTYP_GRM_AL_TIMBER,
                                               COMPONENT = SUBP_COMPONENT_AL_TIMBER)%>%
          dplyr::mutate(TPARECR_UNADJ = dplyr::case_when(
            is.na(COMPONENT) ~ NA_real_,
            COMPONENT %in% c('INGROWTH', 'CUT2', 'MORTALITY2') ~ TPAGROW_UNADJ,
            TRUE ~ 0))

      } else if (tolower(treeType) == 'gs'){
        # db$TREE <- db$TREE %>%
        #   mutate(typeD = case_when(
        #     STATUSCD %in% 1:2 & DIA >=5 ~ 1,
        #     STATUSCD == 3 & PREVDIA >=5 ~ 1,
        #     TRUE ~ 0))
        db$TREE_GRM_COMPONENT <- dplyr::rename(db$TREE_GRM_COMPONENT,
                                               TPAMORT_UNADJ = SUBP_TPAMORT_UNADJ_GS_TIMBER,
                                               TPAREMV_UNADJ = SUBP_TPAREMV_UNADJ_GS_TIMBER,
                                               TPAGROW_UNADJ = SUBP_TPAGROW_UNADJ_GS_TIMBER,
                                               SUBPTYP_GRM = SUBP_SUBPTYP_GRM_GS_TIMBER,
                                               COMPONENT = SUBP_COMPONENT_GS_TIMBER )%>%
          dplyr::mutate(TPARECR_UNADJ = dplyr::case_when(
            is.na(COMPONENT) ~ NA_real_,
            COMPONENT %in% c('INGROWTH', 'CUT2', 'MORTALITY2') ~ TPAGROW_UNADJ,
            TRUE ~ 0))

      }
    }
  }



  return(db)
}

## Build domain indicator for UD area domain
udAreaDomain <- function(db, areaDomain) {

  ## Only evaluate if we need to
  if (!rlang::quo_is_null(areaDomain)) {
    ## We'll join up PLOT and COND, and evaluate in the context of the joined table
    plt <- db$PLOT %>%
      dplyr::filter(PLOT_STATUS_CD == 1)
    cnd <- db$COND %>%
      dplyr::filter(COND_STATUS_CD == 1)


    pcEval <- dplyr::left_join(plt, dplyr::select(cnd, -c('STATECD', 'UNITCD', 'COUNTYCD', 'INVYR', 'PLOT')), by = 'PLT_CN')
    pcEval$aD <- rlang::eval_tidy(areaDomain, pcEval) ## LOGICAL, THIS IS THE DOMAIN INDICATOR
    if(!is.null(pcEval$aD)) pcEval$aD[is.na(pcEval$aD)] <- 0 # Make NAs 0s. Causes bugs otherwise
    if(is.null(pcEval$aD)) pcEval$aD <- 1 # IF NULL IS GIVEN, THEN ALL VALUES TRUE
    pcEval$aD <- as.numeric(pcEval$aD)
    ## Any conditions involving variables in the PLOT table will be extended to the
    ## domain indicator in the condition table, no need to "upscale" domain indicator
    ## to the PLOT table
    db$COND <- db$COND %>%
      dplyr::left_join(dplyr::select(pcEval, c('PLT_CN', 'CONDID', 'aD')), by = c('PLT_CN', 'CONDID'))
  } else {
    db$COND$aD <- 1
  }


    #dplyr::mutate(aD_c = aD)
  # aD_p <- pcEval %>%
  #   dtplyr::lazy_dt() %>%
  #   dplyr::group_by(PLT_CN) %>%
  #   dplyr::summarize(aD_p = as.numeric(any(aD > 0))) %>%
  #   as.data.frame()
  # db$PLOT <- dplyr::left_join(db$PLOT, aD_p, by = 'PLT_CN')

  return(db)
}

## Build domain indicator for UD area domain
udTreeDomain <- function(db, treeDomain) {

  if (!rlang::quo_is_null(treeDomain)) {
    tD <- rlang::eval_tidy(treeDomain, db$TREE) ## LOGICAL, THIS IS THE DOMAIN INDICATOR
    if(!is.null(tD)) tD[is.na(tD)] <- 0 # Make NAs 0s. Causes bugs otherwise
    if(is.null(tD)) tD <- 1 # IF NULL IS GIVEN, THEN ALL VALUES TRUE
    db$TREE$tD <- as.numeric(tD)
  } else {
    db$TREE$tD <- 1
  }

  return(db)
}

## Build domain indicator for UD area domain
udSeedDomain <- function(db, treeDomain) {

  tD <- rlang::eval_tidy(treeDomain, db$SEEDLING) ## LOGICAL, THIS IS THE DOMAIN INDICATOR
  if(!is.null(tD)) tD[is.na(tD)] <- 0 # Make NAs 0s. Causes bugs otherwise
  if(is.null(tD)) tD <- 1 # IF NULL IS GIVEN, THEN ALL VALUES TRUE
  db$SEEDLING$tD <- as.numeric(tD)

  return(db)
}

## Handle population tables
handlePops_old <- function(db, evalType, method, mr, pltList = NULL, ga = FALSE){

  if (ga){
    ## What years are growth accounting years --> not all filled in
    ga <- db$POP_EVAL %>%
      group_by(END_INVYR) %>%
      summarize(ga = if_else(any(GROWTH_ACCT == 'Y'), 1, 0))

    ### Snag the EVALIDs that are needed
    db$POP_EVAL  <- db$POP_EVAL %>%
      left_join(ga, by = 'END_INVYR') %>%
      select('CN', 'END_INVYR', 'EVALID', 'ESTN_METHOD', 'GROWTH_ACCT', 'ga', STATECD) %>%
      left_join(select(db$POP_EVAL_TYP, c('EVAL_CN', 'EVAL_TYP')), by = c('CN' = 'EVAL_CN')) %>%
      filter(EVAL_TYP %in% c('EXPGROW', 'EXPMORT', 'EXPREMV')) %>%
      filter(!is.na(END_INVYR) & !is.na(EVALID) & END_INVYR >= 2003) %>%
      distinct(END_INVYR, EVALID, .keep_all = TRUE)
    gaVars <- c('GROWTH_ACCT')
  } else {
    ### Snag the EVALIDs that are needed
    db$POP_EVAL<- db$POP_EVAL %>%
      select('CN', 'END_INVYR', 'EVALID', 'ESTN_METHOD', STATECD) %>%
      inner_join(select(db$POP_EVAL_TYP, c('EVAL_CN', 'EVAL_TYP')), by = c('CN' = 'EVAL_CN')) %>%
      filter(EVAL_TYP %in% evalType) %>%
      filter(!is.na(END_INVYR) & !is.na(EVALID) & END_INVYR >= 2003) %>%
      distinct(END_INVYR, EVALID, .keep_all = TRUE)
    gaVars <- NULL
  }


  ## If a most-recent subset, make sure that we don't get two reporting years in
  ## western states
  if (mr) {
    db$POP_EVAL <- db$POP_EVAL %>%
      group_by(EVAL_TYP, STATECD) %>%
      filter(END_INVYR == max(END_INVYR, na.rm = TRUE)) %>%
      ungroup()
  }


  ## Cut STATECD
  db$POP_EVAL <- select(db$POP_EVAL, -c(STATECD))

  ### The population tables
  pops <- select(db$POP_EVAL, c('EVALID', 'ESTN_METHOD', 'CN', 'END_INVYR', 'EVAL_TYP', any_of(gaVars))) %>%
    rename(EVAL_CN = CN) %>%
    left_join(select(db$POP_ESTN_UNIT, c('CN', 'EVAL_CN', 'AREA_USED', 'P1PNTCNT_EU', any_of(c('p2eu', 'nStrata')))), by = c('EVAL_CN')) %>%
    rename(ESTN_UNIT_CN = CN) %>%
    left_join(select(db$POP_STRATUM, c('ESTN_UNIT_CN', 'EXPNS', 'P2POINTCNT', 'CN', 'P1POINTCNT', 'ADJ_FACTOR_SUBP', 'ADJ_FACTOR_MICR', "ADJ_FACTOR_MACR")), by = c('ESTN_UNIT_CN')) %>%
    rename(STRATUM_CN = CN) %>%
    distinct(EVALID, ESTN_UNIT_CN, STRATUM_CN, .keep_all = TRUE) %>%
    left_join(select(db$POP_PLOT_STRATUM_ASSGN, c('STRATUM_CN', 'PLT_CN', 'INVYR', 'STATECD',
                                                  any_of(c('p2eu_INVYR', 'nStrata_INVYR', 'P2POINTCNT_INVYR')))), by = 'STRATUM_CN') %>%
    distinct(EVALID, ESTN_UNIT_CN, STRATUM_CN, PLT_CN, .keep_all = TRUE) %>%
    ungroup() %>%
    mutate_if(is.factor,
              as.character) %>%
    rename(YEAR = END_INVYR)

  ## Dropping non-sampled plots for P3 variables
  if (!is.null(pltList)) {
    pops <- pops %>%
      filter(pops$PLT_CN %in% pltList)
  }

  ## P2POINTCNT column is NOT consistent for annual estimates, plots
  ## within individual strata and est units are related to different INVYRs

  ## Only run if this wasn't already done in clipFIA
  if (!any(c('p2eu', 'p2eu_INVYR', 'P2POINTCNT_INVYR') %in% names(pops))) {
    pops <- pops %>%
      ## Count of plots by Stratum/INVYR
      group_by(EVALID, ESTN_UNIT_CN, STRATUM_CN, INVYR) %>%
      mutate(P2POINTCNT_INVYR = n()) %>%
      ## By unit / INVYR
      group_by(EVALID, ESTN_UNIT_CN, INVYR) %>%
      mutate(p2eu_INVYR = n()) %>%
      ## By unit for entire cycle
      group_by(EVALID, ESTN_UNIT_CN) %>%
      mutate(p2eu = n()) %>%
      ungroup()

  }


  ## Recode a few of the estimation methods to make things easier below
  pops$ESTN_METHOD = recode(.x = pops$ESTN_METHOD,
                            `Post-Stratification` = 'strat',
                            `Stratified random sampling` = 'strat',
                            `Double sampling for stratification` = 'double',
                            `Simple random sampling` = 'simple',
                            `Subsampling units of unequal size` = 'simple')

  return(pops)

}

## Handle population tables
handlePops <- function(db, evalType, method, mr, pltList = NULL, ga = FALSE){

  ## Reset so we don't breka design info
  class(db) <- 'FIA.Database'

  ## Pull all design infor for the relevant inventories
  pops <- getDesignInfo(db,
                        type = evalType,
                        mostRecent = FALSE) # Will have been carried out already

  ## If a most-recent subset, make sure that we don't get two reporting years in
  ## western states
  if (mr) {
    pops <- pops %>%
      dplyr::group_by(STATECD) %>%
      dplyr::filter(YEAR == max(YEAR, na.rm = TRUE)) %>%
      dplyr::ungroup()
  }

  ## Dropping non-sampled plots for P3 variables
  if (!is.null(pltList)) {
    pops <- pops %>%
      dplyr::filter(pops$PLT_CN %in% pltList)
  }

  ## P2POINTCNT column is NOT consistent for annual estimates, plots
  ## within individual strata and est units are related to different INVYRs

  ## Only run if this wasn't already done in clipFIA
  if (!any(c('P2PNTCNT_EU_INVYR', 'P2POINTCNT_INVYR') %in% names(pops)) & method != 'TI') {
    pops <- pops %>%
      dplyr::left_join(select(db$PLOT, PLT_CN, INVYR), by = 'PLT_CN') %>%
      dtplyr::lazy_dt() %>%
      ## Count of plots by Stratum/INVYR
      dplyr::group_by(EVALID, ESTN_UNIT_CN, STRATUM_CN, INVYR) %>%
      dplyr::mutate(P2POINTCNT_INVYR = dplyr::n()) %>%
      dplyr::ungroup() %>%
      ## By unit / INVYR
      dplyr::group_by(EVALID, ESTN_UNIT_CN, INVYR) %>%
      dplyr::mutate(P2PNTCNT_EU_INVYR = dplyr::n()) %>%
      dplyr::ungroup() %>%
      as.data.frame() %>%
      dplyr::left_join(dplyr::select(db$POP_ESTN_UNIT, ESTN_UNIT_CN = CN, P1PNTCNT_EU), by = 'ESTN_UNIT_CN') %>%
      dplyr::left_join(dplyr::select(db$POP_STRATUM, STRATUM_CN = CN, P1POINTCNT), by = 'STRATUM_CN')
  }


  # ## Recode a few of the estimation methods to make things easier below
  pops <- pops %>%
    dplyr::mutate(ESTN_METHOD = dplyr::case_when(ESTN_METHOD %in% c("Post-Stratification", "Stratified random sampling") ~ 'strat',
                                                 ESTN_METHOD %in% c('Simple random sampling', 'Subsampling units of unequal size') ~ 'simple',
                                                 TRUE ~ 'double'))

  return(pops)

}



## When something other than temporally indifferent is used, we may need to merge small strata
## There is no great way to go about this, but very important we do it for variance issues.
## So, what we will do is:
## (1) Identify strata/INVYR pairs with less than 2 ground plots
## (2) For each of those pairs, identify their most similar neighbor based on fuzzy string matching of STRATUM Descriptions.
##     Neighbors (other strata) must be from the same estimation unit and measured in the same year (INVYR). If no
##     neighbors exist, i.e., the small stratum was the only one measured in a given INVYR
## Important -- we are effectively allowing for stratification to vary across panels within an inventory cycle.
##              Also, we are adjusting strata weights by INVYR within the cycle, i.e., the same stratum may be
##              weighted differently in different years within the same cycle
## When something other than temporally indifferent is used, we may need to merge small strata
## There is no great way to go about this, but very important we do it for variance issues.
## So, what we will do is:
## (1) Identify strata/INVYR pairs with less than 2 ground plots
## (2) For each of those pairs, identify their most similar neighbor based on fuzzy string matching of STRATUM Descriptions.
##     Neighbors (other strata) must be from the same estimation unit and measured in the same year (INVYR). If no
##     neighbors exist, i.e., the small stratum was the only one measured in a given INVYR
## Important -- we are effectively allowing for stratification to vary across panels within an inventory cycle.
##              Also, we are adjusting strata weights by INVYR within the cycle, i.e., the same stratum may be
##              weighted differently in different years within the same cycle

mergeSmallStrata_old <- function(db, pops) {

  ## Make a unique ID for stratum / year pairs
  pops$stratID <- paste(pops$STRATUM_CN, pops$INVYR, sep = '_')
  ## We'll allow strata weights to vary by INVYR w/ same strata because not all
  ## strata are sampled in a given year
  pops$P1POINTCNT_INVYR <- pops$P1POINTCNT

  if (any(c('nStrata', 'nStrata_INVYR') %in% names(pops))) {
    ## Stratum year pairs
    stratYr <- pops %>%
      left_join(select(db$POP_STRATUM, CN, STRATUM_DESCR), by = c('STRATUM_CN' = 'CN')) %>%
      distinct(STATECD, ESTN_UNIT_CN, STRATUM_CN, STRATUM_DESCR, INVYR,
               P2POINTCNT, P1POINTCNT, P2POINTCNT_INVYR, P1POINTCNT_INVYR, stratID,
               nStrata, nStrata_INVYR) %>%
      ## If buffer is present in the name, then the stratum has a different intensity
      ## than other strata in the same estimation unit (PNW only).
      ## Only combine buffer w/ buffer
      mutate(buff = str_detect(STRATUM_DESCR, 'buff') & STATECD %in% c(53, 41, 6)) %>%
      mutate(wrong = P2POINTCNT_INVYR < 2) %>%
      arrange(P2POINTCNT_INVYR)
  } else {
    ## Stratum year pairs
    stratYr <- pops %>%
      left_join(select(db$POP_STRATUM, CN, STRATUM_DESCR), by = c('STRATUM_CN' = 'CN')) %>%
      distinct(STATECD, ESTN_UNIT_CN, STRATUM_CN, STRATUM_DESCR, INVYR,
               P2POINTCNT, P1POINTCNT, P2POINTCNT_INVYR, P1POINTCNT_INVYR, stratID) %>%
      ## If buffer is present in the name, then the stratum has a different intensity
      ## than other strata in the same estimation unit (PNW only).
      ## Only combine buffer w/ buffer
      mutate(buff = str_detect(STRATUM_DESCR, 'buff') & STATECD %in% c(53, 41, 6)) %>%
      mutate(wrong = P2POINTCNT_INVYR < 2) %>%
      group_by(ESTN_UNIT_CN, INVYR) %>%
      mutate(nStrata_INVYR = length(unique(STRATUM_CN))) %>%
      group_by(ESTN_UNIT_CN) %>%
      mutate(nStrata = length(unique(STRATUM_CN))) %>%
      ungroup() %>%
      arrange(P2POINTCNT_INVYR)
  }


  ## Check if any fail
  warnMe <- c()

  ## If any are too small, i.e., only one plot --> do some merging
  if (sum(stratYr$wrong, na.rm = TRUE) > 0){

    for ( i in stratYr$stratID[stratYr$wrong == 1] ) {

      ## Subset the row
      dat <- filter(stratYr, stratID == i)


      ## Use fuzzy string matching if any other strata are available to merge with
      if (dat$nStrata_INVYR > 1) {
        ## Find its nearest neighbor of those in the same estimation
        ## unit and INVYR
        neighbors <- stratYr %>%
          filter(ESTN_UNIT_CN == dat$ESTN_UNIT_CN) %>%
          #filter(buff == dat$buff) %>%
          filter(INVYR == dat$INVYR) %>%
          filter(stratID != i)

        if (nrow(neighbors) < 1) {
          warnMe <- c(warnMe, TRUE)

        } else {
          warnMe <- c(warnMe, FALSE)

          ## Find the most similar neighbor in terms of stratum description
          msn <- adist(dat$STRATUM_DESCR, neighbors$STRATUM_DESCR)
          msnID <- neighbors$stratID[which.min(msn)]


          ## In pops, we want to update all rows of the giving and receiving strata
          ## Where giving gets a change in STRATUM_CN, P1POINTCNT, and P2POINTCNT

          ## Giving stratum ----------------------------------------------------
          pops[pops$stratID == i, 'STRATUM_CN'] <- unique(pops[pops$stratID == msnID, 'STRATUM_CN'])
          pops[pops$stratID == i, 'P1POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P1POINTCNT_INVYR']) + dat$P1POINTCNT_INVYR
          pops[pops$stratID == i, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR
          #pops[pops$stratID == i, 'stratID'] <- unique(pops[pops$stratID == msnID, 'stratID'])


          ## Receiving stratum -------------------------------------------------
          pops[pops$stratID == msnID, 'P1POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P1POINTCNT_INVYR']) + dat$P1POINTCNT_INVYR
          pops[pops$stratID == msnID, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR

        }




        ## If the small stratum is the only one available in the estimation unit in a given year,
        ## merge the INVYR within the same stratum
      } else {

        ## No other strata measured in the same year, so merge years instead
        neighbors <- stratYr %>%
          filter(STRATUM_CN == dat$STRATUM_CN) %>%
          filter(stratID != i)

        if (nrow(neighbors) > 0) {
          warnMe <- c(warnMe, FALSE)

          ## Find the most similar neighbor in terms of stratum description
          msn <- abs(neighbors$INVYR - (dat$INVYR + .01))
          msnID <- neighbors$stratID[which.min(msn)]

          ## Giving stratum ----------------------------------------------------
          pops[pops$stratID == i, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR
          pops[pops$stratID == i, 'INVYR'] <- unique(pops[pops$stratID == msnID, 'INVYR'])
          #pops[pops$stratID == i, 'stratID'] <- unique(pops[pops$stratID == msnID, 'stratID'])


          ## Receiving stratum -------------------------------------------------
          pops[pops$stratID == msnID, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR

        } else {
          warnMe <- c(warnMe, TRUE)
        }
      }
    }

    ## Keeps it from repeating above
    if (any(warnMe)) warning('Bad stratification, i.e., strata too small to compute variance of annual panels. If you are only interested in totals and/or ratio estimates, disregard this. However, if interested in variance (e.g., for confidence intervals) try using method = "TI".')


    ## Update adjustment factors in estimation unit
    pops <- pops %>%
      distinct(EVALID, ESTN_UNIT_CN, STRATUM_CN, INVYR, PLT_CN, .keep_all = TRUE) %>%
      ## Fix adjustment factors
      group_by(STRATUM_CN) %>%
      mutate(ADJ_FACTOR_MICR = mean(ADJ_FACTOR_MICR, na.rm = TRUE),
             ADJ_FACTOR_SUBP = mean(ADJ_FACTOR_SUBP, na.rm = TRUE),
             ADJ_FACTOR_MACR = mean(ADJ_FACTOR_MACR)) %>%
      ungroup()
  }


  ## Whether any strata are small are not, we will almost surely need to adjust
  ## strata weights for some INVYRs. Not all stratum are measured in each panel
  ## within a cycle, and when this happens, we need to weight the observed strata
  ## higher. If we don't, strata weights will not sum to 1 in some years and we
  ## will grossly underestimate means/totals in some cases
  ## Adjust stratum weights when not all strata are sampled in an INVYR
  stratYr <- pops %>%
    distinct(ESTN_UNIT_CN, STRATUM_CN, INVYR,
             P1POINTCNT_INVYR, P1PNTCNT_EU,
             P2POINTCNT_INVYR) %>%
    ## If multiple stratum were merged onto one, choose the maximum value (i.e.,
    ## the result of the last iteration)
    group_by(ESTN_UNIT_CN, STRATUM_CN, INVYR) %>%
    mutate(P1POINTCNT_INVYR = max(P1POINTCNT_INVYR),
           P2POINTCNT_INVYR = max(P2POINTCNT_INVYR)) %>%
    distinct() %>%
    mutate(stratWgt = P1POINTCNT_INVYR / P1PNTCNT_EU) %>%
    group_by(ESTN_UNIT_CN, INVYR) %>%
    mutate(propSampled = sum(stratWgt, na.rm = TRUE),
           stratWgt_INVYR = stratWgt / propSampled,
           P1POINTCNT_INVYR = P1PNTCNT_EU * stratWgt_INVYR,
           p2eu_INVYR = sum(P2POINTCNT_INVYR, na.rm = TRUE)) %>%
    ungroup() %>%
    select(STRATUM_CN, INVYR, P1POINTCNT_INVYR, P2POINTCNT_INVYR, p2eu_INVYR)

  pops <- pops %>%
    select(-c(P1POINTCNT_INVYR, stratID, P2POINTCNT_INVYR, p2eu_INVYR)) %>%
    left_join(stratYr, by = c('STRATUM_CN', 'INVYR')) %>%
    distinct(EVALID, ESTN_UNIT_CN, STRATUM_CN, PLT_CN, .keep_all = TRUE)


  return(pops)

}
mergeSmallStrata <- function(db, pops) {

  ## Make a unique ID for stratum / year pairs
  pops$stratID <- paste(pops$STRATUM_CN, pops$INVYR, sep = '_')
  ## We'll allow strata weights to vary by INVYR w/ same strata because not all
  ## strata are sampled in a given year
  pops$P1POINTCNT_INVYR <- pops$P1POINTCNT

  if (any(c('nStrata', 'nStrata_INVYR') %in% names(pops))) {
    ## Stratum year pairs
    stratYr <- pops %>%
      left_join(select(db$POP_STRATUM, CN, STRATUM_DESCR), by = c('STRATUM_CN' = 'CN')) %>%
      distinct(STATECD, ESTN_UNIT_CN, STRATUM_CN, STRATUM_DESCR, INVYR,
               P2POINTCNT, P1POINTCNT, P2POINTCNT_INVYR, P1POINTCNT_INVYR, stratID,
               nStrata, nStrata_INVYR) %>%
      ## If buffer is present in the name, then the stratum has a different intensity
      ## than other strata in the same estimation unit (PNW only).
      ## Only combine buffer w/ buffer
      mutate(buff = str_detect(STRATUM_DESCR, 'buff') & STATECD %in% c(53, 41, 6)) %>%
      mutate(wrong = P2POINTCNT_INVYR < 2) %>%
      arrange(P2POINTCNT_INVYR)
  } else {
    ## Stratum year pairs
    stratYr <- pops %>%
      left_join(select(db$POP_STRATUM, CN, STRATUM_DESCR), by = c('STRATUM_CN' = 'CN')) %>%
      distinct(STATECD, ESTN_UNIT_CN, STRATUM_CN, STRATUM_DESCR, INVYR,
               P2POINTCNT, P1POINTCNT, P2POINTCNT_INVYR, P1POINTCNT_INVYR, stratID) %>%
      ## If buffer is present in the name, then the stratum has a different intensity
      ## than other strata in the same estimation unit (PNW only).
      ## Only combine buffer w/ buffer
      mutate(buff = str_detect(STRATUM_DESCR, 'buff') & STATECD %in% c(53, 41, 6)) %>%
      mutate(wrong = P2POINTCNT_INVYR < 2) %>%
      group_by(ESTN_UNIT_CN, INVYR) %>%
      mutate(nStrata_INVYR = length(unique(STRATUM_CN))) %>%
      group_by(ESTN_UNIT_CN) %>%
      mutate(nStrata = length(unique(STRATUM_CN))) %>%
      ungroup() %>%
      arrange(P2POINTCNT_INVYR)
  }


  ## Check if any fail
  warnMe <- c()

  ## If any are too small, i.e., only one plot --> do some merging
  if (sum(stratYr$wrong, na.rm = TRUE) > 0){

    for ( i in stratYr$stratID[stratYr$wrong == 1] ) {

      ## Subset the row
      dat <- filter(stratYr, stratID == i)


      ## Use fuzzy string matching if any other strata are available to merge with
      if (dat$nStrata_INVYR > 1) {
        ## Find its nearest neighbor of those in the same estimation
        ## unit and INVYR
        neighbors <- stratYr %>%
          filter(ESTN_UNIT_CN == dat$ESTN_UNIT_CN) %>%
          #filter(buff == dat$buff) %>%
          filter(INVYR == dat$INVYR) %>%
          filter(stratID != i)

        if (nrow(neighbors) < 1) {
          warnMe <- c(warnMe, TRUE)

        } else {
          warnMe <- c(warnMe, FALSE)

          ## Find the most similar neighbor in terms of stratum description
          msn <- adist(dat$STRATUM_DESCR, neighbors$STRATUM_DESCR)
          msnID <- neighbors$stratID[which.min(msn)]


          ## In pops, we want to update all rows of the giving and receiving strata
          ## Where giving gets a change in STRATUM_CN, P1POINTCNT, and P2POINTCNT

          ## Giving stratum ----------------------------------------------------
          pops[pops$stratID == i, 'STRATUM_CN'] <- unique(pops[pops$stratID == msnID, 'STRATUM_CN'])
          pops[pops$stratID == i, 'P1POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P1POINTCNT_INVYR']) + dat$P1POINTCNT_INVYR
          pops[pops$stratID == i, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR
          #pops[pops$stratID == i, 'stratID'] <- unique(pops[pops$stratID == msnID, 'stratID'])


          ## Receiving stratum -------------------------------------------------
          pops[pops$stratID == msnID, 'P1POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P1POINTCNT_INVYR']) + dat$P1POINTCNT_INVYR
          pops[pops$stratID == msnID, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR

        }




        ## If the small stratum is the only one available in the estimation unit in a given year,
        ## merge the INVYR within the same stratum
      } else {

        ## No other strata measured in the same year, so merge years instead
        neighbors <- stratYr %>%
          filter(STRATUM_CN == dat$STRATUM_CN) %>%
          filter(stratID != i)

        if (nrow(neighbors) > 0) {
          warnMe <- c(warnMe, FALSE)

          ## Find the most similar neighbor in terms of stratum description
          msn <- abs(neighbors$INVYR - (dat$INVYR + .01))
          msnID <- neighbors$stratID[which.min(msn)]

          ## Giving stratum ----------------------------------------------------
          pops[pops$stratID == i, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR
          pops[pops$stratID == i, 'INVYR'] <- unique(pops[pops$stratID == msnID, 'INVYR'])
          #pops[pops$stratID == i, 'stratID'] <- unique(pops[pops$stratID == msnID, 'stratID'])


          ## Receiving stratum -------------------------------------------------
          pops[pops$stratID == msnID, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR

        } else {
          warnMe <- c(warnMe, TRUE)
        }
      }
    }

    ## Keeps it from repeating above
    if (any(warnMe)) warning('Bad stratification, i.e., strata too small to compute variance of annual panels. If you are only interested in totals and/or ratio estimates, disregard this. However, if interested in variance (e.g., for confidence intervals) try using method = "TI".')


    ## Update adjustment factors in estimation unit
    pops <- pops %>%
      distinct(EVALID, ESTN_UNIT_CN, STRATUM_CN, INVYR, PLT_CN, .keep_all = TRUE) %>%
      ## Fix adjustment factors
      group_by(STRATUM_CN) %>%
      mutate(ADJ_FACTOR_MICR = mean(ADJ_FACTOR_MICR, na.rm = TRUE),
             ADJ_FACTOR_SUBP = mean(ADJ_FACTOR_SUBP, na.rm = TRUE),
             ADJ_FACTOR_MACR = mean(ADJ_FACTOR_MACR)) %>%
      ungroup()
  }


  ## Whether any strata are small are not, we will almost surely need to adjust
  ## strata weights for some INVYRs. Not all stratum are measured in each panel
  ## within a cycle, and when this happens, we need to weight the observed strata
  ## higher. If we don't, strata weights will not sum to 1 in some years and we
  ## will grossly underestimate means/totals in some cases
  ## Adjust stratum weights when not all strata are sampled in an INVYR
  stratYr <- pops %>%
    distinct(ESTN_UNIT_CN, STRATUM_CN, INVYR,
             P1POINTCNT_INVYR, P1PNTCNT_EU,
             P2POINTCNT_INVYR) %>%
    ## If multiple stratum were merged onto one, choose the maximum value (i.e.,
    ## the result of the last iteration)
    group_by(ESTN_UNIT_CN, STRATUM_CN, INVYR) %>%
    mutate(P1POINTCNT_INVYR = max(P1POINTCNT_INVYR),
           P2POINTCNT_INVYR = max(P2POINTCNT_INVYR)) %>%
    distinct() %>%
    mutate(stratWgt = P1POINTCNT_INVYR / P1PNTCNT_EU) %>%
    group_by(ESTN_UNIT_CN, INVYR) %>%
    mutate(propSampled = sum(stratWgt, na.rm = TRUE),
           stratWgt_INVYR = stratWgt / propSampled,
           P1POINTCNT_INVYR = P1PNTCNT_EU * stratWgt_INVYR,
           P2PNTCNT_EU_INVYR = sum(P2POINTCNT_INVYR, na.rm = TRUE)) %>%
    ungroup() %>%
    select(STRATUM_CN, INVYR, P1POINTCNT_INVYR, P2POINTCNT_INVYR, P2PNTCNT_EU_INVYR)

  pops <- pops %>%
    select(-c(P1POINTCNT_INVYR, stratID, P2POINTCNT_INVYR, P2PNTCNT_EU_INVYR)) %>%
    left_join(stratYr, by = c('STRATUM_CN', 'INVYR')) %>%
    distinct(EVALID, ESTN_UNIT_CN, STRATUM_CN, PLT_CN, .keep_all = TRUE)


  return(pops)

}
# mergeSmallStrata <- function(db, pops, method) {
#
#
#   ## Make a unique ID for stratum / year pairs
#   pops$stratID <- paste(pops$STRATUM_CN, pops$INVYR, sep = '_')
#   ## We'll allow strata weights to vary by INVYR w/ same strata because not all
#   ## strata are sampled in a given year
#   pops$P1POINTCNT_INVYR <- pops$P1POINTCNT
#
#   if (any(c('nStrata', 'nStrata_INVYR') %in% names(pops))) {
#     ## Stratum year pairs
#     stratYr <- pops %>%
#       dplyr::left_join(dplyr::select(db$POP_STRATUM, CN, STRATUM_DESCR), by = c('STRATUM_CN' = 'CN')) %>%
#       dplyr::distinct(STATECD, ESTN_UNIT_CN, STRATUM_CN, STRATUM_DESCR, INVYR,
#                       P2POINTCNT, P1POINTCNT, P2POINTCNT_INVYR, P1POINTCNT_INVYR, stratID,
#                       nStrata, nStrata_INVYR) %>%
#       ## If buffer is present in the name, then the stratum has a different intensity
#       ## than other strata in the same estimation unit (PNW only).
#       ## Only combine buffer w/ buffer
#       dplyr::mutate(buff = stringr::str_detect(STRATUM_DESCR, 'buff') & STATECD %in% c(53, 41, 6)) %>%
#       dplyr::mutate(wrong = P2POINTCNT_INVYR < 2) %>%
#       dplyr::arrange(P2POINTCNT_INVYR)
#   } else {
#     ## Stratum year pairs
#     stratYr <- pops %>%
#       dplyr::left_join(dplyr::select(db$POP_STRATUM, CN, STRATUM_DESCR), by = c('STRATUM_CN' = 'CN')) %>%
#       dplyr::distinct(STATECD, ESTN_UNIT_CN, STRATUM_CN, STRATUM_DESCR, INVYR,
#                       P2POINTCNT, P1POINTCNT, P2POINTCNT_INVYR, P1POINTCNT_INVYR, stratID) %>%
#       ## If buffer is present in the name, then the stratum has a different intensity
#       ## than other strata in the same estimation unit (PNW only).
#       ## Only combine buffer w/ buffer
#       dplyr::mutate(buff = stringr::str_detect(STRATUM_DESCR, 'buff') & STATECD %in% c(53, 41, 6)) %>%
#       dplyr::mutate(wrong = P2POINTCNT_INVYR < 2) %>%
#       dplyr::group_by(ESTN_UNIT_CN, INVYR) %>%
#       dplyr::mutate(nStrata_INVYR = length(unique(STRATUM_CN))) %>%
#       dplyr::ungroup() %>%
#       dplyr::group_by(ESTN_UNIT_CN) %>%
#       dplyr::mutate(nStrata = length(unique(STRATUM_CN))) %>%
#       dplyr::ungroup() %>%
#       dplyr::arrange(P2POINTCNT_INVYR)
#   }
#
#
#   ## Check if any fail
#   warnMe <- c()
#
#   ## If any are too small, i.e., only one plot --> do some merging
#   if (sum(stratYr$wrong, na.rm = TRUE) > 0){
#
#     for ( i in stratYr$stratID[stratYr$wrong == 1] ) {
#
#       ## Subset the row
#       dat <- dplyr::filter(stratYr, stratID == i)
#
#
#       ## Use fuzzy string matching if any other strata are available to merge with
#       if (dat$nStrata_INVYR > 1) {
#         ## Find its nearest neighbor of those in the same estimation
#         ## unit and INVYR
#         neighbors <- stratYr %>%
#           dplyr::filter(ESTN_UNIT_CN == dat$ESTN_UNIT_CN) %>%
#           #filter(buff == dat$buff) %>%
#           dplyr::filter(INVYR == dat$INVYR) %>%
#           dplyr::filter(stratID != i)
#
#         if (nrow(neighbors) < 1) {
#           warnMe <- c(warnMe, TRUE)
#
#         } else {
#           warnMe <- c(warnMe, FALSE)
#
#           ## Find the most similar neighbor in terms of stratum description
#           msn <- adist(dat$STRATUM_DESCR, neighbors$STRATUM_DESCR)
#           msnID <- neighbors$stratID[which.min(msn)]
#
#
#           ## In pops, we want to update all rows of the giving and receiving strata
#           ## Where giving gets a change in STRATUM_CN, P1POINTCNT, and P2POINTCNT
#
#           ## Giving stratum ----------------------------------------------------
#           pops[pops$stratID == i, 'STRATUM_CN'] <- unique(pops[pops$stratID == msnID, 'STRATUM_CN'])
#           pops[pops$stratID == i, 'P1POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P1POINTCNT_INVYR']) + dat$P1POINTCNT_INVYR
#           pops[pops$stratID == i, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR
#           #pops[pops$stratID == i, 'stratID'] <- unique(pops[pops$stratID == msnID, 'stratID'])
#
#
#           ## Receiving stratum -------------------------------------------------
#           pops[pops$stratID == msnID, 'P1POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P1POINTCNT_INVYR']) + dat$P1POINTCNT_INVYR
#           pops[pops$stratID == msnID, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR
#
#         }
#
#
#
#
#         ## If the small stratum is the only one available in the estimation unit in a given year,
#         ## merge the INVYR within the same stratum
#       } else {
#
#         ## No other strata measured in the same year, so merge years instead
#         neighbors <- stratYr %>%
#           dplyr::filter(STRATUM_CN == dat$STRATUM_CN) %>%
#           dplyr::filter(stratID != i)
#
#         if (nrow(neighbors) > 0) {
#           warnMe <- c(warnMe, FALSE)
#
#           ## Find the most similar neighbor in terms of stratum description
#           msn <- abs(neighbors$INVYR - (dat$INVYR + .01))
#           msnID <- neighbors$stratID[which.min(msn)]
#
#           ## Giving stratum ----------------------------------------------------
#           pops[pops$stratID == i, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR
#           pops[pops$stratID == i, 'INVYR'] <- unique(pops[pops$stratID == msnID, 'INVYR'])
#           #pops[pops$stratID == i, 'stratID'] <- unique(pops[pops$stratID == msnID, 'stratID'])
#
#
#           ## Receiving stratum -------------------------------------------------
#           pops[pops$stratID == msnID, 'P2POINTCNT_INVYR'] <- unique(pops[pops$stratID == msnID, 'P2POINTCNT_INVYR']) + dat$P2POINTCNT_INVYR
#
#         } else {
#           warnMe <- c(warnMe, TRUE)
#         }
#       }
#     }
#
#     ## Keeps it from repeating above
#     if (any(warnMe)) warning('Bad stratification, i.e., strata too small to compute variance of annual panels. If you are only interested in totals and/or ratio estimates, disregard this. However, if interested in variance (e.g., for confidence intervals) try using method = "TI".')
#
#
#     ## Update adjustment factors in estimation unit
#     pops <- pops %>%
#       dplyr::distinct(EVALID, ESTN_UNIT_CN, STRATUM_CN, INVYR, PLT_CN, .keep_all = TRUE) %>%
#       ## Fix adjustment factors
#       dplyr::group_by(STRATUM_CN) %>%
#       dplyr::mutate(ADJ_FACTOR_MICR = mean(ADJ_FACTOR_MICR, na.rm = TRUE),
#                     ADJ_FACTOR_SUBP = mean(ADJ_FACTOR_SUBP, na.rm = TRUE),
#                     ADJ_FACTOR_MACR = mean(ADJ_FACTOR_MACR)) %>%
#       dplyr::ungroup()
#   }
#
#
#   ## Whether any strata are small are not, we will almost surely need to adjust
#   ## strata weights for some INVYRs. Not all stratum are measured in each panel
#   ## within a cycle, and when this happens, we need to weight the observed strata
#   ## higher. If we don't, strata weights will not sum to 1 in some years and we
#   ## will grossly underestimate means/totals in some cases
#   ## Adjust stratum weights when not all strata are sampled in an INVYR
#   stratYr <- pops %>%
#     dplyr::distinct(ESTN_UNIT_CN, STRATUM_CN, INVYR,
#                     P1POINTCNT_INVYR, P1PNTCNT_EU,
#                     P2POINTCNT_INVYR) %>%
#     ## If multiple stratum were merged onto one, choose the maximum value (i.e.,
#     ## the result of the last iteration)
#     dplyr::group_by(ESTN_UNIT_CN, STRATUM_CN, INVYR) %>%
#     dplyr::mutate(P1POINTCNT_INVYR = max(P1POINTCNT_INVYR),
#                   P2POINTCNT_INVYR = max(P2POINTCNT_INVYR)) %>%
#     dplyr::distinct() %>%
#     dplyr::mutate(stratWgt = P1POINTCNT_INVYR / P1PNTCNT_EU) %>%
#     dplyr::ungroup() %>%
#     dplyr::group_by(ESTN_UNIT_CN, INVYR) %>%
#     dplyr::mutate(propSampled = sum(stratWgt, na.rm = TRUE),
#                   stratWgt_INVYR = stratWgt / propSampled,
#                   P1POINTCNT_INVYR = P1PNTCNT_EU * stratWgt_INVYR,
#                   P2PNTCNT_EU_INVYR = sum(P2POINTCNT_INVYR, na.rm = TRUE)) %>%
#     dplyr::ungroup() %>%
#     dplyr::select(STRATUM_CN, INVYR, P1POINTCNT_INVYR, P2POINTCNT_INVYR, P2PNTCNT_EU_INVYR)
#
#   pops <- pops %>%
#     dplyr::select(-c(P1POINTCNT_INVYR, stratID, P2POINTCNT_INVYR, P2PNTCNT_EU_INVYR)) %>%
#     dplyr::left_join(stratYr, by = c('STRATUM_CN', 'INVYR')) %>%
#     dplyr::distinct(EVALID, ESTN_UNIT_CN, STRATUM_CN, PLT_CN, .keep_all = TRUE)
#
#
#   return(pops)
#
# }



## For annual estimator, we use the most recent stratification for all years.
## Otherwise we won't be able to compute the covariance between panels, because
## stratum boundaries and assignments differ from year to year. I don't see a
## better way at this point unfortunately.
annualStrataHelper <- function(db, pops) {


  ## Remove at end
  pops <- handlePops(db, evalType = c('EXPVOL'), method, mr)



  ## Add unique plot identifier
  pops <- dplyr::left_join(pops, dplyr::select(db$PLOT, PLT_CN, pltID, MEASYEAR), by = 'PLT_CN')


  ## Keep only design info from the most recent inventory,
  ## then join the full plot list back on
  pops <- pops %>%
    dplyr::group_by(STATECD, EVAL_TYP) %>%
    dplyr::filter(YEAR == max(YEAR, na.rm = TRUE)) %>%
    dplyr::ungroup() %>%
    dplyr::select(-c(PLT_CN, MEASYEAR)) %>%
    ## Add full plot list
    dplyr::left_join(
      dplyr::distinct(
        dplyr::select(pops, PLT_CN, pltID, MEASYEAR)
      ), by = 'pltID') %>%
    ## Add eu/ strat descriptions
    dplyr::left_join(dplyr::select(db$POP_STRATUM, CN, STRATUM_DESCR), by = c('STRATUM_CN' = 'CN')) %>%
    dplyr::left_join(dplyr::select(db$POP_ESTN_UNIT, CN, ESTN_UNIT_DESCR), by = c('ESTN_UNIT_CN' = 'CN')) %>%
    ## Override this for annual only
    dplyr::mutate(INVYR = MEASYEAR) %>%
    ## update annual stratum point counts to use MEASYEAR
    dplyr::group_by(INVYR, ESTN_UNIT_CN, STRATUM_CN) %>%
    dplyr::mutate(P2POINTCNT_INVYR = length(unique(PLT_CN))) %>%
    dplyr::ungroup()

  ## Drop any years where estimation units are not sampled
  keep.these <- pops %>%
    dplyr::group_by(INVYR) %>%
    dplyr::mutate(full.area = dplyr::case_when(all(unique(pops$ESTN_UNIT_CN) %in% ESTN_UNIT_CN) ~ 1,
                                               TRUE ~ 0)) %>%
    dplyr::filter(full.area == 1) %>%
    dplyr::ungroup() %>%
    dplyr::distinct(INVYR)
  pops <- dplyr::filter(pops, INVYR %in% keep.these$INVYR)


  ## A list of all estimation/unit strata by year
  stratYr <- pops %>%
    dplyr::distinct(STATECD, INVYR,
                    ESTN_UNIT_CN, AREA_USED, P2PNTCNT_EU, ESTN_UNIT_DESCR, P1PNTCNT_EU,
                    STRATUM_CN, STRATUM_DESCR, P1POINTCNT, P2POINTCNT, P2POINTCNT_INVYR) %>%
    ## update annual eu point counts to use MEASYEAR
    dplyr::group_by(INVYR, ESTN_UNIT_CN) %>%
    dplyr::mutate(P2PNTCNT_EU_INVYR = sum(P2POINTCNT_INVYR)) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(bad.eu = P2PNTCNT_EU_INVYR < 2) %>%
    dplyr::mutate(bad.strat = P2POINTCNT_INVYR < 2)


  ## Check if any fail
  warnMe <- c()

  ## If any estimation units are too small, i.e., only one plot --> do some merging
  if (sum(stratYr$bad.eu, na.rm = TRUE) > 0){

    for ( i in unique(stratYr$ESTN_UNIT_CN[stratYr$bad.eu == 1]) ) {

      ## Subset the row
      dat <- dplyr::filter(pops, ESTN_UNIT_CN == i) %>%
        dplyr::distinct(ESTN_UNIT_CN, ESTN_UNIT_DESCR, AREA_USED, P1PNTCNT_EU, P2PNTCNT_EU)



      ## Use fuzzy string matching to merge estimation units
      ## This is not ideal, but until FIA provides an objective
      ## means to determine sampling intensity within estimation
      ## units and/or stratum, this is the best we can do.
      neighbors <- pops %>%
        dplyr::filter(ESTN_UNIT_CN != dat$ESTN_UNIT_CN) %>%
        dplyr::distinct(ESTN_UNIT_CN, ESTN_UNIT_DESCR)

      if (nrow(neighbors) < 1) {
        warnMe <- c(warnMe, TRUE)

      } else {
        warnMe <- c(warnMe, FALSE)

        ## Find the most similar neighbor in terms of stratum description
        msn <- adist(dat$ESTN_UNIT_DESCR, neighbors$ESTN_UNIT_DESCR)
        msn <- dplyr::filter(pops, ESTN_UNIT_CN == neighbors$ESTN_UNIT_CN[which.min(msn)]) %>%
          dplyr::distinct(ESTN_UNIT_CN, P2PNTCNT_EU, AREA_USED, P1PNTCNT_EU)


        ## Recode the estimation units to carry out the merge
        pops <- pops %>%
          dplyr::mutate(AREA_USED = dplyr::case_when(
            ESTN_UNIT_CN == dat$ESTN_UNIT_CN ~ AREA_USED + msn$AREA_USED,
            ESTN_UNIT_CN == msn$ESTN_UNIT_CN ~ AREA_USED + dat$AREA_USED,
            TRUE ~ AREA_USED
          )) %>%
          dplyr::mutate(P2PNTCNT_EU = dplyr::case_when(
            ESTN_UNIT_CN == dat$ESTN_UNIT_CN ~ P2PNTCNT_EU + msn$P2PNTCNT_EU,
            ESTN_UNIT_CN == msn$ESTN_UNIT_CN ~ P2PNTCNT_EU + dat$P2PNTCNT_EU,
            TRUE ~ P2PNTCNT_EU
          )) %>%
          dplyr::mutate(P1PNTCNT_EU = dplyr::case_when(
            ESTN_UNIT_CN == dat$ESTN_UNIT_CN ~ P1PNTCNT_EU + msn$P1PNTCNT_EU,
            ESTN_UNIT_CN == msn$ESTN_UNIT_CN ~ P1PNTCNT_EU + dat$P1PNTCNT_EU,
            TRUE ~ P1PNTCNT_EU
          )) %>%
          dplyr::mutate(ESTN_UNIT_CN = dplyr::case_when(
            ESTN_UNIT_CN == dat$ESTN_UNIT_CN ~ msn$ESTN_UNIT_CN,
            TRUE ~ ESTN_UNIT_CN
          ))

      }
    } # Close for loop
  }



  ## If any strata are too small, i.e., only one plot --> do some merging
  if (sum(stratYr$bad.strat, na.rm = TRUE) > 0){

    for ( i in unique(stratYr$STRATUM_CN[stratYr$bad.strat == 1] )) {

      ## Previous update may fix future small strata, if so, leave it
      if (i %in% unique(pops$STRATUM_CN)) {

        ## Subset the row
        dat <- dplyr::filter(pops, STRATUM_CN == i) %>%
          dplyr::distinct(ESTN_UNIT_CN, STRATUM_CN, STRATUM_DESCR, P2POINTCNT, P1POINTCNT)



        ## Use fuzzy string matching to merge estimation units
        ## This is not ideal, but until FIA provides an objective
        ## means to determine sampling intensity within estimation
        ## units and/or stratum, this is the best we can do.
        neighbors <- pops %>%
          dplyr::distinct(ESTN_UNIT_CN, STRATUM_CN, STRATUM_DESCR, P2POINTCNT, P1POINTCNT) %>%
          dplyr::filter(ESTN_UNIT_CN == dat$ESTN_UNIT_CN) %>%
          dplyr::filter(STRATUM_CN != dat$STRATUM_CN)

        if (nrow(neighbors) < 1) {
          warnMe <- c(warnMe, TRUE)

        } else {
          warnMe <- c(warnMe, FALSE)

          ## Find the most similar neighbor in terms of stratum description
          msn <- adist(dat$STRATUM_DESCR, neighbors$STRATUM_DESCR)
          msn <- dplyr::filter(pops, STRATUM_CN == neighbors$STRATUM_CN[which.min(msn)]) %>%
            dplyr::distinct(ESTN_UNIT_CN, STRATUM_CN, STRATUM_DESCR, P2POINTCNT, P1POINTCNT)

          ## If we have a tie, random selection
          if (nrow(msn) > 1) {
            msn <- dplyr::sample_n(msn, size = 1)
          }


          ## Recode the estimation units to carry out the merge
          pops <- pops %>%
            dplyr::mutate(P2POINTCNT = dplyr::case_when(
              STRATUM_CN == dat$STRATUM_CN ~ P2POINTCNT + msn$P2POINTCNT,
              STRATUM_CN == msn$STRATUM_CN ~ P2POINTCNT + dat$P2POINTCNT,
              TRUE ~ P2POINTCNT
            )) %>%
            dplyr::mutate(P1POINTCNT = dplyr::case_when(
              STRATUM_CN == dat$STRATUM_CN ~ P1POINTCNT + msn$P1POINTCNT,
              STRATUM_CN == msn$STRATUM_CN ~ P1POINTCNT + dat$P1POINTCNT,
              TRUE ~ P1POINTCNT
            )) %>%
            dplyr::mutate(STRATUM_CN = dplyr::case_when(
              STRATUM_CN == dat$STRATUM_CN ~ msn$STRATUM_CN,
              TRUE ~ STRATUM_CN
            ))

        }
      }

    } # Close for loop
  }



  ## Keeps it from repeating above
  if (any(warnMe)) warning('Bad stratification, i.e., strata too small to compute variance of annual panels. If you are only interested in totals and/or ratio estimates, disregard this. However, if interested in variance (e.g., for confidence intervals) try using method = "TI".')


  ## Update adjustment factors in estimation unit
  pops <- pops %>%
    dplyr::distinct(EVALID, ESTN_UNIT_CN, STRATUM_CN, INVYR, PLT_CN, .keep_all = TRUE) %>%
    ## Fix adjustment factors
    dplyr::group_by(STRATUM_CN) %>%
    dplyr::mutate(ADJ_FACTOR_MICR = mean(ADJ_FACTOR_MICR, na.rm = TRUE),
                  ADJ_FACTOR_SUBP = mean(ADJ_FACTOR_SUBP, na.rm = TRUE),
                  ADJ_FACTOR_MACR = mean(ADJ_FACTOR_MACR)) %>%
    dplyr::ungroup() %>%
    ## Update stratum annual point counts
    dplyr::group_by(INVYR, ESTN_UNIT_CN, STRATUM_CN) %>%
    dplyr::mutate(P2POINTCNT_INVYR = length(unique(PLT_CN))) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(P1POINTCNT_INVYR = P1POINTCNT)


  ## Update eu annual point counts
  euP2 <- pops %>%
    dplyr::distinct(STATECD, INVYR,
                    ESTN_UNIT_CN, AREA_USED, P2PNTCNT_EU, ESTN_UNIT_DESCR, P1PNTCNT_EU,
                    STRATUM_CN, STRATUM_DESCR, P1POINTCNT, P2POINTCNT, P2POINTCNT_INVYR) %>%
    ## update annual eu point counts to use MEASYEAR
    dplyr::group_by(INVYR, ESTN_UNIT_CN) %>%
    dplyr::summarize(P2PNTCNT_EU_INVYR = sum(P2POINTCNT_INVYR)) %>%
    dplyr::ungroup()
  pops <- pops %>%
    dplyr::select(-c(P2PNTCNT_EU_INVYR)) %>%
    left_join(euP2, by = c('INVYR', 'ESTN_UNIT_CN'))


  return(pops)
}




## Moving average weights
maWeights <- function(pops, method, lambda){

  ### ---- SIMPLE MOVING AVERAGE
  if (stringr::str_to_upper(method) == 'SMA'){
    ## Assuming a uniform weighting scheme
    wgts <- pops %>%
      dplyr::group_by(YEAR, STATECD) %>%
      dplyr::summarize(wgt = 1 / length(unique(INVYR)))

    #### ----- Linear MOVING AVERAGE
  } else if (stringr::str_to_upper(method) == 'LMA'){

    wgts <- pops %>%
      dplyr::distinct(YEAR, STATECD, INVYR, .keep_all = TRUE) %>%
      dplyr::arrange(YEAR, STATECD, INVYR) %>%
      dplyr::group_by(as.factor(YEAR), as.factor(STATECD)) %>%
      dplyr::mutate(rank = dplyr::min_rank(INVYR),
                    nsum = sum(1:dplyr::n())) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(wgt = rank / nsum) %>%
      dplyr::ungroup() %>%
      dplyr::select(YEAR, STATECD, INVYR, wgt)


    #### ----- EXPONENTIAL MOVING AVERAGE
  } else if (stringr::str_to_upper(method) == 'EMA'){
    wgts <- pops %>%
      dplyr::distinct(YEAR, STATECD, INVYR, .keep_all = TRUE) %>%
      dplyr::arrange(YEAR, STATECD, INVYR) %>%
      dplyr::group_by(as.factor(YEAR), as.factor(STATECD)) %>%
      dplyr::mutate(rank = dplyr::min_rank(INVYR))


    if (length(lambda) < 2){
      ## Want sum of weighitng functions
      neu <- wgts %>%
        dplyr::mutate(l = lambda) %>%
        dplyr::group_by(YEAR, STATECD) %>%
        dplyr::summarize(l = 1 - dplyr::first(lambda),
                         sumwgt = sum(l*(1-l)^(1-rank), na.rm = TRUE))

      ## Rejoining and computing wgts
      wgts <- wgts %>%
        dplyr::left_join(neu, by = c('YEAR', 'STATECD')) %>%
        dplyr::mutate(wgt = l*(1-l)^(1-rank) / sumwgt) %>%
        dplyr::ungroup() %>%
        dplyr::select(YEAR, STATECD, INVYR, wgt)
    } else {
      ## Duplicate weights for each level of lambda
      yrWgts <- list()
      for (i in 1:length(unique(lambda))) {
        yrWgts[[i]] <- dplyr::mutate(wgts, lambda = lambda[i])
      }
      wgts <- dplyr::bind_rows(yrWgts)
      ## Want sum of weighitng functions
      neu <- wgts %>%
        dplyr::group_by(lambda, YEAR, STATECD) %>%
        dplyr::summarize(l = 1 - dplyr::first(lambda),
                         sumwgt = sum(l*(1-l)^(1-rank), na.rm = TRUE))

      ## Rejoining and computing wgts
      wgts <- wgts %>%
        dplyr::left_join(neu, by = c('lambda', 'YEAR', 'STATECD')) %>%
        dplyr::mutate(wgt = l*(1-l)^(1-rank) / sumwgt) %>%
        dplyr::ungroup() %>%
        dplyr::select(lambda, YEAR, STATECD, INVYR, wgt)
    }
  }

  return(wgts)
}


## Combine most-recent population estimates across states with potentially
## different reporting schedules, i.e., if 2016 is most recent in MI and 2017 is
## most recent in WI, combine them and label as 2017
combineMR <- function(x, grpBy){

  ## Used to let max year vary by group -- causes issues for ratio estimates though
  ## so scrapping it and taking the most recent independent of group
  # suppressMessages({suppressWarnings({
  #
  #   maxyears <- x %>%
  #     select(all_of(grpBy)) %>%
  #     group_by(.dots = grpBy[!c(grpBy %in% 'YEAR')]) %>%
  #     summarise(YEAR = max(YEAR, na.rm = TRUE))
  #
  #   out <- x %>%
  #     ungroup() %>%
  #     select(-c(YEAR)) %>%
  #     left_join(maxyears, by = grpBy[!c(grpBy %in% 'YEAR')])
  #
  # })})

  out <- x %>%
    dplyr::ungroup() %>%
    dplyr::mutate(YEAR = max(YEAR, na.rm = TRUE))

  return(out)
}

## Make implicit NA explicity for spatial summaries
prettyNamesSF <- function (tOut, polys, byPlot, grpBy, grpByOrig, tNames, returnSpatial) {

  # Return a spatial object
  if (!is.null(polys) & byPlot == FALSE) {
    ## NO IMPLICIT NA
    nospGrp <- unique(grpBy[grpBy %in% c('SPCD', 'SYMBOL', 'COMMON_NAME', 'SCIENTIFIC_NAME') == FALSE])
    nospSym <- dplyr::syms(nospGrp)
    tOut <- tidyr::complete(tOut, !!!nospSym)
    ## If species, we don't want unique combos of variables related to same species
    ## but we do want NAs in polys where species are present
    if (length(nospGrp) < length(grpBy)){
      spGrp <- unique(grpBy[grpBy %in% c('SPCD', 'SYMBOL', 'COMMON_NAME', 'SCIENTIFIC_NAME')])
      spSym <- dplyr::syms(spGrp)
      tOut <- tidyr::complete(tOut, tidyr::nesting(!!!nospSym))
    }

    suppressMessages({suppressWarnings({
      tOut <- dplyr::left_join(tOut, polys) %>%
        dplyr::select(c('YEAR', grpByOrig, tNames, names(polys))) %>%
        dplyr::filter(!is.na(polyID))})})

    ## Makes it horrible to work with as a dataframe
    if (returnSpatial == FALSE) tOut <- dplyr::select(tOut, -c(geometry))
  } else if (!is.null(polys) & byPlot){
    polys <- as.data.frame(polys)
    tOut <- dplyr::left_join(tOut, dplyr::select(polys, -c(geometry)), by = 'polyID')
  }

  return(tOut)
}

## Choose annual panels to return
filterAnnual <- function(x, grpBy, pltsVar, ESTN_UNIT) {

  ## Have to handle statecd carefully in grp by
  if ('STATECD' %in% grpBy) {
    grpBy <- grpBy[!c(grpBy %in% 'STATECD')]
    x <- x %>%
      dplyr::ungroup() %>%
      dplyr::select(-c(STATECD))
  }
  pltquo <- rlang::enquo(pltsVar)
  x <- x %>%
    dplyr::left_join(dplyr::distinct(dplyr::select(ESTN_UNIT, CN, STATECD)), by = c('ESTN_UNIT_CN' = 'CN')) %>%
    dplyr::mutate(nplts = !!pltquo) %>%
    dplyr::group_by(STATECD, INVYR, .dots = grpBy[!c(grpBy %in% 'STATECD')]) %>%
    dplyr::summarize(dplyr::across(.cols = dplyr::everything(), sum, na.rm = TRUE)) %>%
    ## Keep these
    dplyr::group_by(STATECD, INVYR, .dots = grpBy[!c(grpBy %in% 'STATECD')]) %>%
    dplyr::mutate(keep = ifelse(INVYR %in% YEAR,
                                ifelse(YEAR == INVYR, 1, 0), ## When TRUE
                                ifelse(nplts == max(nplts, na.rm = TRUE), 1, 0))) %>% ## When INVYR not in YEAR, keep estimates from the inventory where panel has the most plots
    dplyr::ungroup() %>%
    dplyr::filter(keep == 1) %>%
    ## If there are multiple reporting years where a panel has the same number of plots
    ## then the estimate will be way too big, we fix this by taking the first row from each output group
    ## If the above worked it will have no effect. If the above failed, it will save our ass.
    dplyr::mutate(YEAR = INVYR) %>%
    dplyr::group_by(STATECD, .dots = grpBy[!c(grpBy %in% 'STATECD')]) %>%
    dplyr::summarize(dplyr::across(.cols = dplyr::everything(), dplyr::first)) %>%
    dplyr::ungroup()

  return(x)
}




## Estimate skewness in a distribution of values
skewness <- function(x, na.rm = TRUE){

  ## Cut any NA
  if (na.rm) x <- x[!is.na(x)]

  ## Sample size
  n <- length(x)

  ## Estimate the skewness
  skew <- (sum((x-mean(x))^3)/n)/(sum((x-mean(x))^2)/n)^(3/2)

  return(skew)
}

projectPnts <- function(x, y, slope = NULL, yint = NULL){
  if (is.null(slope)){
    P = data.frame(xOrig = x, yOrig = y)
    P$x <- (P$yOrig+P$xOrig) / 2
    P$y <- P$x
  } else {
    P = data.frame(x, y)
    P$m <- slope
    P$n <- yint
    ## Perp Points
    P$x1 = P$x + -slope
    P$y1 = P$y + 1
    ## Perp Line
    P$m1 = (P$y1-P$y)/(P$x1-P$x)
    P$n1 = P$y - P$m1*P$x
    ## Line intersection
    P$x=(P$n1-P$n)/(P$m-P$m1)
    P$y=P$m*P$x+P$n
  }
  return(P)
}

projectPoints <- function(x, y, slope = 1, yint = 0, returnPoint = TRUE){
  ## Solve for 1:1 line by default

  ## So where does y = mx and y = -1/m * x + b converge
  perp_slope <-  - 1 / slope
  ## Solve for c given x and y
  perp_int <- -perp_slope*x + y

  ## Set equations equal to each other on y
  ## -1/m*x + b = mx
  xproj <- (perp_int - yint) / (slope + -perp_slope)
  yproj <- slope * xproj + yint

  if (returnPoint){
    out <- data.frame(x = xproj, y = yproj)
  } else {
    out <- sqrt((xproj^2) + (yproj^2))
    out <- dplyr::if_else(xproj < 0, -out, out)
  }
  return(out)
}

matchColClasses <- function(df1, df2) {
  df1 <- as.data.frame(df1)
  df2 <- as.data.frame(df2)

  sharedColNames <- names(df1)[names(df1) %in% names(df2)]
  sharedColTypes <- sapply(df1[,sharedColNames], class)

  for (n in 1:length(sharedColNames)) {
    class(df2[, sharedColNames[n]]) <- sharedColTypes[n]
  }

  return(df2)
}

################ PREVIOUS FUNCTIONS ######################
#### SHANNON'S EVENESS INDEX (H)
#
# speciesObs: vector of observations (species or unique ID)
#
# Returns evenness score (numeric)
####
divIndex <- function(SPCD, TPA, index) {
  # Shannon's Index
  if(index == 'H'){
    species <- unique(SPCD[TPA > 0 & !is.na(TPA)])
    total <- sum(TPA, na.rm = TRUE)

    p <- c() # Empty vector to hold proportions
    for (i in 1:length(species)){
      p[i] <- sum(TPA[SPCD == species[i]], na.rm = TRUE) / total
    }
    value <- -sum(p*log(p), na.rm = TRUE)
  }
  if(index == 'Eh'){
    species <- unique(SPCD[TPA > 0 & !is.na(TPA)])
    total <- sum(TPA, na.rm = TRUE)
    p <- c() # Empty vector to hold proportions
    for (i in 1:length(species)){
      p[i] <- sum(TPA[SPCD == species[i]], na.rm = TRUE) / total
    }
    S <- length(unique(SPCD[TPA > 0 & !is.na(TPA)]))
    if(S == 0) S <- NA
    value <- -sum(p*log(p), na.rm = TRUE) / S
  }
  # Richness
  if(index == 'S'){
    value = length(unique(SPCD[TPA > 0 & !is.na(TPA)])) ## Assumes equal probabilty of detection, not true because of nested sampling design
  }
  # Simpsons Index
  if(index == 'D'){

    # species <- unique(SPCD[TPA > 0])
    # total <- sum(TPA, na.rm = TRUE)
    # p <- c() # Empty vector to hold proportions
    # for (i in 1:length(species)){
    #   p[i] <- sum(TPA[SPCD == species[i]], na.rm = TRUE) / total
    # }
    # value <- 1 /
    #
    #
    # total <- sum(TPA, na.rm = TRUE)
    # props <- data.frame(SPCD, TPA) %>%
    #   filter(TPA > 0) %>%
    #   group_by(SPCD) %>%
    #   summarize(tpa = sum(TPA, na.rm = TRUE)) %>%
    #   mutate(prop = tpa / total) %>%
    #   summarize(D = 1 / sum(prop^2, na.rm = TRUE))
    # props$D[is.infinite(props$D)] <- 0
    # value <- props$D
    # #value <- 1 - (sum(p*(p-1), na.rm = TRUE) / (total * (total-1)))
  }
  # Berger–Parker dominance
  if(index == 'BP'){
    species <- unique(SPCD[TPA > 0])
    total <- sum(TPA, na.rm = TRUE)
    p <- c() # Empty vector to hold proportions
    for (i in 1:length(species)){
      p[i] <- sum(TPA[SPCD == species[i]], na.rm = TRUE) / total
    }
    value <- max(p)
  }
  return(value)
}

areal_par <- function(x, pltSF, polys){
  pltSF <- sf::st_intersection(pltSF, polys[[x]]) %>%
    as.data.frame() %>%
    dplyr::select(-c('geometry')) # removes artifact of SF object
}

## Exponenetially weighted moving average
ema <- function(x, yrs, var = FALSE){
  l <- 2 / (1 + dplyr::first(yrs))
  wgts <- c()
  for (i in 1:length(x)) wgts[i] <- l^(i-1)*(1-l)

  if (var){
    #out <- sum(wgts^2 * x,na.rm = TRUE)
    out <- wgts^2 * x
  } else {
    #out <- sum(wgts * x,na.rm = TRUE)
    out <- wgts * x
  }

  return(out)
}


#' @export
makeClasses <- function(x, interval = NULL, lower = NULL, upper = NULL, brks = NULL, numLabs = FALSE){
  if(is.null(brks)){
    ## If min & max isn't specified, then use the data to compute
    low <- ifelse(is.null(lower), min(x, na.rm = TRUE), lower)
    up <- ifelse(is.null(upper), max(x, na.rm = TRUE), upper)
    # Compute class intervals
    brks = c(low)
    while (as.numeric(tail(brks,1)) < as.numeric(up)){
      brks <- c(brks, tail(brks,1) + interval)
    }
  } else {
    low = lower
  }

  # Apply classes to data
  classes <- cut(x, breaks = brks, include.lowest = TRUE, right = FALSE)

  # Convert to numeric (lowest value of interval)
  if (numLabs){
    classes <- as.numeric(classes) * interval -interval + low
  }

  return(classes)
}

#### Basal Area Function (returns sq units of diameter)
basalArea <- function(diameter, DIA_MID = NULL){
  #ba = ((diameter/2)^2) * pi
  # if (!is.null(DIA_MID)){
  #   diameter[is.null(diameter)] <- DIA_MID[is.null(diameter)]
  # }
  # ba = diameter^2 * .005454 # SQ FT, consistency with FIA EVALIDator

  # ba <- case_when(
  #   is.na(diameter) ~ NA_real_,
  #   ## Growth accounting only
  #   diameter < 0 ~ -(diameter^2 * .005454),
  #   TRUE ~ diameter^2 * .005454)

  #negative <- data.table::fifelse(diameter < 0, -1, 1)
  ## This allows us to retain negative values for change functions, and is faster
  ## than using an ifelse
  ba <- diameter * abs(diameter) * .005454


  return(ba)
}

### MODE FUNCTIO

##### Classification of Stand Structural Stage ######
##
## Classifies stand structural stage as pole, mature, late-successional, or mosaic
##  based on relative basal area of live canopy trees within pole, mature & large classes
##
##  diameter: stem DBH (inches) (DIA)
##  crownClass: canopy position of stem, suppressed and open grown excluded (CCLCD)
structHelper <- function(dia, crownClass){

  # Exclude suppressed and open grown stems from analysis
  dia = dia[crownClass %in% c(2,3,4)]

  # Total basal area within plot
  totalBA = sum(basalArea(dia[dia >= 5]), na.rm = TRUE)

  # Calculate proportion of stems in each size class by basal area
  pole = sum(basalArea(dia[dia >= 5 & dia < 10.23622]), na.rm = TRUE) / totalBA
  mature = sum(basalArea(dia[dia >= 10.23622 & dia < 18.11024]), na.rm = TRUE) / totalBA
  large = sum(basalArea(dia[dia >=18.11024]), na.rm = TRUE) / totalBA

  # Series of conditionals to identify stand structural stage based on basal
  #   area proportions in each size class
  if(is.nan(pole) | is.nan(mature) | is.nan(large)){
    stage = 'mosaic'
  } else if ( ((pole + mature) > .67) &  (pole > mature)){
    stage = 'pole'
  } else if(((pole + mature) > .67) &  (pole < mature)){
    stage = 'mature'
  } else if(((mature + large) > .67) & (mature > large)){
    stage = 'mature'
  } else if(((mature + large) > .67) & (mature < large)){
    stage = 'late'
  } else{
    stage = 'mosaic'
  }

  return(as.factor(stage))
}


# Prop basis helper
adjHelper <- function(DIA, MACRO_BREAKPOINT_DIA, ADJ_FACTOR_MICR, ADJ_FACTOR_SUBP, ADJ_FACTOR_MACR){
  # IF it doesnt exist make it massive
  MACRO_BREAKPOINT_DIA[is.na(MACRO_BREAKPOINT_DIA)] <- 10000
  # Replace DIA with adjustment factors
  adj <- DIA
  adj[is.na(DIA)] <- ADJ_FACTOR_SUBP[is.na(DIA)]
  adj[DIA < 5 & !is.na(DIA)] <- ADJ_FACTOR_MICR[DIA < 5 & !is.na(DIA)]
  adj[DIA >= 5 & DIA < MACRO_BREAKPOINT_DIA & !is.na(DIA)] <- ADJ_FACTOR_SUBP[DIA >= 5 & DIA < MACRO_BREAKPOINT_DIA & !is.na(DIA)]
  adj[DIA >= MACRO_BREAKPOINT_DIA & !is.na(DIA)] <- ADJ_FACTOR_MACR[DIA >= MACRO_BREAKPOINT_DIA & !is.na(DIA)]

  return(adj)

}

# GRM adjustment helper
grmAdj <- function(subtyp, adjMicr, adjSubp, adjMacr) {

  data <- data.frame(typ = as.numeric(subtyp), micr = as.numeric(adjMicr), subp =as.numeric(adjSubp), macr =as.numeric(adjMacr))

  data <- data %>%
    dplyr::mutate(adj = dplyr::case_when(
      typ == 0 ~ 0,
      typ == 1 ~ subp,
      typ == 2 ~ micr,
      typ == 3 ~ macr,
    ))

  return(data$adj)
}

# #stratVar <- function(x, a, p2, method, y = NULL){
#   p2 <- first(p2)
#   method <- first(method)
#   a <- first(a)
#   ## Variance Estimation
#   if (is.null(y)){
#     if (method == 'simple'){
#       out <- var(x * a / p2)
#     } else {
#       out <- (sum(x^2) - sum(p2 *  mean(x, na.rm = TRUE)^2)) / (p2 * (p2-1))
#     }
#     ## Covariance Estimation
#   } else {
#     if (method == 'simple'){
#       out <- cov(x,y)
#     } else {
#       out <- (sum(x*y) - sum(p2 * mean(x, na.rm = TRUE) * mean(y, na.rm = TRUE))) / (p2 * (p2-1))
#     }
#   }
#
# }
# stratVar <- function(ESTN_METHOD, x, xStrat, ndif, a, nh, y = NULL, yStrat = NULL){
#   ## Variance
#   if (is.null(y)){
#     v <- ifelse(first(ESTN_METHOD == 'simple'),
#                 var(c(x, numeric(ndif)) * first(a) / nh),
#                 (sum((c(x, numeric(ndif))^2), na.rm = TRUE) - nh * xStrat^2) / (nh * (nh-1)))
#     ## Covariance
#   } else {
#     v <- ifelse(first(ESTN_METHOD == 'simple'),
#                 cov(x,y),
#                 (sum(x*y, na.rm = TRUE) - sum(nh * xStrat *yStrat, na.rm = TRUE)) / (nh * (nh-1)))
#   }
# }

# Helper function to compute variance for estimation units (manages different estimation methods)
unitVarDT <- function(method, ESTN_METHOD, a, nh, w, v, stratMean, stratMean1 = NULL){
  unitM <- unitMean(ESTN_METHOD, a, nh, w, stratMean)
  unitM1 <- unitMean(ESTN_METHOD, a, nh, w, stratMean1)
  if(method == 'var'){
    uv = ifelse(first(ESTN_METHOD) == 'strat',
                ((first(a)^2)/sum(nh)) * (sum(w*nh*v) + sum((1-w)*(nh/sum(nh))*v)),
                ifelse(first(ESTN_METHOD) == 'double',
                       (first(a)^2) * (sum(((nh-1)/(sum(nh)-1))*(nh/sum(nh))*v) + ((1/(sum(nh)-1))*sum((nh/sum(nh))*(stratMean - (unitM/first(a)))^2))),
                       sum(v))) # Stratified random case
  } else { # Compute covariance
    cv = ifelse(first(ESTN_METHOD) == 'strat',
                ((first(a)^2)/sum(nh)) * (sum(w*nh*v) + sum((1-w)*(nh/sum(nh))*v)),
                ifelse(first(ESTN_METHOD) == 'double',
                       (first(a)^2) * (sum(((nh-1)/(sum(nh)-1))*(nh/sum(nh))*v) + ((1/(sum(nh)-1))*sum((nh/sum(nh))*(stratMean - unitM) * (stratMean1 - (unitM1/first(a)))))),
                       sum(v))) # Stratified random case (additive covariance)
  }
}

unitVar <- function(method, ESTN_METHOD, a, nh, w, v, stratMean, unitM, stratMean1 = NULL, unitM1 = NULL){
  if(method == 'var'){
    uv = ifelse(dplyr::first(ESTN_METHOD) == 'strat',
                ((dplyr::first(a)^2)/sum(nh)) * (sum(w*nh*v) + sum((1-w)*(nh/sum(nh))*v)),
                ifelse(dplyr::first(ESTN_METHOD) == 'double',
                       (dplyr::first(a)^2) * (sum(((nh-1)/(sum(nh)-1))*(nh/sum(nh))*v) + ((1/(sum(nh)-1))*sum((nh/sum(nh))*(stratMean - (unitM/dplyr::first(a)))^2))),
                       sum(v))) # Stratified random case
  } else {
    cv = ifelse(dplyr::first(ESTN_METHOD) == 'strat',
                ((dplyr::first(a)^2)/sum(nh)) * (sum(w*nh*v) + sum((1-w)*(nh/sum(nh))*v)),
                ifelse(dplyr::first(ESTN_METHOD) == 'double',
                       (dplyr::first(a)^2) * (sum(((nh-1)/(sum(nh)-1))*(nh/sum(nh))*v) + ((1/(sum(nh)-1))*sum((nh/sum(nh))*(stratMean - unitM) * (stratMean1 - (unitM1/dplyr::first(a)))))),
                       sum(v))) # Stratified random case (additive covariance)
  }
}

unitVarNew <- function(method, ESTN_METHOD, a, nh, n, w, v, stratMean, unitM, stratMean1 = NULL, unitM1 = NULL){
  if(method == 'var'){
    uv = ifelse(dplyr::first(ESTN_METHOD) == 'strat',
                ((dplyr::first(a)^2)/n) * (sum(w*nh*v, na.rm = TRUE) + sum((1-w)*(nh/n)*v, na.rm = TRUE)),
                ifelse(dplyr::first(ESTN_METHOD) == 'double',
                       (dplyr::first(a)^2) * (sum(((nh-1)/(n-1))*(nh/n)*v, na.rm = TRUE) + ((1/(n-1))*sum((nh/n)*(stratMean - (unitM/dplyr::first(a)))^2, na.rm = TRUE))),
                       sum(v, na.rm = TRUE))) # Stratified random case
  } else {
    cv = ifelse(dplyr::first(ESTN_METHOD) == 'strat',
                ((dplyr::first(a)^2)/n) * (sum(w*nh*v, na.rm = TRUE) + sum((1-w)*(nh/n)*v, na.rm = TRUE)),
                ifelse(dplyr::first(ESTN_METHOD) == 'double',
                       (dplyr::first(a)^2) * (sum(((nh-1)/(n-1))*(nh/n)*v, na.rm = TRUE) + ((1/(n-1))*sum((nh/n)*(stratMean - unitM) * (stratMean1 - (unitM1/dplyr::first(a))), na.rm = TRUE))),
                       sum(v))) # Stratified random case (additive covariance)
  }
}

## Compute ratio variances at the estimation unit level
rVar <- function(x, y, xVar, yVar, xyCov){
  ## Ratio
  r <- y / x
  ## Ratio variance
  rv <- (1 / x^2) * (yVar + (r^2 * xVar) - (2 * r * xyCov))

  return(rv)
}

# Helper function to compute variance for estimation units (manages different estimation methods)
unitMean <- function(ESTN_METHOD, a, nh, w, stratMean){
  um = ifelse(dplyr::first(ESTN_METHOD) == 'strat',
              sum(stratMean * w, na.rm = TRUE) * dplyr::first(a),
              ifelse(dplyr::first(ESTN_METHOD) == 'double',
                     sum(stratMean * (nh / sum(nh)), na.rm = TRUE) * dplyr::first(a),
                     mean(stratMean, na.rm = TRUE) * dplyr::first(a))) # Simple random case
}

## Calculate change for VR
## Calculate change for VR
# vrNAHelper <- function(attribute2, attribute1){
#   ## IF one time is NA, then both must be NA
#   vals <- case_when(
#     is.na(attribute)
#   )
# }


## Replace current attributes with midpoint attributes depending on component
vrAttHelper <- function(attribute, attribute.prev, attribute.mid, attribute.beg, component, remper, oneortwo){

  ## ONLY WORKS FOR ATTRIBUTES DEFINED IN TRE_MIDPNT and TRE_BEGIN
  at <- dplyr::case_when(
    oneortwo == 2 ~ dplyr::case_when(
      stringr::str_detect(component, c('SURVIVOR|INGROWTH|REVERSION')) ~ attribute / remper,
      stringr::str_detect(component, c('CUT|DIVERSION')) ~ attribute.mid / remper),
    oneortwo == 1 ~ dplyr::case_when(
      stringr::str_detect(component, c('SURVIVOR|CUT1|DIVERSION1|MORTALITY1')) ~ dplyr::case_when(
        !is.na(attribute.beg) ~ - attribute.beg / remper,
        TRUE ~ - attribute.prev / remper)))

  return(at)
}

stratVar <- function(ESTN_METHOD, x, xStrat, ndif, a, nh, y = NULL, yStrat = NULL){
  ## Variance
  if (is.null(y)){
    v <- ifelse(dplyr::first(ESTN_METHOD == 'simple'),
                var(c(x, numeric(ndif)) * dplyr::first(a) / nh),
                (sum((c(x, numeric(ndif))^2), na.rm = TRUE) - nh * xStrat^2) / (nh * (nh-1)))
    ## Covariance
  } else {
    v <- ifelse(dplyr::first(ESTN_METHOD == 'simple'),
                cov(x,y),
                (sum(x*y, na.rm = TRUE) - sum(nh * xStrat * yStrat, na.rm = TRUE)) / (nh * (nh-1)))
  }
}



















## Some base functions for the FIA Database Class
#' @export
summary.FIA.Database <- function(object, ...){
  cat('---- FIA Database Object -----', '\n')
  # Years available
  if (!is.null(object$POP_EVAL$END_INVYR)){
    cat('Reporting Years: ',
        unique(object$POP_EVAL$END_INVYR[order(object$POP_EVAL$END_INVYR)]), '\n')
  }
  # States Covered
  if (!is.null(object$PLOT$STATECD)){
    states <- unique(ifelse(stringr::str_length(object$PLOT$STATECD) < 2, paste(0, object$PLOT$STATECD, sep = ''), object$PLOT$STATECD))
    cat('States:          ',
        as.character(unique(intData$EVAL_GRP$STATE[intData$EVAL_GRP$STATECD %in% states])), '\n')
  }
  # Number of Plots
  if (!is.null(object$POP_STRATUM)){
    eval <- rFIA::findEVALID(object, mostRecent = TRUE, type = 'CURR')
    nPlots <- object$POP_STRATUM %>%
      dplyr::filter(EVALID %in% eval) %>%
      dplyr::group_by(ESTN_UNIT_CN, CN) %>%
      dplyr::summarise(n = first(P2POINTCNT)) %>%
      dplyr::summarise(n = sum(n))
    cat('Total Plots:     ', sum(nPlots$n), '\n')
  }

  ## Memory Allocated
  mem <- object.size(object)
  cat('Memory Used:     ', format(mem, units = 'MB'), '\n')

  ## Tables included

  cat('Tables:          ', names(object), '\n')

}
#' @export
print.FIA.Database <- function(x, ...){
  cat('---- FIA Database Object -----', '\n')
  # Years available
  if (!is.null(x$POP_EVAL$END_INVYR)){
    cat('Reporting Years: ',
        unique(x$POP_EVAL$END_INVYR[order(x$POP_EVAL$END_INVYR)]), '\n')
  }
  # States Covered
  if (!is.null(x$PLOT$STATECD)){
    states <- unique(ifelse(stringr::str_length(x$PLOT$STATECD) < 2, paste(0, x$PLOT$STATECD, sep = ''), x$PLOT$STATECD))
    cat('States:          ',
        as.character(unique(intData$EVAL_GRP$STATE[intData$EVAL_GRP$STATECD %in% states])), '\n')
  }
  # Number of Plots
  if (!is.null(x$POP_STRATUM)){
    eval <- rFIA::findEVALID(x, mostRecent = TRUE, type = 'CURR')
    nPlots <- x$POP_STRATUM %>%
      dplyr::filter(EVALID %in% eval) %>%
      dplyr::group_by(ESTN_UNIT_CN, CN) %>%
      dplyr::summarise(n = dplyr::first(P2POINTCNT)) %>%
      dplyr::summarise(n = sum(n))
    cat('Total Plots:     ', sum(nPlots$n), '\n')
  }

  ## Memory Allocated
  mem <- object.size(x)
  cat('Memory Used:     ', format(mem, units = 'MB'), '\n')

  ## Tables included
  cat('Tables:          ', names(x), '\n', '\n')

  if (length(x) > 1){
    print(sapply(x, as_tibble))
  } else {
    print(as_tibble(x[1]))
  }
  cat('\n')
}

#' @export
summary.Remote.FIA.Database <- function(object, ...){
  cat('---- Remote FIA Database Object -----', '\n')

  # States Covered
  if (!is.null(object$states)){
    cat('States:          ',
        as.character(object$states), '\n')
  }

  ## Memory Allocated
  mem <- object.size(object)
  cat('Memory Used:     ', format(mem, units = 'MB'), '\n')


}
#' @export
print.Remote.FIA.Database <- function(x, ...){
  cat('---- Remote FIA Database Object -----', '\n')

  # States Covered
  if (!is.null(x$states)){
    cat('States:          ',
        as.character(x$states), '\n')
  }

  ## Memory Allocated
  mem <- object.size(x)
  cat('Memory Used:     ', format(mem, units = 'MB'), '\n')

}

#' @export
str.FIA.Database <- function(object, ...) {
  cat(paste('FIA.Database', "\n"))
}

#' @export
str.Remote.FIA.Database <- function(object, ...) {
  cat(paste('Remote.FIA.Database', "\n"))
}








#globalVariables(c('.'))

## Not exported
readFHM <- function(dir, tables = NULL, nCores = 1){
  # Add a slash to end of directory name if missing
  if (stringr::str_sub(dir,-1) != '/') dir <- paste(dir, '/', sep = "")
  # Grab all the file names in directory
  files <- list.files(dir)
  inTables <- list()

  # Some warnings
  if(!dir.exists(dir)) {
    stop(paste('Directory', dir, 'does not exist.'))
  }
  if(length(files[stringr::str_to_lower(stringr::str_sub(files,-4, -1)) == '.csv']) < 1){
    stop(paste('Directory', dir, 'contains no .csv files.'))
  }


  # Only read in the specified tables
  if (!is.null(tables)){
    if (any(stringr::str_sub(files, 3, 3) == '_')){
      files <- files[stringr::str_sub(files,4,-5) %in% tables]
    } else {
      files <- files[stringr::str_sub(files,1,-5) %in% tables]
    }
  }

  # Only csvs
  files <- files[stringr::str_to_lower(stringr::str_sub(files,-4,-1)) == '.csv']

  inTables <- list()
  for (n in 1:length(files)){
    # Read in and append each file to a list
    file <- data.table::fread(paste(dir, files[n], sep = ""), showProgress = FALSE, integer64 = 'double', logical01 = FALSE, nThread = nCores)
    # We don't want data.table formats
    #file <- as.data.frame(file)
    fileName <- stringr::str_sub(files[n], 1, -5)

    inTables[[fileName]] <- file
  }

  outTables <- list()
  names(inTables) <- stringr::str_sub(names(inTables), 4, -6)

  uniqueNames <- unique(names(inTables))
  ## Works regardless of whether or not there are duplicate names (multiple states)
  for (i in 1:length(uniqueNames)){
    outTables[[uniqueNames[i]]] <- data.table::rbindlist(inTables[names(inTables) == uniqueNames[i]], fill=TRUE)
  }

  # NEW CLASS NAME FOR FIA DATABASE OBJECTS
  outTables <- lapply(outTables, as_tibble)
  class(outTables) <- 'FIA.Database'

  ## If you are on windows, close explicitly
  #closeAllConnections()

  return(outTables)
}


## Not exported
getFHM <- function(states,
                   dir = NULL,
                   nCores = 1){

  if (!is.null(dir)){
    # Add a slash to end of directory name if missing
    if (str_sub(dir,-1) != '/'){
      dir <- paste(dir, '/', sep = "")
    }
    # Check to see directory exists, if not, make it
    if(!dir.exists(dir)) {
      dir.create(dir)
      message(paste('Creating directory:', dir))
    }
  }

  ## If dir is not specified, hold in a temporary directory
  if (is.null(dir)){tempDir <- tempdir()}

  #   ## Some warnings up front
  #   ## Do not try to merge ENTIRE with other states
  #   if (length(states) > 1 & any(stringr::str_detect(stringr::str_to_upper(states), 'ENTIRE'))){
  #     stop('Cannot merge ENITRE with other state tables. ENTIRE includes all state tables combined. Do you only need data for a particular region?')
  #   }
  #   ## Check to make sure states exist
  #   allStates <- c('AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID',
  #                  'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS',
  #                  'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK',
  #                  'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV',
  #                  'WI', 'WY', 'AS', 'FM', 'GU', 'MP', 'PW', 'PR', 'VI', 'ENTIRE', 'REF')
  #   if (any(stringr::str_to_upper(states) %in% allStates == FALSE)){
  #     missStates <- states[stringr::str_to_upper(states) %in% allStates == FALSE]
  #     stop(paste('Data unavailable for: ', paste(as.character(missStates),collapse = ', '), '. Did you use state/terrority abbreviations? e.g. use states = "AL" (correct) instead of states = "ALABAMA".'))
  #   }
  #
  #   ## Check to make sure tables exist
  #   allTables <- c("BOUNDARY", "COND_DWM_CALC", "COND","COUNTY","DWM_COARSE_WOODY_DEBRIS",
  #                  "DWM_DUFF_LITTER_FUEL","DWM_FINE_WOODY_DEBRIS","DWM_MICROPLOT_FUEL",
  #                  "DWM_RESIDUAL_PILE", "DWM_TRANSECT_SEGMENT", "DWM_VISIT","GRND_CVR",
  #                  "INVASIVE_SUBPLOT_SPP","LICHEN_LAB","LICHEN_PLOT_SUMMARY","LICHEN_VISIT",
  #                  "OZONE_BIOSITE_SUMMARY","OZONE_PLOT_SUMMARY","OZONE_PLOT","OZONE_SPECIES_SUMMARY",
  #                  "OZONE_VALIDATION","OZONE_VISIT", "P2VEG_SUBP_STRUCTURE","P2VEG_SUBPLOT_SPP",
  #                  "PLOT_REGEN","PLOT", "PLOTGEOM", "PLOTSNAP","POP_ESTN_UNIT","POP_EVAL_ATTRIBUTE",
  #                  "POP_EVAL_GRP","POP_EVAL_TYP","POP_EVAL","POP_PLOT_STRATUM_ASSGN","POP_STRATUM",
  #                  "SEEDLING_REGEN","SEEDLING","SITETREE","SOILS_EROSION","SOILS_LAB","SOILS_SAMPLE_LOC" ,
  #                  "SOILS_VISIT", "SUBP_COND_CHNG_MTRX","SUBP_COND","SUBPLOT_REGEN","SUBPLOT",
  #                  "SURVEY","TREE_GRM_BEGIN","TREE_GRM_COMPONENT","TREE_GRM_ESTN", "TREE_GRM_MIDPT",
  #                  "TREE_REGIONAL_BIOMASS", "TREE_WOODLAND_STEMS","TREE","VEG_PLOT_SPECIES",
  #                  "VEG_QUADRAT","VEG_SUBPLOT_SPP","VEG_SUBPLOT", "VEG_VISIT",
  #                  'CITATION', 'DIFFERENCE_TEST_PER_ACRE', 'DIFFERENCE_TEST_TOTALS',
  #                  'FIADB_VERSION', 'FOREST_TYPE', 'FOREST_TYPE_GROUP',
  #                  'GRM_TYPE', 'HABTYP_DESCRIPTION', 'HABTYP_PUBLICATION',
  #                  'INVASIVE_SPECIES', 'LICHEN_SPECIES', 'LICHEN_SPP_COMMENTS',
  #                  'NVCS_HEIRARCHY_STRCT', 'NVCS_LEVEL_1_CODES', 'NVCS_LEVEL_2_CODES',
  #                  'NVCS_LEVEL_3_CODES', 'NVCS_LEVEL_4_CODES', 'NVCS_LEVEL_5_CODES',
  #                  'NVCS_LEVEL_6_CODES', 'NVCS_LEVEL_7_CODES', 'NVCS_LEVEL_8_CODES',
  #                  'OWNGRPCD', 'PLANT_DICTIONARY', 'POP_ATTRIBUTE', 'POP_EVAL_TYP_DESCR',
  #                  'RESEARCH_STATION', 'SPECIES', 'SPECIES_GROUP', 'STATE_ELEV', 'UNIT')
  #   if (any(stringr::str_to_upper(tables) %in% allTables == FALSE)){
  #     missTables <- tables[stringr::str_to_upper(tables) %in% allTables == FALSE]
  #     stop(paste('Tables: ', paste(as.character(missTables),collapse = ', '), ' unavailble. Check the FIA Datamart at https://apps.fs.usda.gov/fia/datamart/CSV/datamart_csv.html for a list of available tables for each state. Alternatively, specify common = TRUE to download the most commonly used tables.
  #
  # Did you accidentally include the state abbreviation in front of the table name? e.g. tables = "AL_PLOT" (wrong) instead of tables = "PLOT" (correct).'))
  #   }
  #


  ## If individual tables are specified, then just grab those .csvs, otherwise download the .zip file, extract and read with fread. Should be quite a bit quicker.
  urlNames <- sapply(states, FUN = function(x){paste0(x,'.zip')})
  urlNames <- c(urlNames)
  ## Set up urls
  urls <- paste0('https://www.fia.fs.fed.us/tools-data/other_data/csv/', urlNames)

  # Make sure state Abbs are in right format
  states <- stringr::str_to_upper(states)


  ## If dir is not specified, hold in a temporary directory
  if (is.null(dir)){tempDir <- tempdir()}


  ## Download each state and extract to directory
  for (i in 1:length(states)){
    # Temporary directory to download to
    temp <- tempfile()
    ## Make the URL
    url <- paste0('https://www.fia.fs.fed.us/tools-data/other_data/csv/', states[i],'.zip')
    ## Download as temporary file
    download.file(url, temp)
    ## Extract
    if (is.null(dir)){
      unzip(temp, exdir = tempDir)
    } else {
      unzip(temp, exdir = str_sub(dir, 1, -2))
    }
    unlink(temp)
  }

  ## Read in the files w/ readFHM
  if (is.null(dir)){
    outTables <- readFHM(tempDir, nCores = nCores)

    unlink(tempDir)

    #unlink(tempDir, recursive = TRUE)

  } else {
    outTables <- readFHM(dir, nCores = nCores)
  }

  # NEW CLASS NAME FOR FIA DATABASE OBJECTS
  #outTables <- lapply(outTables, as.data.frame)
  class(outTables) <- 'FHM.Database'

  return(outTables)

}


### Connect to an SQLite3 backend
# connectFIA <- function(dir){
#   ## Connect to the database
#   db <- dbConnect(RSQLite::SQLite(), dir)
#
#   ## Grab the names and store object in list like those held in memory
#   tableNames <- dbListTables(db)
#   outList <- list()
#   for (i in 1:length(tableNames)){
#     outList[[tableNames[i]]] <- tbl(db, tableNames[i])
#   }
#
#   # NEW CLASS NAME FOR FIA DATABASE OBJECTS
#   #outTables <- lapply(outTables, as.data.frame)
#   class(outList) <- 'FIA.Database'
#
#   return(outList)
# }



#### THIS MAY NEED WORK. NOT ALL EVALIDs follow the same coding scheme (ex, CT 2005 --> 95322)
# Look up EVALID codes
#' @export
findEVALID <- function(db = NULL,
                       mostRecent = FALSE,
                       state = NULL,
                       year = NULL,
                       type = NULL){

  #### REWRITING FOR SIMPLICITY #####
  # Joing w/ evaltype code
  ids <- db$POP_EVAL %>%
    dplyr::left_join(dplyr::select(db$POP_EVAL_TYP, c('EVAL_GRP_CN', 'EVAL_TYP')), by = 'EVAL_GRP_CN') %>%
    dplyr::mutate(place = stringr::str_to_upper(LOCATION_NM))

  if (!is.null(state)){
    state <- stringr::str_to_upper(state)
    evalGrp <- intData$EVAL_GRP %>%
      dplyr::select(STATECD, STATE) %>%
      dplyr::mutate(STATECD = as.numeric(STATECD))
    ## Join state abbs with state codes in popeval
    ids <- dplyr::left_join(ids, evalGrp, by = 'STATECD')
    # Check if any specified are missing from db
    if (any(unique(state) %in% unique(evalGrp$STATE) == FALSE)){
      missStates <- state[state %in% unique(evalGrp$STATE) == FALSE]
      fancyName <- unique(intData$EVAL_GRP$STATE[intData$EVAL_GRP$STATECD %in% missStates])
      stop(paste('States: ', toString(fancyName) , 'not found in db.', sep = ''))
    }
    ids <- dplyr::filter(ids, STATE %in% state)
  }
  if (!is.null(year)){
    #year <- ifelse(str_length(year) == 2, year, str_sub(year, -2,-1))
    ids <- dplyr::filter(ids, END_INVYR %in% year)
  }
  if (!is.null(type)){
    ids <- dplyr::filter(ids, EVAL_TYP %in% paste0('EXP', type))
  }
  if (mostRecent) {

    ## Grouped filter wasn't working as intended, use filtering join
    maxYear <- ids %>%
      ## Remove TX, do it seperately
      dplyr::filter(!(STATECD %in% 48)) %>%
      dplyr::mutate(place = stringr::str_to_upper(LOCATION_NM)) %>%
      dplyr::group_by(place, EVAL_TYP) %>%
      dplyr::summarize(END_INVYR = max(END_INVYR, na.rm = TRUE),
                       LOCATION_NM = dplyr::first(LOCATION_NM))

    ## Texas coding standards are very bad
    ## Name two different inventory units with 5 different names
    ## Due to that, only use inventories for the ENTIRE state, sorry
    if (any(ids$STATECD %in% 48)){
      # evalType <- c('EXP_ALL', 'EXP_VOL', '')
      # evalCode <- c('00', '01', '03', '07', '09', '29')
      #
      # txIDS <- ids %>%
      #   filter(STATECD %in% 48) %>%
      #   # ## Removing any inventory that references east or west, sorry
      #   # filter(stringr::str_detect(stringr::str_to_upper(EVAL_DESCR), 'EAST', negate = TRUE) &
      #   #          stringr::str_detect(stringr::str_to_upper(EVAL_DESCR), 'WEST', negate = TRUE)) %>%
      #   mutate(typeCode = str_sub(str_trim(EVALID), -2, -1))
      #
      #   mutate(place = stringr::str_to_upper(LOCATION_NM)) %>%
      #   group_by(place, EVAL_TYP) %>%
      #   summarize(END_INVYR = max(END_INVYR, na.rm = TRUE),
      #             LOCATION_NM = first(LOCATION_NM))

      ## Will require manual updates, fix your shit texas
      txIDS <- ids %>%
        dplyr::filter(STATECD %in% 48) %>%
        dplyr::filter(END_INVYR < 2017) %>%
        dplyr::filter(END_INVYR > 2006) %>%
        ## Removing any inventory that references east or west, sorry
        dplyr::filter(stringr::str_detect(stringr::str_to_upper(EVAL_DESCR), 'EAST', negate = TRUE) &
                        stringr::str_detect(stringr::str_to_upper(EVAL_DESCR), 'WEST', negate = TRUE)) %>%
        dplyr::mutate(place = stringr::str_to_upper(LOCATION_NM)) %>%
        dplyr::group_by(place, EVAL_TYP) %>%
        dplyr::summarize(END_INVYR = max(END_INVYR, na.rm = TRUE),
                         LOCATION_NM = dplyr::first(LOCATION_NM))

      maxYear <- dplyr::bind_rows(maxYear, txIDS)
    }

    # ids <- ids %>%
    #   mutate(place = stringr::str_to_upper(LOCATION_NM)) %>%
    #   ### TEXAS IS REALLY ANNOYING LIKE THIS
    #   ### FOR NOW, ONLY THE ENTIRE STATE
    #   filter(place %in% c('TEXAS(EAST)', 'TEXAS(WEST)') == FALSE)


    ids <- dplyr::left_join(maxYear, dplyr::select(ids, c('place', 'EVAL_TYP', 'END_INVYR', 'EVALID')), by = c('place', 'EVAL_TYP', 'END_INVYR'))
  }

  # Output as vector
  ID <- unique(ids$EVALID)

  return(ID)
}


ratioVar <- function(x, y, x.var, y.var, cv) {
  r.var <- (1 / (y^2)) * (x.var + ((x/y)^2 * y.var) - (2 * (x/y) * cv) )
  ## Sometimes rounding errors in covariance estimate cause slightly negative
  ## variance estimates for ratios, when this is the case, report 0
  r.var <- dplyr::case_when(r.var < 0 ~ 0,
                            TRUE ~ r.var)
  return(r.var)
}

sumToPlot <- function(x,
                      pops,
                      grpBy) {

  ## Convert to syms so we can use in dplyr functions
  grp.syms <- syms(grpBy)

  ## Sum x variable(s) up to plot-level
  if ('TREE_BASIS' %in% names(x)) {

    ## Allows us to use across in summary functions
    x.vars <- syms(names(x)[!c(names(x) %in% c('PLT_CN', 'TREE_BASIS',
                                               'CONDID', 'SUBP', 'TREE', 'ONEORTWO',
                                               grpBy))])

    ## Sum up to plot
    x <- x %>%
      dtplyr::lazy_dt() %>%
      dplyr::group_by(PLT_CN, TREE_BASIS, !!!grp.syms) %>%
      dplyr::summarize(across(.cols = c(!!!x.vars), .fns = sum, na.rm = TRUE)) %>%
      dplyr::ungroup() %>%

      ## Join population tables w/ adjustment factors for non-response
      dplyr::inner_join(dplyr::select(pops, ESTN_UNIT_CN, STRATUM_CN, PLT_CN,
                                      YEAR, dplyr::any_of('INVYR'),
                                      ADJ_FACTOR_MICR, ADJ_FACTOR_SUBP,
                                      ADJ_FACTOR_MACR), by = 'PLT_CN') %>%

      # Add adjustment factors
      dplyr::mutate(adj = dplyr::case_when(is.na(TREE_BASIS) ~ NA_real_,
                                            ## If the proportion was measured for a macroplot,
                                            ## use the macroplot value
                                            TREE_BASIS == 'MACR' ~ as.numeric(ADJ_FACTOR_MACR),
                                            ## Otherwise, use the subpplot value
                                            TREE_BASIS == 'SUBP' ~ as.numeric(ADJ_FACTOR_SUBP),
                                            TREE_BASIS == 'MICR' ~ as.numeric(ADJ_FACTOR_MICR))) %>%
      ## Apply adjustment
      dplyr::mutate(across(c(!!!x.vars), .fns = ~ .x * adj)) %>%
      dplyr::distinct() %>% # If multiple EVAL_TYP, drop those that we can
      ## Sum across micro, subp, macro
      dplyr::group_by(ESTN_UNIT_CN, STRATUM_CN, PLT_CN, !!!grp.syms) %>%
      dplyr::summarize(across(.cols = c(!!!x.vars), .fns = sum, na.rm = TRUE)) %>%
      as.data.frame()

  } else {

    ## If not tree variables, then condition variables
    x.vars <- syms(names(x)[!c(names(x) %in% c('PLT_CN', 'CONDID',
                                               'AREA_BASIS',
                                               grpBy))])
    # Sum up to plot
    x <- x %>%
      dtplyr::lazy_dt() %>%
      dplyr::group_by(PLT_CN, AREA_BASIS, !!!grp.syms) %>%
      dplyr::summarize(across(.cols = c(!!!x.vars), .fns = sum, na.rm = TRUE)) %>%
      dplyr::ungroup() %>%

      ## Join population tables w/ adjustment factors for non-response
      dplyr::inner_join(dplyr::select(pops, EVALID, ESTN_UNIT_CN, STRATUM_CN, PLT_CN,
                                      YEAR, any_of('INVYR'),
                                      ADJ_FACTOR_MICR, ADJ_FACTOR_SUBP,
                                      ADJ_FACTOR_MACR), by = 'PLT_CN') %>%

      # Add adjustment factors
      dplyr::mutate(adj = dplyr::case_when(
        ## When NA, stay NA
        is.na(AREA_BASIS) ~ NA_real_,
        ## If the proportion was measured for a macroplot,
        ## use the macroplot value
        AREA_BASIS == 'MACR' ~ as.numeric(ADJ_FACTOR_MACR),
        ## Otherwise, use the subplot value
        AREA_BASIS == 'SUBP' ~ ADJ_FACTOR_SUBP)) %>%
      ## Apply adjustment
      dplyr::mutate(across(c(!!!x.vars), .fns = ~ .x * adj)) %>%
      dplyr::distinct() %>% # If multiple EVAL_TYP, drop those that we can
      ## Sum across micro, subp, macro
      dplyr::group_by(ESTN_UNIT_CN, STRATUM_CN, PLT_CN, !!!grp.syms) %>%
      dplyr::summarize(across(.cols = c(!!!x.vars), .fns = sum, na.rm = TRUE)) %>%
      dplyr::ungroup() %>%
      as.data.frame()

  }

  return(x)
}

sumToEU <- function(db,
                    x,
                    y = NULL,
                    pops,
                    x.grpBy,
                    y.grpBy = NULL,
                    method,
                    lambda = NULL) {

  ## If not temporally indifferent, group by INVYR as well
  if (!c(stringr::str_to_upper(method) %in% 'TI')) {
    # We'll remove this after summary to estimation unit
    x.grpBy <- c(x.grpBy, 'INVYR')
    y.grpBy <- c(y.grpBy, 'INVYR')
    pops$P2POINTCNT <- pops$P2POINTCNT_INVYR
    pops$P1POINTCNT <- pops$P1POINTCNT_INVYR
    pops$STRATUM_WGT <- pops$P1POINTCNT_INVYR / pops$P1PNTCNT_EU
    pops$P2PNTCNT_EU <- pops$P2PNTCNT_EU_INVYR

    ## Add INVYR to plot tables
    x <- dplyr::left_join(x, dplyr::select(db$PLOT, PLT_CN, INVYR), by = 'PLT_CN')
    if (!is.null(y)) y <- dplyr::left_join(y, dplyr::select(db$PLOT, PLT_CN, INVYR), by = 'PLT_CN')

    ## Only join by INVYR when using panels
    panel.join.vars <- 'INVYR'
  } else {
    panel.join.vars <- NULL
  }



  ## Convert to syms for dplyr
  x.grp.syms <- dplyr::syms(x.grpBy)
  y.grp.syms <- dplyr::syms(y.grpBy)

  ## Variables names for scoping
  x.var <- names(x)[!c(names(x) %in% c('ESTN_UNIT_CN', 'STRATUM_CN', 'PLT_CN', 'INVYR', x.grpBy))]
  x.var.syms <- dplyr::syms(x.var)
  x.var.m.syms <- dplyr::syms(paste0(x.var, '_mean'))
  x.var.v.syms <- dplyr::syms(paste0(x.var, '_var'))
  x.var.c.syms <- dplyr::syms(paste0(x.var, '_cv'))
  if (!is.null(y)) {
    y.var <- names(y)[!c(names(y) %in% c('ESTN_UNIT_CN', 'STRATUM_CN', 'PLT_CN', 'INVYR', y.grpBy))]
    y.var.syms <- dplyr::sym(y.var)
    y.var.m.syms <- dplyr::sym(paste0(y.var, '_mean'))
    y.var.v.syms <- dplyr::sym(paste0(y.var, '_var'))

  }

  ## Relevant design info at each level
  pops.sub <- pops %>%
    dplyr::select(YEAR, dplyr::any_of('INVYR'), ESTN_UNIT_CN, AREA_USED,
                  P2PNTCNT_EU, STRATUM_CN, STRATUM_WGT, P2POINTCNT) %>%
    dplyr::distinct()

  ## If y is specified
  if (!is.null(y)) {

    ## Strata means, variances for denominator
    yStrat <- y %>%
      dtplyr::lazy_dt() %>%
      dplyr::left_join(pops.sub, by = c('ESTN_UNIT_CN', 'STRATUM_CN', panel.join.vars)) %>%
      dplyr::group_by(ESTN_UNIT_CN, AREA_USED,
                      P2PNTCNT_EU, STRATUM_CN, STRATUM_WGT,
                      P2POINTCNT, !!!y.grp.syms) %>%
      dplyr::summarize(dplyr::across(c(!!y.var.syms),
                              .fns = list(
                                mean = ~ sum(.x / P2POINTCNT, na.rm = TRUE),
                                var = ~ (sum(.x^2, na.rm = TRUE) - (P2POINTCNT * (sum(.x / P2POINTCNT, na.rm = TRUE)^2))) / (P2POINTCNT * (P2POINTCNT - 1))
                              )),
                       nPlots.y = length(unique(PLT_CN))) %>%
      dplyr::ungroup() %>%
      as.data.frame()

    ## EU means, variances for denominator
    yEU <- yStrat %>%
      dtplyr::lazy_dt() %>%
      dplyr::group_by(ESTN_UNIT_CN, AREA_USED, P2PNTCNT_EU, !!!y.grp.syms) %>%
      dplyr::summarise(dplyr::across(c(!!y.var.m.syms),
                              .fns = list(
                                mean = ~ AREA_USED * sum(.x * STRATUM_WGT, na.rm = TRUE)
                              )),
                       dplyr::across(c(!!y.var.v.syms),
                              .fns = list(
                                var = ~ (AREA_USED^2 / P2PNTCNT_EU) * (sum(.x * STRATUM_WGT * P2POINTCNT, na.rm = TRUE) + sum(.x * (1-STRATUM_WGT) * (P2POINTCNT / P2PNTCNT_EU), na.rm = TRUE))
                              )),
                       nPlots.y = sum(nPlots.y, na.rm = TRUE)) %>%
      dplyr::ungroup() %>%
      as.data.frame()


    ## Strata means, variances for numerator
    xStrat <- x %>%
      dtplyr::lazy_dt() %>%
      dplyr::left_join(y, by = c('ESTN_UNIT_CN', 'STRATUM_CN', 'PLT_CN', panel.join.vars, y.grpBy[!c(y.grpBy %in% c('YEAR', 'INVYR'))])) %>%
      dplyr::left_join(dplyr::select(yStrat, ESTN_UNIT_CN, STRATUM_CN, !!!y.grp.syms, !!y.var.m.syms), by = c('ESTN_UNIT_CN', 'STRATUM_CN', panel.join.vars, y.grpBy[!c(y.grpBy %in% c('YEAR', 'INVYR'))])) %>%
      dplyr::left_join(dplyr::select(pops.sub, -c(any_of(c('YEAR')))), by = c('ESTN_UNIT_CN', 'STRATUM_CN', panel.join.vars)) %>%
      dplyr::group_by(ESTN_UNIT_CN, AREA_USED,
                      P2PNTCNT_EU, STRATUM_CN, STRATUM_WGT,
                      P2POINTCNT, !!!x.grp.syms, !!y.var.m.syms) %>%
      dplyr::summarize(dplyr::across(c(!!!x.var.syms), .fns = list(mean = ~ sum(.x / P2POINTCNT, na.rm = TRUE),
                                                        var = ~ (sum(.x^2, na.rm = TRUE) - (P2POINTCNT * sum(.x / P2POINTCNT, na.rm = TRUE)^2)) / (P2POINTCNT * (P2POINTCNT - 1)),
                                                        cv = ~ (sum(.x * !!y.var.syms, na.rm = TRUE) - (P2POINTCNT * sum(.x / P2POINTCNT, na.rm = TRUE) * !!y.var.m.syms)) / (P2POINTCNT * (P2POINTCNT - 1)) )),
                       nPlots.x = length(unique(PLT_CN))) %>%
      dplyr::ungroup() %>%
      as.data.frame()

    ## EU means, variances for numerator
    xEU <- xStrat %>%
      dtplyr::lazy_dt() %>%
      dplyr::group_by(ESTN_UNIT_CN, AREA_USED, P2PNTCNT_EU, !!!x.grp.syms) %>%
      dplyr::summarise(dplyr::across(c(!!!x.var.m.syms),
                              .fns = list(
                                mean = ~ AREA_USED * sum(.x * STRATUM_WGT, na.rm = TRUE)
                              )),
                       dplyr::across(c(!!!x.var.v.syms),
                              .fns = list(
                                var = ~ (AREA_USED^2 / P2PNTCNT_EU) * (sum(.x * STRATUM_WGT * P2POINTCNT, na.rm = TRUE) + sum(.x * (1-STRATUM_WGT) * (P2POINTCNT / P2PNTCNT_EU), na.rm = TRUE))
                              )),
                       dplyr::across(c(!!!x.var.c.syms),
                              .fns = list(
                                cv = ~ (AREA_USED^2 / P2PNTCNT_EU) * (sum(.x * STRATUM_WGT * P2POINTCNT, na.rm = TRUE) + sum(.x * (1-STRATUM_WGT) * (P2POINTCNT / P2PNTCNT_EU), na.rm = TRUE))
                              )),
                       nPlots.x = sum(nPlots.x, na.rm = TRUE)) %>%
      dplyr::ungroup() %>%
      as.data.frame()


    ## Compute moving average weights if not TI ----------------------------------
    if (stringr::str_to_upper(method) %in% c("SMA", 'EMA', 'LMA')){

      ## Only needed INVYR for pop est
      x.grpBy <- x.grpBy[x.grpBy != 'INVYR']
      y.grpBy <- y.grpBy[y.grpBy != 'INVYR']
      x.grp.syms <- dplyr::syms(x.grpBy)
      y.grp.syms <- dplyr::syms(y.grpBy)

      ## Compute the weights
      wgts <- maWeights(pops, method, lambda)

      ## If moving average ribbons, add lambda to grpBy for easier summary
      if (stringr::str_to_upper(method) == 'EMA' & length(lambda) > 1){
        x.grpBy <- c('lambda', x.grpBy)
        y.grpBy <- c('lambda', y.grpBy)
      }


      ## Apply the weights
      if (stringr::str_to_upper(method) %in% c('LMA', 'EMA')){
        joinCols <- c('YEAR', 'STATECD', 'INVYR')
      } else {
        joinCols <- c('YEAR', 'STATECD')
      }

      xEU <- xEU %>%
        dplyr::left_join(dplyr::select(db$POP_ESTN_UNIT, CN, STATECD), by = c('ESTN_UNIT_CN' = 'CN')) %>%
        dplyr::left_join(wgts, by = joinCols) %>%
        dplyr::mutate(dplyr::across(c(!!!x.var.m.syms), ~(.x * wgt))) %>%
        dplyr::mutate(dplyr::across(c(!!!x.var.v.syms, !!!x.var.c.syms), ~(.x * (wgt^2)))) %>%
        dplyr::group_by(ESTN_UNIT_CN, P2PNTCNT_EU, !!!x.grp.syms) %>%
        dplyr::summarize(dplyr::across(c(!!!x.var.m.syms, !!!x.var.v.syms, !!!x.var.c.syms, nPlots.x), sum, na.rm = TRUE))
      yEU <- yEU %>%
        dplyr::left_join(dplyr::select(db$POP_ESTN_UNIT, CN, STATECD), by = c('ESTN_UNIT_CN' = 'CN')) %>%
        dplyr::left_join(wgts, by = joinCols) %>%
        dplyr::mutate(dplyr::across(c(!!y.var.m.syms), ~(.x * wgt))) %>%
        dplyr::mutate(dplyr::across(c(!!y.var.v.syms), ~(.x * (wgt^2)))) %>%
        dplyr::group_by(ESTN_UNIT_CN, !!!y.grp.syms) %>%
        dplyr::summarize(dplyr::across(c(!!y.var.m.syms, !!y.var.v.syms, nPlots.y), sum, na.rm = TRUE))



      ## If using an ANNUAL estimator --------------------------------------------
    } else if (stringr::str_to_upper(method) == 'ANNUAL') {

      ## Only needed INVYR for pop est
      x.grpBy <- x.grpBy[x.grpBy != 'INVYR']
      y.grpBy <- y.grpBy[y.grpBy != 'INVYR']


      # If INVYR is in YEAR, choose the estimates when INVYR == YEAR
      # Otherwise, choose the estimates produced with the most plots
      xEU <- filterAnnual(xEU, x.grpBy, nPlots.x, db$POP_ESTN_UNIT)
      yEU <- filterAnnual(yEU, y.grpBy, nPlots.y, db$POP_ESTN_UNIT)

    } else if (stringr::str_to_upper(method) == 'ANNUAL_COV') {

      xEU$YEAR <- xEU$INVYR
      yEU$YEAR <- yEU$INVYR
    }


  } else {
    ## Otherwise just strata mean and variance for x

    yEU <- NULL

    ## Strata means, variances for numerator
    xStrat <- x %>%
      dtplyr::lazy_dt() %>%
      dplyr::left_join(pops.sub, by = c('ESTN_UNIT_CN', 'STRATUM_CN', panel.join.vars)) %>%
      dplyr::group_by(ESTN_UNIT_CN, AREA_USED,
                      P2PNTCNT_EU, STRATUM_CN, STRATUM_WGT,
                      P2POINTCNT, !!!x.grp.syms) %>%
      dplyr::summarize(dplyr::across(c(!!!x.var.syms), .fns = list(mean = ~ sum(.x / P2POINTCNT, na.rm = TRUE),
                                                            var = ~ (sum(.x^2, na.rm = TRUE) - (P2POINTCNT * sum(.x / P2POINTCNT, na.rm = TRUE)^2)) / (P2POINTCNT * (P2POINTCNT - 1))
                                                            )),
                       nPlots.x = length(unique(PLT_CN))) %>%
      dplyr::ungroup() %>%
      as.data.frame()

    ## EU means, variances for numerator
    xEU <- xStrat %>%
      dtplyr::lazy_dt() %>%
      dplyr::group_by(ESTN_UNIT_CN, AREA_USED, P2PNTCNT_EU, !!!x.grp.syms) %>%
      dplyr::summarise(dplyr::across(c(!!!x.var.m.syms),
                              .fns = list(
                                mean = ~ AREA_USED * sum(.x * STRATUM_WGT, na.rm = TRUE)
                              )),
                       dplyr::across(c(!!!x.var.v.syms),
                              .fns = list(
                                var = ~ (AREA_USED^2 / P2PNTCNT_EU) * (sum(.x * STRATUM_WGT * P2POINTCNT, na.rm = TRUE) + sum(.x * (1-STRATUM_WGT) * (P2POINTCNT / P2PNTCNT_EU), na.rm = TRUE))
                              )),
                       nPlots.x = sum(nPlots.x, na.rm = TRUE)) %>%
      dplyr::ungroup() %>%
      as.data.frame()


    ## Compute moving average weights if not TI ----------------------------------
    if (stringr::str_to_upper(method) %in% c("SMA", 'EMA', 'LMA')){

      ## Only needed INVYR for pop est
      x.grpBy <- x.grpBy[x.grpBy != 'INVYR']
      x.grp.syms <- dplyr::syms(x.grpBy)

      ## Compute the weights
      wgts <- maWeights(pops, method, lambda)

      ## If moving average ribbons, add lambda to grpBy for easier summary
      if (stringr::str_to_upper(method) == 'EMA' & length(lambda) > 1){
        x.grpBy <- c('lambda', x.grpBy)
      }


      ## Apply the weights
      if (stringr::str_to_upper(method) %in% c('LMA', 'EMA')){
        joinCols <- c('YEAR', 'STATECD', 'INVYR')
      } else {
        joinCols <- c('YEAR', 'STATECD')
      }

      xEU <- xEU %>%
        dplyr::left_join(dplyr::select(db$POP_ESTN_UNIT, CN, STATECD), by = c('ESTN_UNIT_CN' = 'CN')) %>%
        dplyr::left_join(wgts, by = joinCols) %>%
        dplyr::mutate(dplyr::across(c(!!!x.var.m.syms), ~(.x * wgt))) %>%
        dplyr::mutate(dplyr::across(c(!!!x.var.v.syms), ~(.x * (wgt^2)))) %>%
        dplyr::group_by(ESTN_UNIT_CN, P2PNTCNT_EU, !!!x.grp.syms) %>%
        dplyr::summarize(dplyr::across(c(!!!x.var.m.syms, !!!x.var.v.syms, nPlots.x), sum, na.rm = TRUE)) %>%
        dplyr::ungroup()



      ## If using an ANNUAL estimator --------------------------------------------
    } else if (stringr::str_to_upper(method) == 'ANNUAL') {

      ## Only needed INVYR for pop est
      x.grpBy <- x.grpBy[x.grpBy != 'INVYR']


      # If INVYR is in YEAR, choose the estimates when INVYR == YEAR
      # Otherwise, choose the estimates produced with the most plots
      xEU <- filterAnnual(xEU, x.grpBy, nPlots.x, db$POP_ESTN_UNIT)

    } else if (stringr::str_to_upper(method) == 'ANNUAL_COV') {

      xEU$YEAR <- xEU$INVYR
    }

  }

  return(list(x = xEU, y = yEU))
}

Try the rFIA package in your browser

Any scripts or data that you put into this service are public.

rFIA documentation built on Dec. 16, 2021, 1:07 a.m.