RSFIA: FIA Tree Mortality Analysis

#' @title Calculate FIA Mortality Rates
#' @description
#'
#' This workhorse function calculates an average between-census mortality rate
#' at the plot level for a given FIA trees/plots input.
#'
#' @param trees Tree-level FIA dataframe
#' @param plots Plot-level FIA dataframe
#' @param prog_inc Number of plots to process before reporting progress
#' @param db_ver FIA DB version to use for metadata tags
#' @param skip_min_year Skip first year in dataset? T/F
#' @param min_ht Percentile of plot tree heights to include
#' @param min_BA Percentile of plot tree basal areas to include
#'
#' @return Plot-level dataframe
#'
#' @details min_ht and min_BA should be between 0 and 1, db_ver
#' doesn't work with anything besides 7.2 right now.
#'
#' @export
#' @examples test <- MakeMortalityRates(trees = trees_DF)
MakeMortalityRates <- function(trees, plots, debug = T,
                               prog_inc = 500, db_ver = 7.2,
                               min_ht = NULL, min_BA = NULL, min_dia = 10,
                               test_CNs = NULL) {

  # The database version used here is 7.2. Other versions probably work, but
  # small changes to database structure and metadata organization over time
  # require you to very dilligently check that outputs are correct, and major
  # db version changes probably require you to do debugging as well.

  # Sanity checks:
  if (!db_ver == 7.2) stop('Bad database version input')

  # Declare:

  # The mort_cols are plot-level variables. These are pulled as unique values
  # from the tree-level data, and errors are thrown if more than one unique value
  # is returned for a given PLT_CN. These are combined later using plot-level
  # variables taken directly from the plot data that was provided as input.
  AGENTCD_to_cod <- RSFIA::KeyAgentCode(db_ver = db_ver)
  mort_cols <- c(
    'PLT_CN', 'prev_year', 'median_year',
    'mort_rate', 'n_dead', 'n_total'
    )
  pos_cod <- c('Missing', 'Insect', 'Disease', 'Fire', 'Animal',
               'Weather', 'Vegetation', 'Other', 'Harvest')
  cod_vars <- c('_ndead', '_frac_of_ndead', '_rate_wrt_ntree')
  non_cod_cols <- c('non_harv_mort_rate', 'non_beet_mort_rate',
                    'non_fire_mort_rate', 'non_harv_fire_mort_rate',
                    'non_harv_fire_beet_mort_rate')
  n_cols_mort_df <- length(mort_cols) +
    (length(pos_cod) * length(cod_vars)) +
    length(non_cod_cols)
  mort_df <- data.frame(matrix(ncol = n_cols_mort_df, nrow = 0))
  pos_grid <- expand.grid(pos_cod, cod_vars)
  pos_cols <- paste0(pos_grid[, 1], pos_grid[, 2])
  colnames(mort_df) <- c(mort_cols, pos_cols, non_cod_cols)

  # Prep dataframe for species mortality. This is a seperately-populated
  # dataframe that is combined with mort_df using dplyr::full_join() right
  # before the final dataframe is returned.
  species_mort <- data.frame(plots$CN)
  colnames(species_mort) <- 'PLT_CN'

  ht_drop <- numeric()
  ht_drop_perc <- numeric()
  BA_drop <- numeric()
  BA_drop_perc <- numeric()
  tree_CNs <- numeric()

  # Skip/index counters for the primary work loop.
  icnt <- 0
  dcnt <- 0
  ycnt <- 0
  rcnt <- 0
  pcnt <- 0
  ccnt <- 0

  # Work loop:
  message('MakeMortalityRates progress:')
  cat('Total input plots:', length(unique(trees$PLT_CN)), '\n')
  # Generate plot-level mortality rates:
  i0 <- unique(trees$PLT_CN)
  for (i in i0) {

    # Advance progress counter/status message:
    icnt <- icnt + 1
    if (debug && (icnt < 2000)) next
    cat('\r', format(icnt / length(i0) * 100, digits = 2, nsmall = 2), '%    ')

    # Find plot trees:
    tree_sub <- trees[which(trees$PLT_CN == i), ]

    # Check plot validity. All trees must be from same census, i.e. same INVYR:
    if (length(unique(tree_sub$INVYR)) > 1) stop('Multiple years present?')
    year <- unique(tree_sub$INVYR)
    # First year of the data is skipped, since they are by default the first
    # census in the dataset.
    if (year == min(unique(trees$INVYR), na.rm = T)) next
    ycnt <- ycnt + 1
    # METHODS NOTE: Plots without previously measured trees were skipped.
    if (sum(!is.na(tree_sub$PREV_TRE_CN)) < 1) next
    rcnt <- rcnt + 1
    prev_plot <- unique(trees$PLT_CN[which(trees$CN %in% tree_sub$PREV_TRE_CN)])
    if (length(prev_plot) > 1) stop('Multiple previous plots?')
    # METHODS NOTE: Plots with remeasured trees that were missing the prior plot
    # entry were skipped.
    if (length(prev_plot) < 1) next
    pcnt <- pcnt + 1
    prev_year <- unique(trees$INVYR[which(trees$PLT_CN == prev_plot)])
    if (length(prev_year) > 1) stop('Multiple plot-years?')

    # METHODS NOTE: To calculate census-to-census mortality directly,
    # trees which were not present in the preceeding census were excluded from
    # tree lists for each plot. This is theoretically all tree recruits.
    # CAVEATS: This excludes trees which may have been
    # missed due to measurement error on the first census, or trees which missed
    # the measurement cutoff for the first census but grew into the census and died
    # during the ten years between the first and second censuses.

    # There's also code here to include analysis of recruits, although its not in the
    # output yet.
    # Ditch small trees:
    if (length(min_dia) > 0) {
      if (length(which(tree_sub$DIA_cm > min_dia)) > 0) {
        tree_sub <- tree_sub[which(tree_sub$DIA_cm > min_dia), ]
      }
    }
    which_recruit <- as.logical(is.na(tree_sub$PREV_STATUS_CD) - is.na(tree_sub$STATUSCD))
    recruits <- tree_sub[which_recruit, ]
    n_recruits <- nrow(recruits)
    mean_recruit_size <- mean(recruits$DIA, na.rm = T)
    mean_recruit_size <- ifelse(is.nan(mean_recruit_size), NA, mean_recruit_size)
    stopifnot(all(is.na(recruits$PREV_STATUS_CD)),
              all(is.na(recruits$PREV_TRE_CN)))
    if (n_recruits > 0) {
      tree_sub <- tree_sub[-which(tree_sub$CN %in% recruits$CN), ]
    }

    # METHODS NOTE: Incorrectly tallied trees were excluded from the census sample.
    # CAVEATS: This might lower sample size - however, difficult to rationalize
    # data interpretation choices this way. Its no doubt safer to leave that to
    # the data collection agency and accept the error rates that appear unavoidable.
    miss <- which(tree_sub$STATUSCD == 0)
    if (length(miss) > 0) tree_sub <- tree_sub[-miss, ]

    # Treat STATUSCD == 3, logged trees, as AGENTCD == 80
    log <- which(tree_sub$STATUSCD == 3)
    if (length(log) > 0) {
      tree_sub$AGENTCD[log] <- 80
      tree_sub$STATUSCD[log] <- 2
    }
    stopifnot(
      all(tree_sub$PREV_STATUS_CD %in% c(1, 2)),
      all(tree_sub$STATUSCD %in% c(1, 2))
    )

    if (all(length(min_BA) > 0, length(min_ht) > 0)) {
      stop('Cant use use both BA and ht for minimum sampling')
    }
    if (length(min_BA) > 0) {
      b <- nrow(tree_sub)
      BAs <- pi * ((tree_sub$DIA / 2) ^ 2)
      cut <- quantile(BAs, probs = seq(min_BA, 1, 0.01), na.rm = T)[1]
      tree_sub <- tree_sub[which(BAs > as.numeric(cut)), ]
      a <- nrow(tree_sub)
      BA_drop <- c(BA_drop, a - b)
      BA_drop_perc <- c(BA_drop_perc, (a - b) / b)
    }
    if (length(min_ht) > 0) {
      b <- nrow(tree_sub)
      cut <- quantile(tree_sub$HT, probs = seq(min_ht, 1, 0.01), na.rm = T)[1]
      tree_sub <- tree_sub[which(tree_sub$HT > as.numeric(cut)), ]
      a <- nrow(tree_sub)
      ht_drop <- c(ht_drop, b - a)
      ht_drop_perc <- c(ht_drop_perc, (b - a) / b * 100)
    }

    # Calculate overall mortality:
    # METHODS NOTE: To calculate census-to-census mortality directly,
    # trees which were not present in the preceeding census were excluded from
    # tree lists for each plot
    which_dead <- tree_sub$STATUSCD - tree_sub$PREV_STATUS_CD
    jesus_trees <- which(which_dead == -1)
    tree_sub$PREV_STATUS_CD[jesus_trees] <- 1
    which_dead <- which(as.logical(which_dead))
    n_2dead <- sum(tree_sub$PREV_STATUS_CD - 1, na.rm = T)
    # n_dead is trees dead THIS CENSUS
    n_dead <- sum(tree_sub$STATUSCD - tree_sub$PREV_STATUS_CD, na.rm = T)
    # METHODS NOTE: Trees that were 'ressurected' over the course of the census interval
    # were assumed to be erroneously marked dead at the previous census.

    if (n_dead < 0) stop('Mort failure')
    n_tot <- length(unique(tree_sub$CN)) - n_2dead
    # METHODS NOTE: To simplify analysis, plots with census intervals different
    # than the standard 10 years were excluded.
    if ((year - prev_year) != 10) next
    ccnt <- ccnt + 1
    mort_rate <- n_dead / n_tot
    median_year <- round(median(c(year, prev_year)), 0)

    # Calculate mortality by AGENTCD:
    # pos_cod and cod_vars are declared above

    cod <- AGENTCD_to_cod(tree_sub$AGENTCD[which_dead])
    cod_list <- list()
    j_cnt <- 0
    for (j in cod_vars) {
      j_cnt <- j_cnt + 1
      for (k in pos_cod) {
        tag <- paste0(k, j)
        if (k %in% names(table(cod))) {
          jk_val <- switch(j_cnt,
                           as.numeric(table(cod)[k]),
                           as.numeric(table(cod)[k]) / n_dead,
                           as.numeric(table(cod)[k]) / n_tot)
          cod_list[[tag]] <- ifelse(is.na(jk_val), 0, jk_val)
        } else {
          cod_list[[tag]] <- 0
        }
      }
    } # end j
    chk_end <- length(mort_cols) + length(pos_cols)
    chk_names <- colnames(mort_df)[(length(mort_cols) + 1):chk_end]
    if (!(identical(chk_names, names(cod_list)))) stop('COD names dont match')

    # Calculate AGENTCD-subtraction mortality rates:
    # METHODS NOTE: Mortality rates are re-calculated here to exclude harvest,
    # fire, and insect causes-of-death. Additionally, fire and harvest are
    # combined-excluded for a measure of death that does not include stochastic
    # landscape effects. Finally, harvest fire and insects are all excluded for
    # an additional measure of mortality that solely reflects non-fire abiotic
    # causes as well as within-stand competitive effects.
    non_cod_list <- list()
    jcnt <- 0
    for (j in non_cod_cols) {
      jcnt <- jcnt + 1
      hn <- cod_list$Harvest_ndead
      bn <- cod_list$Insect_ndead
      fn <- cod_list$Fire_ndead
      val <- switch(jcnt,
                    (n_dead %NA-% hn) / (n_tot %NA-% hn), # Non-harvest mort rate
                    (n_dead %NA-% bn) / (n_tot %NA-% bn), # Non-insect mort rate
                    (n_dead %NA-% fn) / (n_tot %NA-% fn), # Non-fire mort rate
                    (n_dead %NA-% c(fn, hn)) / (n_tot %NA-% c(fn, hn)), # No harv/fire mort
                    (n_dead %NA-% c(fn, hn, bn)) / (n_tot %NA-% c(fn, hn, bn)))
      val <- ifelse(is.na(val), 0, val)
      non_cod_list[[jcnt]] <- val
    }

    # Species mortality rates:
    for (j in unique(tree_sub$SPCD)) {
      sp_sub <- tree_sub[which(tree_sub$SPCD == j), ]
      indx <- which(species_mort$PLT_CN == i)
      tag0 <- paste0('prop_plot_SPCD_', j)
      tag <- paste0('mort_rate_SPCD_', j)
      n_dead_sp <- sum(sp_sub$STATUSCD - sp_sub$PREV_STATUS_CD, na.rm = T)
      mort_val <- round(n_dead_sp / nrow(sp_sub), 4)
      prop_val <- round(nrow(sp_sub) / nrow(tree_sub), 4)
      species_mort[indx, tag] <- mort_val
      species_mort[indx, tag0] <- prop_val
    }
    # Put it all together:
    mort_row <- c(i, prev_year, median_year, mort_rate, n_dead, n_tot,
                  cod_list, non_cod_list)
    # Output sanity checks:
    if (length(mort_row) != ncol(mort_df)) {
      warning('Mort rate failed?')
      cat('\n')
      browser()
      stop('Debug failed.')
    }
    mort_df[dcnt + 1, ] <- mort_row
    dcnt <- dcnt + 1

    tree_CNs <- append(tree_CNs, tree_sub$CN)

  }

  # Plot skip outputs:
  perc <- round(dcnt / icnt * 100, 2)
  cat('\n')
  message(paste('Skipped', icnt - dcnt, 'plots,', perc, '% of total.'))
  cat('Skip breakdown:\n')
  cat('Minimum year skips:', icnt - ycnt, '\n')
  cat('No remeasure skips:', ycnt - rcnt, '\n')
  # pcnt here is the same as dcnt, but its left in case more checks are added later
  cat('Previous plot missing skips:', rcnt - pcnt, '\n')
  cat('Non-standard census interval skips:', pcnt - ccnt, '\n')
  if (length(min_ht) > 0) {
    cat('\nMean # of trees that were too short:', round(mean(ht_drop, 2)))
    cat('\nMean % of plots that were too short:', round(mean(ht_drop_perc, 2)))
  }
  if (length(min_BA) > 0) {
    cat('\nMean # of trees that were too small:', round(mean(BA_drop, 2)))
    cat('\nMean % of plots that were too small:', round(mean(BA_drop_perc, 2)))
  }
  cat('\n')

  # Sanity check for multiple-remeasure plots:
  if (length(which(mort_df$PREV_PLT_CN %in% mort_df$PLT_CN)) > 0) {
    warning('Multiple mortality present for one plot, check PREV_PLT_CN')
  }

  # Combine dfs and return:
  colnames(plots)[which(colnames(plots) == 'CN')] <- 'PLT_CN'
  mort_df <- dplyr::inner_join(x = mort_df, y = plots, by = 'PLT_CN')
  mort_df <- dplyr::left_join(x = mort_df, y = species_mort, by = 'PLT_CN')
  return(list(mort_df, tree_CNs))
}