BCForestGroundSample: BC forest ground sample compilation

Documented in auxiTreeCompiler

#' Derive volume components for H-enhanced and non-enhanced trees-VRI specific
#'
#'
#' @description Estimates volume components for H-enhanced and non-enhanced trees using regression and ratio methods.
#'              For H-enhanced trees, the whole stem volume and gross merchantable volume are already calculated directly using
#'              taper equations; and rest of volume components will be calculated using ratio method in this function.
#'              For non-enhanced trees, the whole stem volume is derived using regression equation between basal area
#'              and whole stem volume and the rest of volume components will be computed using ratio method in this function.
#'              The function is part of \code{vol_ha_2017.sas}.
#'
#' @param fullMeasuredTrees Compiled tree-level data in vi_c, which contains full measured trees, enhanced trees
#'                          and H-enhanced trees. This data is output of \code{\link{DWBCompiler}}
#'
#' @param auxiTrees data.table, Non-enhanced trees in anxilirary plots, however, it may have enhanced trees and H-enhanced trees.
#'                              An output from \code{\link{VRIInit_auxTree}}.
#'
#' @param clusterPlotHeader data.table, Cluster and plot-level information. An output of \code{\link{VRIInit_clusterplot}}.
#'
#' @return A list of four tables: 1. fullenhancedtrees: full and enhanced trees;
#'         2. HnonenhancedTrees: Height enhanced and non-enhanced trees;
#'         3. regression table; 4. ratio table.
#'
#' @importFrom data.table ':='
#' @importFrom fpCompare '%<=%' '%==%' '%>=%' '%!=%' '%>>%' '%<<%'
#' @note The data selection procedure for regression has been standardized as following:
#' \enumerate{
#' \item Start from vi_c, which has all trees have minimum information of DBH and Height;
#' \item Select all the full, enhanced and H-enhanced trees;
#' \item Remove observations in Audit plots and have zero whole stem volume;
#' \item Select the latest observation for each tree by live_dead status. A tree's identity is considered same when it is from same proj_id, samp_no and plot.
#' }
#' The data selection for ratio has been standardized as following:
#' \enumerate{
#' \item Start from the that used for regression;
#' \item Select all the full and enhanced trees;
#' \item Select the trees with DBH >= 10cm
#' }
#'
#' @export
#' @docType methods
#' @rdname auxiTreeCompiler
#'
#' @author Yong Luo
auxiTreeCompiler <- function(fullMeasuredTrees, auxiTrees, clusterPlotHeader){
    ## prepare regression based on prj_grp, lv_d and sp0
    ## obtain regression data
    tree_vb <- mergeAllVolTrees(treeMS = data.table::copy(fullMeasuredTrees),
                               treeAX = data.table::copy(auxiTrees))
    rm(fullMeasuredTrees, auxiTrees)
    tree_vb <- merge_dupUpdate(tree_vb,
                               unique(clusterPlotHeader[,.(CLSTR_ID, TYPE_CD)], by = "CLSTR_ID"),
                               by = "CLSTR_ID", all.x = TRUE)
    nonvoltrees <- tree_vb[MEAS_INTENSE %in% c("H-ENHANCED", "NON-ENHANCED"),] ## tree with H-enhanced and non-enhanced
    voltrees <- tree_vb[MEAS_INTENSE %in% c("FULL", "ENHANCED"),] ## tree with all information
    fullMeasuredTrees <- tree_vb[MEAS_INTENSE %in% c("FULL", "ENHANCED", "H-ENHANCED"), ] ## for calibrating regression and ratio
    fullMeasuredTrees[, uniobsid := 1:nrow(fullMeasuredTrees)]
    rm(tree_vb)
    if(nrow(nonvoltrees) > 0){
      ## go to the non-enhanced trees routine to calculate volume components
      ## select clusters that are not audit
      selectedcluster <- unique(clusterPlotHeader[!grepl("A", TYPE_CD, 1, 1),
                                                  .(CLSTR_ID, PROJ_ID, MEAS_DT, SAMP_NO, TYPE_CD, SAMP_TYP)],
                                by = "CLSTR_ID")
      selectedcluster[, meas_length := length(CLSTR_ID), by = c("PROJ_ID", "SAMP_NO")]
      ## select the sample point that just measured once
      selectedcluster_output <- selectedcluster[meas_length == 1,]
      ## working on the sample point that measured more than once
      selectedcluster_check <- selectedcluster[meas_length > 1,]
      ## determine the sample type by using the first letter of type code
      selectedcluster_check[, TYPE_CD_1 := substr(TYPE_CD, 1, 1)]
      ## ordering measurement by measurement time for each sample point
      selectedcluster_check <- selectedcluster_check[order(PROJ_ID, SAMP_NO, MEAS_DT),]
      ## get the sample type combination for each sample point
      selectedcluster_check[, samptypecomb := paste(TYPE_CD_1, collapse = ""),
                            by = c("PROJ_ID", "SAMP_NO")]

      ## step 1, select sample point that has at one monitor sample
      selectedcluster_monitor <- selectedcluster_check[grepl("L", samptypecomb) |
                                                         grepl("M", samptypecomb) |
                                                         grepl("F", samptypecomb) |
                                                         grepl("Y", samptypecomb), ]

      ## remove these clusters from check table
      selectedcluster_check <- selectedcluster_check[!(CLSTR_ID %in% selectedcluster_monitor$CLSTR_ID),]
      ## select monitor plot over other sample type
      selectedcluster_monitor <- selectedcluster_monitor[TYPE_CD_1 %in% c("L", "M", "F", "Y"),]
      ## select the most recent one for each sample point
      selectedcluster_monitor[, mostrecent := max(MEAS_DT), by = c("PROJ_ID", "SAMP_NO")]
      selectedcluster_monitor <- selectedcluster_monitor[mostrecent == MEAS_DT,]
      ## join the selected cluster into selectedcluster_output
      selectedcluster_output <- rbindlist(list(selectedcluster_output[, meas_length := NULL],
                                               selectedcluster_monitor[,.(CLSTR_ID, PROJ_ID, MEAS_DT, SAMP_NO, TYPE_CD, SAMP_TYP)]))
      rm(selectedcluster_monitor)


      ## step 2, select sample points that have N sample
      selectedcluster_N <- selectedcluster_check[grepl("N", samptypecomb), ]
      ## remove these clusters from check table
      selectedcluster_check <- selectedcluster_check[!(CLSTR_ID %in% selectedcluster_N$CLSTR_ID),]
      ## select N sample over other temporary sample plot
      selectedcluster_N <- selectedcluster_N[TYPE_CD_1 == "N"]
      ## select the most recent N type sample for each sample point
      selectedcluster_N[, mostrecent := max(MEAS_DT), by = c("PROJ_ID", "SAMP_NO")]
      selectedcluster_N <- selectedcluster_N[mostrecent == MEAS_DT, ]
      ## join the selected cluster into selectedcluster_output
      selectedcluster_output <- rbindlist(list(selectedcluster_output,
                                               selectedcluster_N[,.(CLSTR_ID, PROJ_ID, MEAS_DT, SAMP_NO, TYPE_CD, SAMP_TYP)]))
      rm(selectedcluster_N)

      ## step 3, select sample point that had been measured in 5 cluster plot
      selectedcluster_check[TYPE_CD_1 %in% c("B", "D", "O", "Q", "T", "Z"),
                            cluster5 := "yes"]
      selectedcluster_check[is.na(cluster5), cluster5 := "no"]
      selectedcluster_check[, allcluster5 := length(unique(cluster5)),
                            by = c("PROJ_ID", "SAMP_NO")]
      ## select all sample point that has been measured in 5 cluster plot design
      selectedcluster_5cluster <- selectedcluster_check[allcluster5 == 1 & cluster5 == "yes",]
      ## remove these clusters from check
      selectedcluster_check <- selectedcluster_check[!(CLSTR_ID %in% selectedcluster_5cluster$CLSTR_ID),]
      ## select the most recent measurement
      selectedcluster_5cluster[, mostrecent := max(MEAS_DT), by = c("PROJ_ID", "SAMP_NO")]
      selectedcluster_5cluster <- selectedcluster_5cluster[mostrecent == MEAS_DT, ]
      ## join the selected cluster into selectedcluster_output
      selectedcluster_output <- rbindlist(list(selectedcluster_output,
                                               selectedcluster_5cluster[,.(CLSTR_ID, PROJ_ID, MEAS_DT, SAMP_NO, TYPE_CD, SAMP_TYP)]))
      rm(selectedcluster_5cluster)

      ## give a warning message if the check file still have some clusters that are not
      ## been processed in the above step procedures
      if(nrow(selectedcluster_check) > 0){
        warning("Some remeasured sample points may have unexpected sample type. Please check.")
      }
      selectedcluster_output[, meas_length := length(CLSTR_ID),
                             by = c("PROJ_ID", "SAMP_NO")]
      if(nrow(selectedcluster_output[meas_length > 1])){
        warning("Some sample points are remeasured using same sample type at same time.")
      }

      ## select the trees based on selected clusters
      regressionData <- fullMeasuredTrees[VOL_WSV %>=% 0 &
                                            CLSTR_ID %in% selectedcluster_output$CLSTR_ID,]
      # regressionData <- haven::read_sas("//Mayhem.dmz/gis_tib/VRI/RDW/RDW_Data2/Work_Areas/VRI_ASCII_PROD/vri_db/regression_input.sas7bdat") %>%
      #   data.table
      # names(regressionData) <- toupper(names(regressionData))
      regressionData[, ':='(L10WSV = log10(VOL_WSV),
                            L10BA = log10(BA_TREE),
                            PRJ_GRP = prj_ID2Grp(substr(CLSTR_ID, 1, 4)),
                            TYPE = speciesCode2speciesType(SP0),
                            SPECIES = substr(SPECIES, 1, 2))]
      regressionData[is.na(TYPE), ':='(SP0 = "X",
                                       TYPE = "D")]

      # the below codes are consistent with reg_wsv_2017.sas
      allcombs <- nonvoltrees[,.(PRJ_GRP = prj_ID2Grp(substr(CLSTR_ID, 1, 4)), SP0,
                                 LV_D, TYPE = speciesCode2speciesType(SPECIES))]
      allcombs[is.na(TYPE), ':='(SP0 = "X",
                                 TYPE = "D")]
      allcombs <- unique(allcombs,
                         by = c("PRJ_GRP", "SP0", "LV_D"))
      reg_all <- WSV_BARegression(masterTable = allcombs,
                                  regressionData = data.table::copy(regressionData))

      ## prepare toWSV ratio
      ## ratios can only be calcualted for live and full measured/enhanced trees
      ratiodata <- regressionData[MEAS_INTENSE %in% c("FULL", "ENHANCED"),]

      ## call calc_ratio_2017.sas
      # ratiodata <- haven::read_sas("//Mayhem.dmz/gis_tib/VRI/RDW/RDW_Data2/Work_Areas/VRI_ASCII_PROD/vri_db/ratio_input.sas7bdat") %>%
      # data.table
      # names(ratiodata) <- toupper(names(ratiodata))

      ratios_by_group <- toWSVRatio(masterTable = allcombs,
                                    ratioData = ratiodata)
      rm(regressionData, ratiodata, fullMeasuredTrees)
      nonvoltrees <- treeVolEst_RegRatioMethod(nonVolTrees = nonvoltrees,
                                               regressionTable = reg_all,
                                               ratioTable = ratios_by_group)
      return(list(fullenhancedtrees = voltrees,
                  HnonenhancedTrees = nonvoltrees,
                  regressionTable = reg_all,
                  ratioTable = ratios_by_group))
    } else {
      return(list(fullenhancedtrees = voltrees,
                  HnonenhancedTrees = NA,
                  regressionTable = NA,
                  ratioTable = NA))
    }
  }