R/regRatioDataSelect.R

Defines functions regRatioDataSelect

Documented in regRatioDataSelect

#' Select the data for regression and ratio
#'
#' @description This function selects the data for fitting regression model between basal area and
#'              whole stem volume, and for ratios to whole stem volume in VRI compiler. The regreesion
#'              method is used for derive whole stem volume for the trees that just have DBH information.
#'              The ratio method is used to derive netted merchantable volume for the trees that
#'              do not have call grading information. The data selection should be done annually at the end of
#'              every March to allow newer and higher quaulity data enter the data.
#'
#' @param sampledata data.table, The data contains the sample level information.
#' @param alltreedata data.table, The is the data pool which contains the full/enhanced trees
#'                    and H-enhanced trees.
#'
#'
#' @return Selected data for regression and ratio.
#'
#'
#' @importFrom data.table ':=' setnames
#'
#'
#' @export
#' @docType methods
#' @rdname regRatioDataSelect
#'
#' @author Yong Luo
#'
regRatioDataSelect <- function(sampledata, alltreedata){
  sampledata[, SAMP_POINT := substr(CLSTR_ID, 1, 9)]
  selectedsamples <- sampledata[0, ]
  ## remove the delinearity samples also duplicated
  sampledata <- sampledata[!(PROJ_ID %in% c("037A", "DMHA", "DDCX", "DDCA", "DDCY",
                                            "DDCB", "DDCZ", "029A", "DQUE")),]
  ## remove audit samples
  sampledata <- sampledata[!(substr(CLSTR_ID, 11, 11) == "A" |
                               substr(CLSTR_ID, 12, 12) == "A" |
                               substr(CLSTR_ID, 13, 13) == "A"),]

  ## deal with sample points that have NVAF samples
  ##
  nvafsamppoints <- unique(sampledata[substr(CLSTR_ID, 11, 11) == "N"]$SAMP_POINT)

  nvafselected <- unique(sampledata[SAMP_POINT %in% nvafsamppoints &
                               substr(CLSTR_ID, 11, 11) == "N",],
                         by = c("CLSTR_ID", "PLOT"))
  nvafselected[, LASTTIME := max(MEAS_DT), by = "SAMP_POINT"]
  nvafselected <- nvafselected[MEAS_DT == LASTTIME & PLOT != "I",]
  nvafselected[, LASTTIME := NULL]

  selectedsamples <- rbind(selectedsamples, nvafselected)
  rm(nvafselected)

  nvafselected_forIPC <- unique(sampledata[SAMP_POINT %in% nvafsamppoints &
                                             substr(CLSTR_ID, 11, 11) != "N" &
                                             PLOT == "I",],
                                by = c("CLSTR_ID"))

  nvafselected_forIPC_Fix <- nvafselected_forIPC[SAMP_TYP == "F",]
  nvafselected_forIPC_Fix[, LASTTIME := max(MEAS_DT),
                          by = "SAMP_POINT"]
  nvafselected_forIPC_Fix <- nvafselected_forIPC_Fix[MEAS_DT == LASTTIME,]
  nvafselected_forIPC_Fix[, LASTTIME := NULL]
  selectedsamples <- rbind(selectedsamples, nvafselected_forIPC_Fix)


  nvafselected_forIPC_Var <- nvafselected_forIPC[!(SAMP_POINT %in%
                                                   unique(nvafselected_forIPC_Fix$SAMP_POINT)),]
  rm(nvafselected_forIPC_Fix)

  nvafselected_forIPC_Var[, LASTTIME := max(MEAS_DT),
                          by = "SAMP_POINT"]
  nvafselected_forIPC_Var <- nvafselected_forIPC_Var[MEAS_DT == LASTTIME,]
  nvafselected_forIPC_Var[, LASTTIME := NULL]
  selectedsamples <- rbind(selectedsamples, nvafselected_forIPC_Var)
  ## check sample point 0031-0005
  testdata <- selectedsamples[SAMP_POINT == "0031-0005",.(CLSTR_ID, PLOT)]
  if(!identical(testdata[order(CLSTR_ID, PLOT),],
                data.table(CLSTR_ID = c("0031-0005-NO1", "0031-0005-NO1", "0031-0005-NO1",
                                        "0031-0005-NO1", "0031-0005-QO1"),
                           PLOT = c("E", "N", "S", "W", "I")))){
    stop("Sample point with NVAF data is not correctly selected.")
  }
  rm(testdata)


  ## select the sample points that just have one sample
  sampledata <- sampledata[!(SAMP_POINT %in% nvafsamppoints), ]
  sampledata[, sample_length := length(unique(CLSTR_ID)), by = "SAMP_POINT"]

  sampledata_selected <- sampledata[sample_length == 1,]
  sampledata_selected[, sample_length := NULL]

  selectedsamples <- rbind(selectedsamples, sampledata_selected)
  rm(sampledata_selected)
  sampledata <- sampledata[sample_length != 1,]


  ## for sample point have the fixed area plot
  sampledata_fix <- sampledata[SAMP_TYP == "F",]
  sampledata_fix[, LASTTIME := max(MEAS_DT), by = "SAMP_POINT"]
  sampledata_fix <- sampledata_fix[MEAS_DT == LASTTIME,]
  sampledata_fix <- sampledata_fix[CLSTR_ID != "4742-0104-FO1",] ## need to remove when figure out what is
                                                                 ## going on
  sampledata_fix[, clster_length := length(unique(CLSTR_ID)), by = "SAMP_POINT"]
  if(nrow(sampledata_fix[clster_length > 1]) > 0){
    print(unique(sampledata_fix[clster_length > 1,.(CLSTR_ID, SAMP_TYP, MEAS_DT, SAMP_POINT)],
                 by = "CLSTR_ID"))
    stop("Multiple monitoring samples were found for one sample point at same time.")
  }
  sampledata_fix[, ':='(LASTTIME = NULL,
                        clster_length = NULL,
                        sample_length = NULL)]
  selectedsamples <- rbind(selectedsamples, sampledata_fix)
  sampledata <- sampledata[!(SAMP_POINT %in% unique(sampledata_fix$SAMP_POINT)), ]

  ## last selection for the sample point just have multiple variable plot samples
  sampledata[, LASTTIME := max(MEAS_DT), by = "SAMP_POINT"]
  sampledata <- sampledata[MEAS_DT == LASTTIME,]
  sampledata[, clster_length := length(unique(CLSTR_ID)), by = "SAMP_POINT"]
  if(nrow(sampledata[clster_length > 1]) > 0){
    print(unique(sampledata_fix[clster_length > 1,.(CLSTR_ID, SAMP_TYP, MEAS_DT, SAMP_POINT)],
                 by = "CLSTR_ID"))
    stop("Multiple monitoring samples were found for one sample point at same time.")
  }
  sampledata[, ':='(LASTTIME = NULL,
                        clster_length = NULL,
                        sample_length = NULL)]
  selectedsamples <- rbind(selectedsamples, sampledata)
  selectedsamples[, uniplot := paste0(CLSTR_ID, "-", PLOT)]

  alltreedata[, uniplot := paste0(CLSTR_ID, "-", PLOT)]
  treedata_selected <- alltreedata[uniplot %in% selectedsamples$uniplot &
                                     MEAS_INTENSE %in% c("FULL", "ENHANCED", "H-ENHANCED"),]
  return(treedata_selected)
}
bcgov/BCForestGroundSample documentation built on May 25, 2019, 3:21 p.m.