R/AnalyzeMortByIQR.R

#' @title Analyzes mortality through interquartile range
#' @description TODO: this
#' @export
AnalyzeMortByIQR <- function(data_df = NULL,
                             by = 'section', type = 'background',
                             arcsqrt = F, agg = F) {
  # mort_types:
  #   mort_rate, Harvest_rate_wrt_ntree, Fire_rate_wrt_ntree,
  #   Insect_rate_wrt_ntree, Disease_rate_wrt_ntree,
  #   non_harv_mort_rate, non_harv_fire_mort_rate, non_harv_fire_beet_mort_rate
  stop('This function probably should not run lm on aggregated data.
        Instead, fix it so if agg = F the lm is run on the full data.
        Its not clear anymore what the h in the h loop actually does...')
  # Setup:
  # type = background, outlier
  suppressWarnings(par_ret <- par())
  on.exit(suppressWarnings(par(par_ret)))
  if (is.null(data_df)) {
    data_df <- FIA_mortality_with_explanatory
  }
  pos_types <- c('mort_rate', 'Harvest_rate_wrt_ntree', 'Fire_rate_wrt_ntree',
                 'Insect_rate_wrt_ntree', 'Disease_rate_wrt_ntree',
                 'non_harv_mort_rate', 'non_harv_fire_mort_rate',
                 'non_harv_fire_beet_mort_rate')
  expl_vars <- c('MAP_10', 'MAT_10', 'SNDPPT', 'CLYPPT', 'OCSTHA',
                 'PHIHOX', 'AWCtS', 'BDTICM', 'BLDFIE')

  out_signif <- data.frame(matrix(nrow = 0, ncol = 5))
  colnames(out_signif) <- c(
    'mort_type', 'variable', 'p_value', 'r2', 'normality_check')
  hcnt <- 0
  message('Looping over variables...')
  for (h in pos_types) {
    mort_df <- data_df
    hcnt <- hcnt + 1
    mort_type <- h
    b <- which(c('forest_type', 'section', 'province') == by)
    var <- switch(b, 'forest_type', 'Cleland_section', 'Cleland_province')
    if (h == 'Fire_rate_wrt_ntree' && var == 'Cleland_section') browser()
    cat('\nMortality type:', h, '\nVar type:', var)
    IQR_groups <- CalcCategoricalMortByIQR(
      x = mort_df$mort_rate, group = mort_df[[var]]
    )[, 4]
    mort_IQR <- character(length(IQR_groups))
    mort_IQR[IQR_groups] <- 'outlier'
    mort_IQR[!IQR_groups] <- 'background'
    mort_df <- data.frame(mort_df, mort_IQR, stringsAsFactors = F)
    mort_df <- mort_df[which(mort_df$mort_IQR == type), ]
    if (arcsqrt) {
      mort_df[, pos_types] <- asin(sqrt(mort_df[, pos_types]))
    }

    # Aggregating:
    if (agg) {
      mean2 <- function(x) mean(x, na.rm = T)
      ag0 <- aggregate(mort_df[[mort_type]], by = list(mort_df$Cleland_section), FUN = mean2)
      colnames(ag0) <- c('section', mort_type)
      for (i in expl_vars) {
        ag1 <- aggregate(mort_df[[i]], by = list(mort_df$Cleland_section), FUN = mean2)
        colnames(ag1) <- c('section', i)
        ag0 <- dplyr::left_join(ag0, ag1, by = 'section')
      }
    } else {
      ag0_cols <- c('section', mort_type, expl_vars)
      ag0 <- mort_df[, ag0_cols]
    }

    out <- data.frame(matrix(nrow = 4, ncol = ncol(ag0) - 2))
    colnames(out) <- colnames(ag0)[3:ncol(ag0)]
    row.names(out) <- c('p_value', 'F_value', 'r2', 'normality_check')
    for (i in 3:ncol(ag0)) {
      lmi <- lm(ag0[, 2] ~ ag0[, i])
      lmi_sum <- summary(lmi)
      if (agg) {
        stpval <- ifelse(shapiro.test(lmi$residuals)$p.value > 0.05, T, F)
      } else {
        par(mfrow = c(2, 2))
        plot(lmi$residuals)
        stpval <- readline(prompt = 'Normal? T/F')
      }
      out[1, i - 2] <- lmi_sum$coefficients[2 , 4]
      out[2, i - 2] <- lmi_sum$fstatistic[1]
      out[3, i - 2] <- lmi_sum$r.squared
      out[4, i - 2] <- stpval
    }
    #colnames(out_signif) <- c('variable', 'p_value', 'r2')
    sig <- which(out[1, ] < 0.05)
    nbef <- nrow(out_signif)
    for (i in sig) {
      nbef <- nbef + 1
      out_signif <- rbind(out_signif, c(rep(NA, 5)))
      colnames(out_signif) <- c(
        'mort_type', 'variable', 'p_value', 'r2', 'normality_check')
      out_signif[nbef, 1] <- h
      out_signif[nbef, 2] <- colnames(out)[i]
      out_signif[nbef, 3] <- out[1, i]
      out_signif[nbef, 4] <- out[3, i]
      out_signif[nbef, 5] <- out[4, i]
    }
  }
  cat('\n')

  return(out_signif)
}
bmcnellis/RSFIA documentation built on June 1, 2019, 7:40 a.m.