#' @title Analyzes mortality through interquartile range
#' @description TODO: this
#' @export
AnalyzeMortByIQR <- function(data_df = NULL,
by = 'section', type = 'background',
arcsqrt = F, agg = F) {
# mort_types:
# mort_rate, Harvest_rate_wrt_ntree, Fire_rate_wrt_ntree,
# Insect_rate_wrt_ntree, Disease_rate_wrt_ntree,
# non_harv_mort_rate, non_harv_fire_mort_rate, non_harv_fire_beet_mort_rate
stop('This function probably should not run lm on aggregated data.
Instead, fix it so if agg = F the lm is run on the full data.
Its not clear anymore what the h in the h loop actually does...')
# Setup:
# type = background, outlier
suppressWarnings(par_ret <- par())
on.exit(suppressWarnings(par(par_ret)))
if (is.null(data_df)) {
data_df <- FIA_mortality_with_explanatory
}
pos_types <- c('mort_rate', 'Harvest_rate_wrt_ntree', 'Fire_rate_wrt_ntree',
'Insect_rate_wrt_ntree', 'Disease_rate_wrt_ntree',
'non_harv_mort_rate', 'non_harv_fire_mort_rate',
'non_harv_fire_beet_mort_rate')
expl_vars <- c('MAP_10', 'MAT_10', 'SNDPPT', 'CLYPPT', 'OCSTHA',
'PHIHOX', 'AWCtS', 'BDTICM', 'BLDFIE')
out_signif <- data.frame(matrix(nrow = 0, ncol = 5))
colnames(out_signif) <- c(
'mort_type', 'variable', 'p_value', 'r2', 'normality_check')
hcnt <- 0
message('Looping over variables...')
for (h in pos_types) {
mort_df <- data_df
hcnt <- hcnt + 1
mort_type <- h
b <- which(c('forest_type', 'section', 'province') == by)
var <- switch(b, 'forest_type', 'Cleland_section', 'Cleland_province')
if (h == 'Fire_rate_wrt_ntree' && var == 'Cleland_section') browser()
cat('\nMortality type:', h, '\nVar type:', var)
IQR_groups <- CalcCategoricalMortByIQR(
x = mort_df$mort_rate, group = mort_df[[var]]
)[, 4]
mort_IQR <- character(length(IQR_groups))
mort_IQR[IQR_groups] <- 'outlier'
mort_IQR[!IQR_groups] <- 'background'
mort_df <- data.frame(mort_df, mort_IQR, stringsAsFactors = F)
mort_df <- mort_df[which(mort_df$mort_IQR == type), ]
if (arcsqrt) {
mort_df[, pos_types] <- asin(sqrt(mort_df[, pos_types]))
}
# Aggregating:
if (agg) {
mean2 <- function(x) mean(x, na.rm = T)
ag0 <- aggregate(mort_df[[mort_type]], by = list(mort_df$Cleland_section), FUN = mean2)
colnames(ag0) <- c('section', mort_type)
for (i in expl_vars) {
ag1 <- aggregate(mort_df[[i]], by = list(mort_df$Cleland_section), FUN = mean2)
colnames(ag1) <- c('section', i)
ag0 <- dplyr::left_join(ag0, ag1, by = 'section')
}
} else {
ag0_cols <- c('section', mort_type, expl_vars)
ag0 <- mort_df[, ag0_cols]
}
out <- data.frame(matrix(nrow = 4, ncol = ncol(ag0) - 2))
colnames(out) <- colnames(ag0)[3:ncol(ag0)]
row.names(out) <- c('p_value', 'F_value', 'r2', 'normality_check')
for (i in 3:ncol(ag0)) {
lmi <- lm(ag0[, 2] ~ ag0[, i])
lmi_sum <- summary(lmi)
if (agg) {
stpval <- ifelse(shapiro.test(lmi$residuals)$p.value > 0.05, T, F)
} else {
par(mfrow = c(2, 2))
plot(lmi$residuals)
stpval <- readline(prompt = 'Normal? T/F')
}
out[1, i - 2] <- lmi_sum$coefficients[2 , 4]
out[2, i - 2] <- lmi_sum$fstatistic[1]
out[3, i - 2] <- lmi_sum$r.squared
out[4, i - 2] <- stpval
}
#colnames(out_signif) <- c('variable', 'p_value', 'r2')
sig <- which(out[1, ] < 0.05)
nbef <- nrow(out_signif)
for (i in sig) {
nbef <- nbef + 1
out_signif <- rbind(out_signif, c(rep(NA, 5)))
colnames(out_signif) <- c(
'mort_type', 'variable', 'p_value', 'r2', 'normality_check')
out_signif[nbef, 1] <- h
out_signif[nbef, 2] <- colnames(out)[i]
out_signif[nbef, 3] <- out[1, i]
out_signif[nbef, 4] <- out[3, i]
out_signif[nbef, 5] <- out[4, i]
}
}
cat('\n')
return(out_signif)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.