R/bayes_anova_tables.R

#' Analyze variance components of two factor regression model
#'
#' @export
#' @param formula the model formula used to fit the analysis
#' @param data a data frame
#' @param chains model.fit the MCMC samples of the model
#' @param plot defaults to TRUE
#' @param console should the results be printed to console? Defaults to TRUE.
#' @param cred.level Width of the highest density intervals.
#' @param interval.color what color to use in the plot
#' @return
#'
#'
bayes.twoway.ANOVA = function (formula, data, model.fit, plot = TRUE, console=TRUE, cred.level=.95, interval.color = "steelblue") {

  model = as.matrix(model.fit)
  y = which(colnames(data) == as.character(formula)[[2]])
  y = data[, y]
  Xmat = model.matrix(formula, data)
  wch = attr(Xmat, 'assign')
  model = as.matrix(extract_post(model.fit, droppars = c("sd", "sigma", "nu", "log_lik", "lp__")))
  coefs = as.mcmc(model)
  fit = as.data.frame(as.matrix(coefs) %*% as.matrix(t(Xmat)))
  wch = attr(Xmat, 'assign')
  wch.main = wch[c(-which(wch==3))]
  wch.int = wch[which(wch==3)]
  sd.main =  as.data.frame(lapply(unique(wch.main)[-1], function(x) {apply(cbind(0,coefs[,which(wch.main %in% c(x))]),1,sd)}))
  sd.int = apply(cbind(0,coefs[,which(wch %in% c(unique(wch)[-1]))]),1,sd)
  resid = sweep(fit,2,as.vector(as.matrix(y)),'-')
  sd.resid = apply(resid,1,sd)
  sd.all = cbind.data.frame(sd.main, sd.int, sd.resid)
  Col.names = c(attr(terms(formula), "term.labels"), "residuals")
  colnames(sd.all) <- Col.names

  sd.all = 100*(sd.all/rowSums(sd.all))
  fpsd.p = post_summary(sd.all, estimate.method = "mode")
  fpsd.p = fpsd.p %>% mutate(term = factor(term, levels=unique(.$term)))

  if (isTRUE(plot)) {
    plot <- ggplot(fpsd.p, aes(y=estimate, x=forcats::fct_rev(term))) +
      geom_hline(yintercept=0, linetype='dashed') +
      geom_pointrange(colour = interval.color, size=.6, aes(ymin=hdi.low, ymax=hdi.high)) +
      geom_text(aes(label=sprintf('%.2f%%', fpsd.p$estimate), vjust=-1)) +
      scale_y_continuous(paste0("% ",'of Finite Population SD Explained')) +
      scale_x_discrete('factor')+
      coord_flip() +
      ggplot2::geom_errorbar(width = 0.08, size=1, colour = interval.color, aes(ymin=hdi.low, ymax=hdi.high)) +
      ggtitle("Bayesian Two-Way ANOVA Table")
    plot(plot)

  }

  if (isTRUE(console)) {
    for (i in 1:length(fpsd.p$term)) {
      cat(cyan(bold(paste("Approximately", round(fpsd.p[i,2], 1), "%",  "of the variance is explained by",
                          gsub("sd.","", as.character(as.matrix(unique(fpsd.p[,1])[i,]))), ",",
                          paste0(cred.level*100, "% HDI", " ", "[", round(fpsd.p[i,4], 1),"%", ","
                                 ,round(fpsd.p[i,5], 1), "%", "]"),"\n"))))

  }

  } else {return(fpsd.p)}

}

#' Analyze variance components of one factor regression model
#'
#' @export
#' @param formula the model formula used to fit the analysis
#' @param data a data frame
#' @param chains model.fit the MCMC samples of the model
#' @param plot defaults to TRUE
#' @param console should the results be printed to console? Defaults to TRUE.
#' @param cred.level Width of the highest density intervals.
#' @param interval.color what color to use in the plot
#' @return
#'
#'
bayes.oneway.ANOVA = function (formula, data, model.fit, plot = TRUE, cred.level=.95, console=TRUE, interval.color = "steelblue") {

  model = as.matrix(extract_post(model.fit, droppars = c("sd", "sigma", "nu", "log_lik", "lp__")))
  y = which(colnames(data) == as.character(formula)[[2]])
  y = data[, y]
  wch = as.numeric((as.factor(colnames(model))))[-1]
  newdata = data
  Xmat = as.matrix(model.matrix(formula, newdata))
  coefs = model[,wch]
  sd.x = apply(model[,wch], 1, sd)
  fit = coefs %*% t(Xmat[,-1])
  resid = sweep(fit, 2, y, "-")
  sd.resid = apply(resid, 1, sd)
  sd.all = cbind(sd.x, sd.resid)
  sd.all = 100*sd.all/rowSums(sd.all)
  Col.names = c(as.character(formula)[[3]], "residual")
  colnames(sd.all) <- Col.names
  fpsd.p = post_summary(sd.all, estimate.method = "mode", cred.level= cred.level)
  fpsd.p = fpsd.p %>% mutate(term = factor(term, levels=unique(.$term)))

  if (isTRUE(plot)) {
    plot <- ggplot(fpsd.p, aes(y=estimate, x= forcats::fct_rev(term))) +
      geom_hline(yintercept=0, linetype='dashed') +
      geom_pointrange(colour = interval.color, size=.6, aes(ymin=hdi.low, ymax=hdi.high)) +
      geom_text(aes(label=sprintf('%.2f%%', fpsd.p$estimate), vjust=-1)) +
      scale_y_continuous(paste0("% ",'of Finite Population SD Explained')) +
      scale_x_discrete('factor')+
      coord_flip() +
      ggplot2::geom_errorbar(width = 0.08, size=1, colour = interval.color, aes(ymin=hdi.low, ymax=hdi.high)) +
      ggtitle("Bayesian One-Way ANOVA Table")
    plot(plot)
  }


  if (isTRUE(console)) {
  for (i in 1:length(fpsd.p$term)) {
    cat(cyan(bold(paste("Approximately", round(fpsd.p[i,2], 1), "%",  "of the variance is explained by", gsub("sd.","", as.character(as.matrix(unique(fpsd.p[,1])[i,]))), ",", paste0(cred.level*100, "% HDI", " ", "[", round(fpsd.p[i,4], 1),"%", "," ,round(fpsd.p[i,5], 1), "%", "]"),"\n"))))
  }
  } else {return(fpsd.p)}
}
abnormally-distributed/abdisttools documentation built on May 5, 2019, 7:07 a.m.