R/cate_nelson_1965.R
In soiltestcorr: Soil Test Correlation and Calibration

Documented in boot_cn_1965 cate_nelson_1965

#' @name cate_nelson_1965
#' @title Cate & Nelson quadrants analysis (graphical)
#' @description This function runs the quadrants analysis suggested by Cate and Nelson (1965)
#' @param data argument to call a data.frame or data.table containing the data
#' @param stv argument to call the vector or column containing the soil test value (stv) data
#' @param ry argument to call the vector or column containing the relative yield (ry) data
#' @param target argument to specify the ry target (numeric) to estimate the critical stv for
#' @param tidy logical operator (TRUE/FALSE) to decide the type of return. TRUE returns a tibble, FALSE returns a list. Default: TRUE.
#' @param plot logical operator (TRUE/FALSE) to decide the type of return. TRUE returns a ggplot,
#' FALSE returns either a list (tidy == FALSE) or a tibble (tidy == TRUE).
#' @param n sample size for the bootstrapping Default: 500
#' @param ... when running bootstrapped samples, the `...` (open arguments) allows to add grouping variable/s (factor or character) Default: NULL
#' @rdname cate_nelson_1965
#' @return returns an object of type `ggplot` if plot = TRUE.
#' @return returns an object of class `data.frame` if tidy = TRUE, 
#' @return returns an object of class `list` if tidy = FALSE.
#' @details
#' See [online-documentation](https://adriancorrendo.github.io/soiltestcorr/articles/cate_nelson_1965_tutorial.html) for additional details. 
#' @references
#' Cate & Nelson (1965). 
#' A rapid method for correlation of soil test analysis with plant response data. 
#' _North Carolina Agric. Exp. Stn., International soil Testing Series l. No. 1._
#' @examples 
#' \donttest{
#'  # Example 1 dataset
#'  dat <- data.frame("ry" = c(65,80,85,88,90,94,93,96,97,95,98,100,99,99,100),
#'                    "stv" = c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15))
#'  # Run
#'  fit_example_cn_1965 <- cate_nelson_1965(data = dat, 
#'  ry = ry, stv = stv, target = 90, tidy=FALSE, plot=FALSE)
#'  
#'  fit_example_cn_1965
#' }
#' @seealso 
#'  \code{\link[rlang]{eval_tidy}},\code{\link[rlang]{defusing-advanced}}
#'  \code{\link[stats]{lm}},\code{\link[stats]{anova}}
#'  \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{geom_point}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{geom_abline}},\code{\link[ggplot2]{annotate}},\code{\link[ggplot2]{theme}}
#' @note
#' This code was adapted from 
#' Mangiafico, S. S. (2013). Cate-Nelson Analysis for Bivariate Data Using R-project.
#' _The Journal of Extension, 51(5), Article 33._ <https://tigerprints.clemson.edu/joe/vol51/iss5/33/> 
#' @export 
#' @importFrom rlang eval_tidy quo enquo
#' @importFrom dplyr %>% select group_by mutate slice_sample ungroup as_tibble
#' @importFrom stats lm anova AIC BIC
#' @importFrom modelr rmse
#' @importFrom ggplot2 ggplot aes geom_point scale_shape_manual scale_color_manual labs geom_vline geom_hline annotate theme_bw theme
#' @importFrom tidyr nest unnest expand_grid
#' @importFrom purrr map possibly
#' 
cate_nelson_1965 <- function(data=NULL, stv, ry, target, tidy = TRUE, plot = FALSE){
  
  if (missing(stv)) {
    stop("Please specify the variable name for soil test values using the `stv` argument")
  }
  
  if (missing(ry)) {
    stop("Please specify the variable name for relative yield using the `ry` argument")
  }
  
  if (missing(target)) {
    stop("Please specify the relative yield target to estimate the critical soil test value using the
         the `target` argument (e.g. target = 90)")
  }

  x <- rlang::eval_tidy(data = data, rlang::quo({{stv}}) )
  
  y <- rlang::eval_tidy(data = data, rlang::quo({{ry}}) )
  
  n <- length(x) 
  
  ##-----order by x and create critical.x variable for calculation---
  xgroup <- c('a','b')
  ygroup <- c('c','d')
  for (i in c(2:n))
  { xgroup[i] <-  c('b')
  ygroup[i] <-  c('d') }
  dataset <- data.frame(x = x, y = y, xgroup = as.factor(xgroup),
                        ygroup = as.factor(ygroup))
  dataset$ObsNo <- 1:n
  dataset <- dataset[with(dataset, order(x, y)), ]
  
  ## Define the Target of Y variable
  
  for (i in c(1:n))
  {dataset$ygroup[i] <- if(dataset$y[i] < target)  'c' else 'd'}
  
  ##-order by y, add target variable, and determine final critical-y-
  dataset <- dataset[with(dataset, order(y, x)), ]
  dataset$critical.x[1] <- 0
  for(k in c(2:n))
  { dataset$critical.x[k] <- (dataset$x[k]+dataset$x[k-1])/2 }
  dataset$critical.x[1] <- min(dataset$critical.x[2:n])
  for(j in c(1:n))
  { for (i in c(1:n))
  { (dataset$xgroup[i]
     <- if(dataset$x[i] < dataset$critical.x[j]) 'a' else 'b')}
    for (i in c(1:n))
    { dataset$q.i[i] <- with(dataset, ifelse
                             (dataset$xgroup[i]=='a' & dataset$ygroup[i]=='d', 1, 0))
    dataset$q.ii[i] <- with(dataset, ifelse
                            (dataset$xgroup[i]=='b' & dataset$ygroup[i]=='d', 1, 0))
    dataset$q.iii[i] <- with(dataset, ifelse
                             (dataset$xgroup[i]=='b' & dataset$ygroup[i]=='c', 1, 0))
    dataset$q.iv[i] <- with(dataset, ifelse
                            (dataset$xgroup[i]=='a' & dataset$ygroup[i]=='c', 1, 0)) }
    dataset$q.err[j] <- (  sum(dataset$q.i) + sum(dataset$q.iii)) }
  
  min.qerr <- min(dataset$q.err)
  dataset3 <- subset(dataset, dataset$q.err == min.qerr)
  CSTV <- dataset3$critical.x[1]
  dplyr::`%>%`
  
  # Rewrite xgroups
  dataset <- dataset %>% dplyr::mutate(xgroup = dplyr::case_when(x < CSTV ~ "a",
                                                          x >= CSTV ~ "b")) %>% 
    dplyr::mutate(
      q = dplyr::case_when((dataset$x<CSTV)&(dataset$y>=target) ~ "I",
                    (dataset$x>=CSTV)&(dataset$y>=target) ~ "II",
                    (dataset$x>=CSTV)&(dataset$y<target) ~ "III",
                    (dataset$x<CSTV)&(dataset$y<target)~ "IV"),
      Quadrant = dplyr::case_when((dataset$x<CSTV)&(dataset$y>=target) ~ "negative",
                           (dataset$x>=CSTV)&(dataset$y>=target) ~ "positive",
                           (dataset$x>=CSTV)&(dataset$y<target) ~ "negative",
                           (dataset$x<CSTV)&(dataset$y<target)~ "positive") )
  
  # Counts by quadrant
  quadrants.summary <- dataset %>% 
    dplyr::summarise(q.I = sum(q=="I"),
                     q.II = sum(q=="II"),
                     q.III = sum(q=="III"),
                     q.IV = sum(q=="IV"),
                     positive = sum(dataset$Quadrant == "positive"),
                     negative = sum(dataset$Quadrant == "negative"))
  
  ## ---------- chi-square -----------
  
  q.I <- quadrants.summary$q.I[[1]]
  q.II <- quadrants.summary$q.II[[1]]
  q.III <- quadrants.summary$q.III[[1]]
  q.IV <- quadrants.summary$q.IV[[1]]
  row.1 <- c(q.I, q.II)
  row.2 <- c(q.IV, q.III)
  X2.test <- stats::chisq.test(data.frame(row.1,row.2))
  
  ## Performance of the model, coefficient of determination (R2) ----------------
  aov.model <- stats::lm(y ~ xgroup, data=dataset)
  anova.model <- stats::anova(aov.model)
  R2.model <- anova.model[1,2]/sum(anova.model[,2])
  AIC <- stats::AIC(aov.model)
  BIC <- stats::BIC(aov.model)
  RMSE <- modelr::rmse(model = aov.model, data = dataset)
  
  ## --------- final plot --------------
  
  max.x <- max(dataset$x)
  max.y <- max(dataset$y)
  min.x <- min(dataset$x)
  min.y <- min(dataset$y)
  Quadrant_ <- dataset$Quadrant
  
  # ggplot %>% 
  cn65.ggplot <- 
    ggplot2::ggplot(data = dataset, ggplot2::aes(x=x, y=y))+
    ggplot2::geom_rug(alpha = 0.2, length = ggplot2::unit(2, "pt")) +
    ggplot2::geom_point(aes(color = Quadrant_, shape = Quadrant_), alpha = 0.75)+
    ggplot2::scale_shape_manual(name = "", values = c(4,16))+
    ggplot2::scale_color_manual(name = "", values = c("#800f2f","#355070"))+
    ggplot2::geom_vline(xintercept = CSTV, col = "dark red", linetype = "dashed")+
    ggplot2::geom_hline(yintercept = target, col = "dark red", linetype = "dashed")+
    # CSTV
    ggplot2::annotate(geom = "text", label = paste("CSTV =", CSTV, "ppm"),
                      x = CSTV+1, y = 0, angle = 90, hjust = 0, vjust = 1, col = "grey25") +
    # RY target
    ggplot2::annotate(geom = "text", label = paste0("RY = ", round(target, 0), "%"),
                      x = max(dataset$x), y = target-2, angle = 0, hjust = 1, vjust = 1, 
                      col = "grey25")+
    # Quadrants
    ggplot2::annotate(geom = "label", x = min.x+(max.x-min.x)*0.01, y =  min.y+(max.y-min.y)*0.99, 
                      label = "I", size = 3, color = "#800f2f")+
    ggplot2::annotate(geom = "label", x = min.x+(max.x-min.x)*0.99, y =  min.y+(max.y-min.y)*0.99, 
                      label = "II", size = 3, color = "#355070")+
    ggplot2::annotate(geom = "label", x = min.x+(max.x-min.x)*0.99, y =  min.y+(max.y-min.y)*0.01, 
                      label = "III", size = 3, color = "#800f2f")+
    ggplot2::annotate(geom = "label", x = min.x+(max.x-min.x)*0.01, y =  min.y+(max.y-min.y)*0.01, 
                      label = "IV", size = 3, color = "#355070")+
    # Giving more flexibility to x scale
    ggplot2::scale_x_continuous(
      breaks = seq(0, max.x,
                   by = ifelse(max.x >= 300, 50,
                         ifelse(max.x >= 200, 20,
                          ifelse(max.x >= 100, 10, 
                           ifelse(max.x >= 50, 5,
                            ifelse(max.x >= 20, 2,
                             ifelse(max.x >= 10, 1,
                              ifelse(max.x >= 5, 0.5,
                               ifelse(max.x >= 1, 0.2, 
                                      0.1))))))))) ) +
    # Y-axis scale
    ggplot2::scale_y_continuous(limits = c(0, max.y),
                                breaks = seq(0, max.y * 2, 10)) +
    ggplot2::labs(x="Soil test value", y="Relative yield (%)",
                  title = "Cate & Nelson (1965)")+
    ggplot2::theme_bw()+
    ggplot2::theme(legend.position="none",
                   panel.grid = ggplot2::element_blank(),
                   axis.title = ggplot2::element_text(size = ggplot2::rel(1.5)))
## Outputs
  results <- list("n" = n, 
              "CRYV" = target,
              "CSTV" = CSTV,
              "R2" = R2.model,
              "AIC" = AIC,
              "BIC" = BIC,
              "RMSE" = RMSE,
              "quadrants" = quadrants.summary,
              "X2" = X2.test,
              "anova" = anova.model) 
  
  if (tidy == TRUE) {
    results <- dplyr::as_tibble(results[c(1:8)])
  } else {
    results <- results
  }

  if (plot == TRUE){
    return(cn65.ggplot)
  } else {
    return(results)
  }
}

#' @rdname cate_nelson_1965
#' @return boot_cn_1965: bootstrapping function
#' @export 
boot_cn_1965 <- 
  function(data, ry, stv, target = 90, n=5, ...) {
    # Allow customized column names
    x <- rlang::enquo(stv)
    y <- rlang::enquo(ry)
    # Empty global variables
    boot_id <- NULL
    boots <- NULL
    model <- NULL
    
    data %>%  
      dplyr::select(!!y, !!x, ...) %>%
      tidyr::expand_grid(boot_id = seq(1, n, by = 1)) %>%
      dplyr::group_by(boot_id, ...) %>%
      tidyr::nest(boots = c(!!x, !!y)) %>% 
      dplyr::mutate(boots = boots %>% 
                      map(function(boots) 
                        dplyr::slice_sample(boots, 
                                            replace = TRUE, n = nrow(boots))) ) %>% 
      dplyr::mutate(model = map(boots,
                                purrr::possibly(
                                  .f = ~soiltestcorr::cate_nelson_1965(
                                    data = ., ry = !!y, stv = !!x, 
                                    target = target),
                                  otherwise = NULL, quiet = TRUE)) ) %>%
      dplyr::select(-boots) %>% 
      dplyr::mutate(model = map(model, ~dplyr::as_tibble(.))) %>% 
      tidyr::unnest(cols = model) %>% 
      dplyr::ungroup()
  }