R/snk.evaluate.core.R

Defines functions snk.evaluate.core

Documented in snk.evaluate.core

### This file is part of 'EvaluateCore' package for R.

### Copyright (C) 2018-2026, ICAR-NBPGR.
#
# EvaluateCore is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# EvaluateCore is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.r-project.org/Licenses/


#' Student-Newman-Keuls Test
#'
#' Test difference between means of entire collection (EC) and core set (CS) for
#' quantitative traits by Newman-Keuls or Student-Newman-Keuls test
#' \insertCite{newman_distribution_1939,keuls_use_1952}{EvaluateCore}.
#'
#' @param data The data as a data frame object. The data frame should possess
#'   one row per individual and columns with the individual names and multiple
#'   trait/character data.
#' @param names Name of column with the individual names as a character string.
#' @param quantitative Name of columns with the quantitative traits as a
#'   character vector.
#' @param selected Character vector with the names of individuals selected in
#'   core collection and present in the \code{names} column.
#'
#' @return A data frame with the following components. \item{Trait}{The
#'   quantitative trait.} \item{Count}{The accession count in EC (excluding
#'   missing data).} \item{Df}{The degrees of freedom for the test.}
#'   \item{EC_Min}{The
#'   minimum value of the trait in EC.} \item{EC_Max}{The maximum value of the
#'   trait in EC.} \item{EC_Mean}{The mean value of the trait in EC.}
#'   \item{EC_SE}{The standard error of the trait in EC.} \item{CS_Min}{The
#'   minimum value of the trait in CS.} \item{CS_Max}{The maximum value of the
#'   trait in CS.} \item{CS_Mean}{The mean value of the trait in CS.}
#'   \item{CS_SE}{The standard error of the trait in CS.} \item{SNK_pvalue}{The
#'   p value of the Student-Newman-Keuls test for equality of means of EC and
#'   CS.} \item{SNK_significance}{The significance of the Student-Newman-Keuls
#'   test for equality of means of EC and CS.}
#'
#' @seealso \code{\link[agricolae]{SNK.test}}
#'
#' @importFrom agricolae SNK.test
#' @importFrom dplyr bind_rows
#' @importFrom stats formula
#' @importFrom stats aov
#' @importFrom stats sd
#' @importFrom Rdpack reprompt
#' @export
#'
#' @references
#'
#' \insertAllCited{}
#'
#' @examples
#'
#' data("cassava_CC")
#' data("cassava_EC")
#'
#' ec <- cbind(genotypes = rownames(cassava_EC), cassava_EC)
#' ec$genotypes <- as.character(ec$genotypes)
#' rownames(ec) <- NULL
#'
#' core <- rownames(cassava_CC)
#'
#' quant <- c("NMSR", "TTRN", "TFWSR", "TTRW", "TFWSS", "TTSW", "TTPW", "AVPW",
#'            "ARSR", "SRDM")
#' qual <- c("CUAL", "LNGS", "PTLC", "DSTA", "LFRT", "LBTEF", "CBTR", "NMLB",
#'           "ANGB", "CUAL9M", "LVC9M", "TNPR9M", "PL9M", "STRP", "STRC",
#'           "PSTR")
#'
#' ec[, qual] <- lapply(ec[, qual],
#'                      function(x) factor(as.factor(x)))
#'
#' snk.evaluate.core(data = ec, names = "genotypes",
#'                   quantitative = quant, selected = core)
#'
snk.evaluate.core <- function(data, names, quantitative, selected) {
  # Checks
  checks.evaluate.core(data = data, names = names,
                      quantitative = quantitative,
                      selected = selected)

  # Tibbles carry the classes "tbl_df" and "tbl". Coerce them so that the
  # single-column `[, j]` extractions below return plain vectors.
  if (inherits(data, c("tbl_df", "tbl"))) {
    warning('"data" is of type tibble\nCoercing to data frame')
    data <- as.data.frame(data)
  }

  dataf <- data[, c(names, quantitative)]

  # The core set (CS) rows are stacked below the entire collection (EC) rows,
  # with `[Type]` recording which group each row belongs to. Selected
  # individuals therefore appear twice: once as "EC" and once as "CS".
  datafcore <- dataf[dataf[, names] %in% selected, ]
  dataf$`[Type]` <- "EC"
  datafcore$`[Type]` <- "CS"
  dataf <- rbind(dataf, datafcore)

  # Group membership does not depend on the trait, so compute it only once.
  is_ec <- dataf$`[Type]` == "EC"
  is_cs <- dataf$`[Type]` == "CS"

  # Standard error of the mean computed over the non-missing values.
  std_error <- function(x) {
    stats::sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))
  }

  # One single-row data frame per quantitative trait.
  outdf <- lapply(quantitative, function(trait) {
    values <- dataf[, trait]
    ec <- values[is_ec]
    cs <- values[is_cs]

    # Backticks keep non-syntactic column names valid inside the formula.
    frmla <- stats::formula(paste0("`", trait, "` ~ `[Type]`"))
    # Rows with a missing trait value are dropped before fitting the model.
    model <- stats::aov(frmla, data = dataf[!is.na(values), ])
    snkout <- agricolae::SNK.test(model, "[Type]", group = FALSE,
                                  console = FALSE)

    data.frame(Trait = trait,
               Count = sum(!is.na(ec)),
               Df = snkout$statistics$Df,
               EC_Min = min(ec, na.rm = TRUE),
               EC_Max = max(ec, na.rm = TRUE),
               EC_Mean = mean(ec, na.rm = TRUE),
               EC_SE = std_error(ec),
               CS_Min = min(cs, na.rm = TRUE),
               CS_Max = max(cs, na.rm = TRUE),
               CS_Mean = mean(cs, na.rm = TRUE),
               CS_SE = std_error(cs),
               SNK_pvalue = snkout$comparison$pvalue,
               stringsAsFactors = FALSE)
  })

  outdf <- dplyr::bind_rows(outdf)

  # Significance codes: "**" for p <= 0.01, "*" for p <= 0.05, "ns" otherwise.
  outdf$SNK_significance <- ifelse(outdf$SNK_pvalue <= 0.01, "**",
                                   ifelse(outdf$SNK_pvalue <= 0.05, "*", "ns"))

  outdf

}

Try the EvaluateCore package in your browser

Any scripts or data that you put into this service are public.

EvaluateCore documentation built on April 22, 2026, 9:07 a.m.