# R/test_and_visuals.R


# MIT License-----
# Copyright (c) 2021 Sabine Schilling

# Plotting functions----

# Testing for normality and visualization ----

test_norm_vis <- function(x, y_axis_hist = c(0, 0.04)) {
  # store default graphical parameters------
  oldparnormvis <- par(no.readonly = TRUE)
  on.exit(par(oldparnormvis))
  par(mfrow = c(1, 2), oma = c(0, 0, 3, 0))
  # Remove NA from x
  x <- x[!is.na(x)]
  norm_dens <- function(z) {
    dnorm(z, mean(x), sd(x))
  }
  
  ymax <- max(norm_dens(x))
  # Plot histogram of raw data
  otto <- hist(
    x,
    freq = FALSE,
    col = "grey",
    breaks = "Sturges",
    xlim = c(
      mean(x, na.rm = T) - 5 * sd(x, na.rm = T),
      mean(x, na.rm = T) +
        5 * sd(x, na.rm = T)
    ),
    ylim = c(0, 1.2 * ymax)
  )
  
  # normal distribution with mean and sd of given distribution
  curve(norm_dens,
        col = "red",
        add = TRUE,
        lwd = 2)
  
  
  # par(new = TRUE) #the next high-level plotting command does not clean the frame before drawing
  # as if it were on a new device.
  lines(density(x), col = "blue")
  
  legend(
    "topright",
    c("fitted", "estimated"),
    lty = 1,
    lwd = 2,
    col = c("red", "blue"),
    bty = "n"
  )
  box() # frame around current plot
  
  qqnorm(x)
  qqline(x, col = "red", lwd = 2)
  
  AD <- ad.test(x) # Anderson-Darling test (requires the nortest package)
  p_AD <- signif(AD$p.value, 2)
  SH <- shapiro.test(x)
  p_SH <- signif(SH$p.value, 2)
  
  mtext(
    paste(
      "Shapiro-Wilk: p = ",
      p_SH,
      "\n Anderson-Darling: p = ",
      p_AD,
      "\n Null hypothesis: data are normally distributed"
    ),
    outer = TRUE
  )
  my_list <- list("Anderson-Darling" = AD, "Shapiro" = SH)
  
  return(my_list)
}
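
# Usage sketch (illustrative, not run; ad.test() requires the nortest package):
#   library(nortest)
#   set.seed(1)
#   res <- test_norm_vis(rnorm(100))
#   res$Shapiro$p.value # p-value of the Shapiro-Wilk test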



###### Two-Sample t-Test ###############################
two_sample_t_test <- function(samples,
                              fact,
                              alternative = c("two.sided", "less", "greater"),
                              paired = FALSE,
                              var.equal = FALSE,
                              conf.level = conf.level,
                              samplename = "",
                              factorname = "") {
  oldpar <- par(no.readonly = TRUE)
  on.exit(par(oldpar))
  alternative <- match.arg(alternative)
  
  
  if (!missing(conf.level) &&
      (length(conf.level) != 1 || !is.finite(conf.level) ||
       conf.level < 0 || conf.level > 1)) {
    return(warning("'conf.level' must be a single number between 0 and 1"))
  }
  
  if (missing(conf.level)) {
    conf.level <- 0.95
  }
  alpha <- 1 - conf.level
  levels <- unique(sort(fact))
  twosamples <- create_two_samples_vector(samples, fact)
  x <- twosamples$sample1and2
  x1 <- twosamples$sample1
  x2 <- twosamples$sample2
  # Check normality of both samples-----
  p1 <- test_norm(twosamples$sample1)
  p2 <- test_norm(twosamples$sample2)
  
  # margins of the y-axis
  lower <- 0.2
  upper <- 0.2
  margins <- calc_min_max_of_y_axis(x, lower, upper)
  mi <- margins[[1]]
  ma <- margins[[2]]
  
  x <- cbind(x, factor(c(rep(1, length(x1)), rep(2, length(x2)))))
  
  b <- boxplot(
    samples ~ fact,
    lwd = 0.5,
    xlab = factorname,
    ylab = samplename,
    ylim = c(mi - 2, ma),
    varwidth = T,
    col = colorscheme(1)
  )
  
  
  stripchart(
    samples ~ fact,
    vertical = TRUE,
    xlim = c(0, 3),
    ylim = c(mi, ma),
    # col = c("grey70", "grey80"),
    col = colorscheme(2),
    axes = FALSE,
    method = "jitter",
    add = TRUE
  )
  
  
  
  points(1,
         mean(x1),
         col = 2,
         pch = 1,
         lwd = 3)
  points(2,
         mean(x2),
         col = 2,
         pch = 1,
         lwd = 3)
  
  # alpha_sidak = 1 - sqrt(1 - alpha)
  
  alpha_sidak <- alpha
  
  correction1 <- qt(1 - 0.5 * alpha_sidak, length(x1) - 1) * sd(x1) / sqrt(length(x1))
  correction2 <- qt(1 - 0.5 * alpha_sidak, length(x2) - 1) * sd(x2) / sqrt(length(x2))
  
  arrows(
    1,
    mean(x1, na.rm = T) + correction1,
    1,
    mean(x1, na.rm = T) - correction1,
    angle = 90,
    code = 3,
    # half confidence interval
    col = 2,
    lty = 1,
    lwd = 2,
    length = 0.1
  )
  
  arrows(
    2,
    mean(x2) + correction2,
    2,
    mean(x2) - correction2,
    angle = 90,
    code = 3,
    col = 2,
    lty = 1,
    lwd = 2,
    length = 0.1
  )
  
  # Sample sizes above box plot
  text(1:length(b$n), c(ma, ma), paste("N=", b$n))
  t <- t.test(
    x1,
    x2,
    alternative = alternative,
    conf.level = conf.level,
    paired = paired,
    var.equal = var.equal,
    na.action = na.omit
  )
  # Legend
  legend(
    "bottomleft",
    inset = 0.05,
    horiz = F,
    c(paste(
      "mean with", conf.level * 100, "% conf. intervall "
    )),
    col = c(colors()[552]),
    bty = "n",
    lwd = 3
  )
  
  p_value <- t$p.value
  p_value <- signif(p_value, 2)
  test_statistic <- t$statistic
  test_statistic <- round(test_statistic, 2)
  stat_name <- names(t$statistic)
  
  # Title generation
  compare <- side_of_nh(alternative)
  
  
  
  
  mean_or_median <- "population mean"
  comparepvalue <- calculate_comparepvalue(p_value, conf.level)
  
  two_sample_title <-
    paste(
      t$method,
      ", alpha =",
      1 - conf.level,
      "\n Null hypothesis: ",
      mean_or_median,
      " ",
      samplename,
      " of ",
      factorname,
      " \"",
      unique(sort(fact))[1],
      "\" ",
      compare,
      " ",
      mean_or_median,
      " ",
      samplename,
      " of ",
      factorname,
      " \"",
      unique(sort(fact))[2],
      "\" ",
      "\n",
      stat_name,
      "=",
      test_statistic,
      ", p = ",
      p_value,
      ", p ",
      comparepvalue,
      "alpha",
      sep = ""
    )
  
  
  
  
  
  
  mtext(two_sample_title)
  
  my_list <-
    list(
      "dependent variable (response)" = samplename,
      "independent variables (features)" = unique(fact),
      "t-test-statistics" = t,
      "Shapiro-Wilk-test_sample1" = p1,
      "Shapiro-Wilk-test_sample2" = p2
    )
  
  return(my_list)
}
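
# Usage sketch (illustrative, not run): compare the two groups of the built-in
# 'sleep' data; 'group' is a factor with exactly two levels.
#   data(sleep)
#   two_sample_t_test(sleep$extra, sleep$group,
#                     alternative = "two.sided", conf.level = 0.95,
#                     samplename = "extra sleep [h]", factorname = "drug")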


# Two-Sample Wilcoxon-Test  ###############################
# One function with flags for greater, less, two sided and notch
two_sample_wilcoxon_test <- function(samples,
                                     fact,
                                     alternative = c("two.sided", "less", "greater"),
                                     conf.level = conf.level,
                                     notchf = FALSE,
                                     samplename = "",
                                     factorname = "",
                                     cex = 1) {
  oldparwilcox <- par(no.readonly = TRUE) # make a copy of current values
  on.exit(par(oldparwilcox))
  
  alternative <- match.arg(alternative)
  # Error handling ----
  
  
  if (missing(conf.level)) {
    conf.level <- 0.95
  }
  
  if (missing(notchf)) {
    notchf <- FALSE
  }
  
  if (!((length(conf.level) == 1L) && is.finite(conf.level) &&
        (conf.level > 0) && (conf.level < 1))) {
    return(warning("'conf.level' must be a single number between 0 and 1"))
  }
  
  if (!is.numeric(samples)) {
    return(warning("'samples' must be numeric"))
  }
  if (!is.null(fact)) {
    if (!is.factor(fact)) {
      return(warning("'fact' must be factorial"))
    }
  }
  
  # Store default graphical parameter
  # Define color palette
  colortuple2 <- colorscheme(2)
  
  
  
  
  
  # Create two numeric vectors
  twosamples <- create_two_samples_vector(samples, fact)
  x <- twosamples$sample1and2
  x1 <- twosamples$sample1
  x2 <- twosamples$sample2
  
  lower <- 0.2
  upper <- 0.05
  res <- calc_min_max_of_y_axis(x, lower, upper)
  mi <- res[[1]]
  ma <- res[[2]]
  
  x <- cbind(x, factor(c(rep(1, length(x1)), rep(2, length(x2)))))
  
  par(oma = c(0, 0, 2, 0)) # outer margin above: 2 lines..
  # par(mar = c(8,4,4,2) + 0.1) #increse margin of labels below
  
  b <- boxplot(samples ~ fact, plot = FALSE) # holds the counts without plotting
  
  
  
  stripchart(
    samples ~ fact,
    vertical = TRUE,
    xlim = c(0, 3),
    # ylim = c(mi, ma),
    method = "jitter",
    col = colorscheme(2),
    ylim = c(0, ma),
    ylab = samplename,
    xlab = factorname
  )
  boxplot(
    samples ~ fact,
    notch = notchf,
    varwidth = T,
    col = colorscheme(1),
    ylim = c(0, ma),
    add = T
  )
  # text(1:length(b$n), b$stats[5,]+1, paste("n=", b$n))
  text(1:length(b$n), c(ma, ma), paste("N =", b$n))
  t <- wilcox.test(samples ~ fact, alternative = alternative, na.action = na.omit)
  p_value <- t$p.value
  p_value <- formatC(signif(p_value, digits = 2))
  
  test_statistic <- t$statistic
  test_statistic <- round(test_statistic, 2)
  stat_name <- names(t$statistic)
  
  
  # Title generation
  compare <- side_of_nh(alternative)
  
  
  
  
  mean_or_median <- "population median"
  comparepvalue <- calculate_comparepvalue(p_value, conf.level)
  
  two_sample_title <-
    paste(
      t$method,
      ", alpha = ",
      1 - conf.level,
      "\n Null hypoth.: ",
      mean_or_median,
      " ",
      samplename,
      " of ",
      factorname,
      " ",
      unique(sort(fact))[1],
      " ",
      compare,
      " ",
      mean_or_median,
      " ",
      samplename,
      " of ",
      factorname,
      " ",
      unique(sort(fact))[2],
      "\n",
      stat_name,
      "=",
      test_statistic,
      ", p = ",
      p_value,
      ", p ",
      comparepvalue,
      " alpha",
      sep = ""
    )
  
  
  
  
  mtext(two_sample_title)
  
  
  
  my_list <-
    list(
      "dependent variable (response)" = samplename,
      "indepedent variables (features)" = unique(fact),
      "statsWilcoxon" = t,
      "statsBoxplot" = b
    )
  
  return(my_list)
}
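
# Usage sketch (illustrative, not run):
#   data(sleep)
#   two_sample_wilcoxon_test(sleep$extra, sleep$group,
#                            alternative = "two.sided", conf.level = 0.95,
#                            notchf = FALSE,
#                            samplename = "extra sleep [h]", factorname = "drug")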

# Two-Sample F-Test ###############################
# subtract means; two lines according to variances.
two_sample_FTest <- function(samples,
                             fact,
                             conf.int = conf.int,
                             alternative = "two.sided") {
  # if (missing(conf.int)) conf.int = 0.95
  #  if (missing(alternative)) alternative = "two.sided"
  # Store default graphical parameter
  oldparftest <- par(no.readonly = TRUE)
  on.exit(par(oldparftest))
  if (missing(conf.int)) {
    conf.int <- 0.95
  }
  
  alpha <- 1 - conf.int
  levels <- unique(sort(fact))
  
  x1 <- samples[fact == levels[1]]
  x2 <- samples[fact == levels[2]]
  
  x1 <- x1 - mean(x1, na.rm = TRUE)
  x2 <- x2 - mean(x2, na.rm = TRUE)
  
  
  x <- c(x1, x2)
  spread <- max(x) - min(x)
  spread <- max(spread, var(x1), var(x2))
  
  mi <- min(x) - 0.3 * spread
  ma <- max(x) + 0.3 * spread
  
  x <- cbind(x, factor(c(rep(1, length(x1)), rep(2, length(x2)))))
  
  par(oma = c(0, 0, 3, 0))
  stripchart(
    x[, 1] ~ x[, 2],
    vertical = TRUE,
    xlim = c(0.5, 3),
    ylim = c(mi, ma),
    col = c("grey70", "grey80"),
    ylab = "centered samples",
    xlab = "",
    axes = FALSE
  )
  
  axis(side = 2)
  axis(side = 1,
       at = c(1, 2),
       labels = levels)
  box()
  
  lines(
    x = c(1.1, 1.1),
    y = c(-0.5 * var(x1), 0.5 * var(x1)),
    col = "blue",
    lwd = 5
  )
  lines(
    x = c(1.9, 1.9),
    y = c(-0.5 * var(x2), 0.5 * var(x2)),
    col = "blue",
    lwd = 5
  )
  
  legend(
    "topright",
    inset = 0.05,
    c("variances"),
    col = c("blue"),
    lwd = 2
  )
  
  t <- var.test(x1, x2, alternative = alternative)
  p_value <- t$p.value
  p_value <- signif(p_value, 3)
  
  test_statistic <- t$statistic
  test_statistic <- formatC(signif(test_statistic, digits = 2))
  
  
  mtext(
    paste(
      "Two Sample F-Test (",
      alternative,
      "): P = ",
      p_value,
      "\n Confidence Level = ",
      1 - alpha
    ),
    outer = TRUE
  )
}
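
# Usage sketch (illustrative, not run): compare the variances of the two 'sleep' groups.
#   data(sleep)
#   two_sample_FTest(sleep$extra, sleep$group, conf.int = 0.95)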



# Chi-squared test or Fisher's exact test ----


# vis_chi_squared_test: implemented in vis_samples_fact -----
vis_chi_squared_test <- function(samples,
                                 fact,
                                 samplename,
                                 factorname,
                                 cex = 1) {
  oldparchi <- par(no.readonly = TRUE)
  on.exit(par(oldparchi))
  
  colortuple <- colorscheme(1)
  ColorPalette <- colorscheme(3)
  if (missing(samplename)) {
    samplename <- character()
  }
  if (missing(factorname)) {
    factorname <- character()
  }
  
  counts <- makeTable(samples, fact, samplename, factorname)
  # check for minimal size of 2x2
  check_assumptions_chi <- check_assumptions_count_data(samples, fact)
  
  if (check_assumptions_chi == FALSE) {
    fisher_chi <- counts
    return(fisher_chi)
   
  } else {
    row_sum <- rowSums(counts)
    col_sum <- colSums(counts)
    count_labels <- dimnames(counts)[2]
    count_labels <- as.character(unlist(count_labels))
    
    category_names <- dimnames(counts)[1]
    category_names <- as.character(unlist(category_names))
    
    norm_counts <- (counts / row_sum) * 100 # row percentages: each group (row) sums to 100%
    max_val_y <- max(norm_counts, na.rm = T)
    # col_vec_browser=c(colortuple,rainbow(nrow(counts)-2, s = 0.5))
    if (nrow(counts) < (length(ColorPalette) + 2)) {
      col_vec_browser <- c(colortuple, head(ColorPalette, n = nrow(counts) - 2))
    } else {
      col_vec_browser <- c(colortuple, rainbow(nrow(counts) - 2, s = 0.4, alpha = 1))
    }
    
    
    # creates new plot for barplot
    
    par(mfrow = c(1, 1), oma = c(0, 0, 3, 0))
    
    maxlabels <- length(levels(samples))
    if (maxlabels > 7 |
        grepl("basis", samplename) | grepl("source", samplename) |
        grepl("basis", factorname) | grepl("source", factorname) |
        grepl("genotyped", samplename) |
        grepl("genotyped", factorname)) {
      labelsize <- 0.3 * cex
    } else if (maxlabels > 5) {
      labelsize <- 0.7 * cex
    } else {
      labelsize <- cex
    }
    
    
    fisher_chi <- fisher_chi(counts) # result shadows the helper of the same name; chi-squared test if Cochran's requirements are met, otherwise Fisher's exact test
    
    if (fisher_chi$method != "Fisher's Exact Test for Count Data") {
      chi2_fisher_text <- paste0(
        fisher_chi$method,
        "\nChi-squared = ", round(fisher_chi$statistic, 2),
        ", p-value = ", signif(fisher_chi$p.value, 3)
      )
    } else {
      chi2_fisher_text <- paste0(
        fisher_chi$method,
        ",\np-value = ", signif(fisher_chi$p.value, 3)
      )
    }
    
    
    titletext <- chi2_fisher_text
    
    
    if (nrow(counts) > 3) {
      ma <- 1.3 * max_val_y
      legendsize <- 0.7 * cex
    } else {
      ma <- 1.1 * max_val_y
      legendsize <- cex
    }
    
    barplot(
      norm_counts,
      names.arg = count_labels,
      xlim = c(-0.5, ncol(counts) + 1),
      ylim = c(0, ma),
      width = 1 / (nrow(counts) + 1),
      space = c(0, 1),
      col = col_vec_browser,
      ylab = "%",
      xlab = samplename,
      beside = TRUE,
      cex.axis = 1,
      cex.names = labelsize # size of labels of barplot
    )
    
    box()
    mtext(titletext)
    category_names <- as.character(category_names)
    legend(
      "topright",
      inset = 0.05,
      category_names,
      col = col_vec_browser,
      bty = "n",
      lwd = 2,
      cex = legendsize
    )
    
    return(fisher_chi)
  }
}
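
# Usage sketch (illustrative, not run; two categorical vectors of equal length):
#   vis_chi_squared_test(factor(mtcars$cyl), factor(mtcars$am),
#                        samplename = "cylinders", factorname = "transmission")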

###### Visualize ANOVA ###############################
## performs ANOVA or one-way test and corresponding post hoc tests
vis_anova <- function(samples,
                      fact,
                      conf.level = conf.level,
                      samplename = "",
                      factorname = "",
                      cex = 1) {
  if (missing(conf.level)) {
    conf.level <- 0.95
  }
  
  oldparanova <- par(no.readonly = TRUE)
  on.exit(par(oldparanova))
  
  alpha <- 1 - conf.level
  
  samples3 <- na.omit(samples)
  fact <- subset(fact, !is.na(samples))
  samples <- samples3
  n_classes <- length(unique(fact))
  # https://en.wikipedia.org/wiki/Bonferroni_correction
  number_of_pairwise_comparisons <- n_classes * (n_classes - 1) / 2
  alpha_sidak <- 1 - conf.level^(1 / number_of_pairwise_comparisons) # Sidak correction, https://en.wikipedia.org/wiki/%C5%A0id%C3%A1k_correction
  # alpha_sidak=alpha #do not apply sidak correction
  sdna <- function(x) {
    sd(x, na.rm = T)
  }
  meanna <- function(x) {
    mean(x, na.rm = T)
  }
  
  s <- tapply(samples, fact, sdna)
  m <- tapply(samples, fact, meanna)
  
  samples_per_class <- integer(n_classes)
  for (i in 1:n_classes) {
    samples_per_class[i] <- sum(fact == unique(fact)[i])
  }
  # tests
  an <- aov(samples ~ fact)
  summaryAnova <- summary(an)
  oneway <- oneway.test(samples ~ fact)
  # check for homogeneity
  bartlett_test <- bartlett.test(samples ~ fact)
  p_bart <- bartlett_test$p.value
  
  if (p_bart > 1 - conf.level) {
    p_aov <- summaryAnova[[1]][["Pr(>F)"]][1]
    F_value <- round(summaryAnova[[1]]$`F value`[1], 2)
    label_aov <- "Fisher's one-way ANOVA"
    summarystat <- summaryAnova
  } else {
    p_aov <- oneway$p.value
    F_value <- round(oneway$statistic, 2)
    label_aov <- "Welch's heteroscedastic one-way ANOVA"
    summarystat <- oneway
  }
  
  
  
  
  maximum <- max(samples, na.rm = T)
  minimum <- min(samples, na.rm = T)
  
  spread <- maximum - minimum
  
  mi <- minimum - 1.2 * spread
  ma <- maximum + 1.2 * spread
  par(mfrow = c(1, 1), oma = c(0, 0, 3, 0))
  
  stripchart(
    samples ~ fact,
    vertical = TRUE,
    xlim = c(0, n_classes + 1),
    ylim = c(mi, ma),
    col = rep("grey30", n_classes),
    ylab = samplename,
    xlab = factorname,
    las = 2
  )
  
  
  
  # Sidak-corrected confidence intervals per class:
  for (i in 1:n_classes) {
    sn <- qt(1 - alpha_sidak / 2, samples_per_class[i] - 1) * s[[i]] / sqrt(samples_per_class[i])
    lines(
      x = c(i - 0.2, i - 0.2),
      # y = c(m[[i]] - s[[i]], m[[i]] + s[[i]]),
      y = c(m[[i]] - sn, m[[i]] + sn),
      col = colors()[131],
      lwd = 5
    )
  }
  
  
  for (i in 1:n_classes) {
    lines(
      x = c(i - 0.1, i + 0.1),
      y = c(m[[i]], m[[i]]),
      col = colors()[552],
      lwd = 3
    )
    arrows(
      i,
      m[[i]] + qt(1 - alpha / 2, samples_per_class[i] - 1) * s[[i]] / sqrt(samples_per_class[i]),
      # m[[i]] + qt(1 - alpha_sidak/2, samples_per_class[i] - 1) * s[[i]] / sqrt(samples_per_class[i]),
      i,
      m[[i]] - qt(1 - alpha / 2, samples_per_class[i] - 1) * s[[i]] / sqrt(samples_per_class[i]),
      # m[[i]] - qt(1 - alpha_sidak/2, samples_per_class[i] - 1) * s[[i]] / sqrt(samples_per_class[i]),
      angle = 90,
      code = 3,
      col = colors()[552],
      lty = 1,
      lwd = 2,
      length = 0.1
    )
  }
  
  tuk <- TukeyHSD(an, conf.level = conf.level)
  
  s <- multcompLetters(tuk[[1]][, 4], threshold = alpha)
  
  ord <- c()
  
  v <- attributes(s$Letters)$names
  f_levels <- sort(unique(fact))
  for (i in 1:n_classes) {
    ord[i] <- which(v == f_levels[i])
  }
  
  text(1:n_classes,
       mi,
       s$Letters[ord],
       col = colors()[81],
       lwd = 2)
  
  
  mtext(
    paste0(label_aov, "\nF = ", F_value, ", p = ", signif(p_aov, 2)),
    outer = TRUE)
  
 
  
  legend(
    "topleft",
    inset = 0.05,
    horiz = F,
    c(
      paste("mean with", conf.level * 100, "% conf. intervall "),
      paste("Sidak corrected" , round((1 - alpha_sidak) * 100, 2), "% conf. interval")
    ),
    col = c(colors()[131], colors()[552]),
    bty = "n",
    lwd = 3
  )
  
  
  
  
  my_list <-
    list(
      "summary statistics of Fisher's one-way ANOVA" = summaryAnova,
      "summary statistics of Welch's one-way ANOVA (not assuming equal variances)" = oneway,
      "post-hoc analysis of TukeyHSD" = tuk,
      "conf.level" = conf.level
    )
  
  return(my_list)
}
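
# Usage sketch (illustrative, not run; multcompLetters() requires the
# multcompView package):
#   library(multcompView)
#   vis_anova(iris$Sepal.Length, iris$Species, conf.level = 0.95,
#             samplename = "sepal length [cm]", factorname = "species")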



###### Visualize Kruskal_Wallis ###############################
## performs Kruskal Wallis and post-hoc Wilcoxon:

vis_Kruskal_Wallis_clusters <- function(samples,
                                        fact,
                                        conf.level = conf.level,
                                        samplename = "",
                                        factorname = "",
                                        cex = 1,
                                        notch = F) {
  oldparkruskal <- par(no.readonly = TRUE)
  on.exit(par(oldparkruskal))
  
  if (missing(conf.level)) {
    conf.level <- 0.95
  }
  alpha <- 1 - conf.level
  # remove rows with NAs in samples
  samples3 <- na.omit(samples)
  fact <- subset(fact, !is.na(samples))
  samples <- samples3
  n_classes <- length(unique(fact))
  # define color scheme dependent on number of classes

  mc <- rainbow(n_classes, alpha = 1)
  # mc=ColorPalette(n_classes)
  
  s <- tapply(samples, fact, sd)
  m <- tapply(samples, fact, mean)
  
  samples_per_class <- c()
  for (i in 1:n_classes) {
    samples_per_class[i] <- sum(fact == unique(fact)[i])
  }
  
  kk <- kruskal.test(samples ~ fact)
  
  extramargin <- 0.1
  margins <- calc_min_max_of_y_axis(samples, extramargin, extramargin)
  mi <- margins[[1]]
  ma <- margins[[2]]
  
  par(mfrow = c(1, 1), oma = c(1, 0, 1, 0)) # oma: outer margins south, west, north, east
  
  if (notch == TRUE) {
    b <- boxplot(
      samples ~ fact,
      notch = TRUE,
      col = mc,
      las = 1,
      xlim = c(0, n_classes + 1),
      ylim = c(mi, ma),
      xlab = factorname,
      ylab = samplename,
      # changes group names size
      cex.lab = cex,
      cex.axis = 0.8 * cex,
      cex.main = cex,
      cex.sub = cex,
      boxwex = 0.5
    )
  } else {
    b <- boxplot(
      samples ~ fact,
      notch = FALSE,
      col = mc,
      las = 1,
      xlim = c(0, n_classes + 1),
      ylim = c(mi, ma),
      xlab = factorname,
      ylab = samplename,
      boxwex = 0.5
    )
  }
  
  
  stripchart(
    samples ~ fact,
    vertical = TRUE,
    # method="jitter",
    col = rep("grey50", n_classes),
    # ylab = ylab,
    # xlab = xlab,
    las = 1,
    # horizontal legend,
    add = TRUE
  )
  
  mtext(c("N = ", b$n), at = c(0.7, seq(1, n_classes)), las = 1) # nmber of cases in each group
  tuk <- sig_diffs_nongauss(samples, fact, conf.level = conf.level)
  
  s <- multcompLetters(tuk[[1]][, 4], threshold = alpha)
  
  ord <- c()
  
  v <- attributes(s$Letters)$names
  f_levels <- sort(unique(fact))
  for (i in 1:n_classes) {
    ord[i] <- which(v == f_levels[i])
  }
  text(
    1:n_classes,
    mi,
    s$Letters[ord],
    col = "darkgreen",
    cex = cex,
    lwd = 2
  )
  
  
  kk_value <- round(as.numeric(kk$statistic), 2)
  
  title(paste0(kk$method,
               "\n H = ", kk_value,
               ", p = ", signif(kk$p.value, digits = 3)))
  my_list <-
    list("Kruskal Wallis rank sum test" = kk,
         "post-hoc by pairwise Wilcoxon rank sum test " = tuk)
  return(my_list)
}
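
# Usage sketch (illustrative, not run; also relies on multcompView for
# multcompLetters()):
#   library(multcompView)
#   vis_Kruskal_Wallis_clusters(iris$Sepal.Width, iris$Species,
#                               conf.level = 0.95,
#                               samplename = "sepal width [cm]",
#                               factorname = "species", notch = FALSE)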


##### Visualize Regression and trumpet curves ###############################
vis_regr_trumpets <- function(x, y, conf.level) {
  if (missing(conf.level)) {
    conf.level <- 0.95
  }
  oldparreg <- par(no.readonly = TRUE)
  on.exit(par(oldparreg))
  reg <- lm(y ~ x)
  
  ## error bands:
  y_conf_low <- conf_band(x, reg, conf.level, -1)
  y_conf_up <- conf_band(x, reg, conf.level, 1)
  
  
  lower <- 0.1
  upper <- 0.4
  margins <- calc_min_max_of_y_axis(y, lower, upper)
  mi <- margins[[1]]
  ma <- margins[[2]]
  
  
  par(oma = c(0, 0, 5, 0))
  plot(x, y, ylim = c(mi, ma))
  points(x,
         reg$fitted,
         type = "l",
         col = 2,
         lwd = 2)
  
  points(
    x,
    y_conf_low,
    type = "l",
    lwd = 2,
    lty = 2,
    col = colors()[84]
  )
  points(
    x,
    y_conf_up,
    type = "l",
    lwd = 2,
    lty = 2,
    col = colors()[84]
  )
  legend(
    "bottomright",
    c(
      "regr. line",
      paste("confidence band for alpha =", 1 - conf.level)
    ),
    lwd = 2,
    col = c(2, colors()[84]),
    lty = c(1, 2),
    bty = "n"
  )
  s <- summary(reg)
  mtext(
    paste(
      "Regression: ax + b. confidence band for alpha = ",
      1 - conf.level,
      "\n \n"
    ),
    outer = TRUE,
    cex = 1.5
  )
  mtext(
    paste(
      "\n \n a = ",
      signif(reg$coefficients[2], 2),
      ", p = ",
      signif(s$coefficients[2, 4], 2),
      "\n b = ",
      signif(reg$coefficients[1], 2),
      ", p = ",
      signif(s$coefficients[1, 4], 2),
      "\n R^2 = ",
      signif(summary(reg)$r.squared, 4)
    ),
    outer = TRUE
  )
  
  par(mfrow = c(1, 2), oma = c(0, 0, 3, 0))
  plot(
    reg$fitted,
    residuals(reg),
    main = "Residuals vs. Fitted",
    xlab = "Fitted Values",
    ylab = "Residuals"
  )
  abline(h = 0, col = 1, lwd = 2)
  
  qqnorm(residuals(reg), ylab = "Sample Quantiles of  Residuals")
  qqline(residuals(reg), col = "red", lwd = 2)
  
  AD <- ad.test(residuals(reg))
  p_AD <- signif(AD$p.value, 2)
  SH <- shapiro.test(residuals(reg))
  p_SH <- signif(SH$p.value, 2)
  
  mtext(
    paste(
      "Residual Analysis\n Shapiro-Wilk: p = ",
      p_SH,
      "\n Anderson-Darling: p = ",
      p_AD
    ),
    outer = TRUE
  )
}
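
# Usage sketch (illustrative, not run; ad.test() requires nortest):
#   vis_regr_trumpets(cars$speed, cars$dist, conf.level = 0.95)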

###### Visualize Residuals ###############################
vis_resid <- function(resid, fitted) {
  oldparresid <- par(no.readonly = TRUE)
  on.exit(par(oldparresid))
  
  par(mfrow = c(1, 2), oma = c(0, 0, 3, 0))
  plot(fitted, resid, main = "Residuals vs. Fitted")
  abline(h = 0, col = 1, lwd = 2)
  
  qqnorm(resid)
  qqline(resid, col = "red", lwd = 2)
  
  AD <- ad.test(resid)
  p_AD <- signif(AD$p.value, 2)
  SH <- shapiro.test(resid)
  p_SH <- signif(SH$p.value, 2)
  
  mtext(
    paste(
      "Residual Analysis\n Shapiro-Wilk: p = ",
      p_SH,
      "\n Anderson-Darling: p = ",
      p_AD
    ),
    outer = TRUE
  )
}


###### Visualize Regression ###############################

# checks only the normality assumptions of the standardised residuals
vis_normality_assumptions <- function(y, x, conf.level = 0.95) {
  oldparreg <- par(no.readonly = TRUE)
  on.exit(par(oldparreg))
  alpha <- 1 - conf.level
  # P = alpha
  
  # remove all NAs from both vectors
  xna <- x[!is.na(y) & !is.na(x)]
  yna <- y[!is.na(y) & !is.na(x)]
  
  x <- xna
  y <- yna
  
  ord <- order(x)
  x <- sort(x)
  y <- y[ord]
  
  reg <- lm(y ~ x)
  resreg <- summary(reg)
  
  
  par(mfrow = c(1, 2), oma = c(0, 0, 4, 0))
  plot(
    reg$fitted,
    rstandard(reg),
    main = "std. Residuals vs. Fitted",
    xlab = "Fitted Values",
    ylab = "standardised Residuals"
  )
  abline(h = 0, col = 1, lwd = 2)
  
  qqnorm(rstandard(reg), ylab = "Sample Quantiles of Std. Residuals")
  qqline(rstandard(reg), col = "red", lwd = 2)
  
  AD <- ad.test(rstandard(reg))
  p_AD <- signif(AD$p.value, 2)
  SH <- shapiro.test(rstandard(reg))
  p_SH <- signif(SH$p.value, 2)
  if (p_SH < alpha) {
    mtext(
      paste(
        "Residual Analysis\n Shapiro-Wilk: p = ",
        p_SH,
        ", Anderson-Darling: p = ",
        p_AD,
        "\n Requirement of normally distributed residuals not met "
      ),
      outer = TRUE
    )
  } else {
    mtext(
      paste(
        "Residual Analysis\n Shapiro-Wilk: p = ",
        p_SH,
        ", Anderson-Darling: p = ",
        p_AD
      ),
      outer = TRUE
    )
  }
  
  my_list <- list(
    "summary_regression" = resreg,
    "shapiro_test_residuals" = SH,
    "ad_test_residuals" = KS
  )
  
  
  return(my_list)
}




vis_regression <- function(y,
                           x,
                           conf.level = conf.level,
                           name_of_factor = character(),
                           name_of_sample = character()) {
  if (missing(conf.level)) {
    conf.level <- 0.95
  }
  oldparregr <- par(no.readonly = TRUE)
  on.exit(par(oldparregr))
  
  alpha <- 1 - conf.level
  
  # remove all NAs from both vectors
  xna <- x[!is.na(y) & !is.na(x)]
  yna <- y[!is.na(y) & !is.na(x)]
  
  x <- xna
  y <- yna
  
  ord <- order(x)
  x <- sort(x)
  y <- y[ord]
  ylim <- 1.1 * max(y, na.rm = TRUE)
  
  reg <- lm(y ~ x)
  resreg <- summary(reg)
  
  
  ## error bands:
  
  # old code, deprecated: replaced by the predict() calls below ------
  # y_conf_lowa = conf_band(x, reg, conf.level, -1)
  # y_conf_upa = conf_band(x, reg, conf.level, 1)
  # y_progn_lowa = progn_band(x, reg, conf.level, -1)
  # y_progn_upa = progn_band(x, reg, conf.level, 1)
  # -------------------
  
  conf.int_prediction <- predict(reg, interval = "confidence", level = conf.level) # confidence band
  pred.int_prediction <- suppressWarnings(predict(reg, interval = "prediction", level = conf.level)) # prediction band
  
  y_conf_low <- conf.int_prediction[, 2]
  y_conf_up <- conf.int_prediction[, 3]
  y_progn_low <- pred.int_prediction[, 2]
  y_progn_up <- pred.int_prediction[, 3]
  
  predicted_value <- conf.int_prediction[, 1]
  
  error_bands <- cbind(predicted_value,
                       y_conf_low,
                       y_conf_up,
                       y_progn_low,
                       y_progn_up)
  
  ma <- max(y, reg$fitted, y_progn_up, na.rm = TRUE)
  mi <- min(y, reg$fitted, y_progn_low, na.rm = TRUE)
  
  spread <- ma - mi
  
  par(mfrow = c(1, 1), oma = c(0, 0, 5, 0))
  plot(
    x,
    y,
    ylim = c(mi - 0.1 * spread, ma + 0.4 * spread),
    xlab = name_of_factor,
    ylab = name_of_sample
  )
  
  points(
    x,
    reg$fitted,
    type = "l",
    col = colorscheme(2)[1],
    # dark green
    lwd = 2
  )
  # plot confidence band, lower boundary
  points(
    x,
    y_conf_low,
    type = "l",
    lwd = 2,
    lty = 2,
    col = colorscheme(1)[1]
  )
  # plot confidence band, upper boundary
  points(
    x,
    y_conf_up,
    type = "l",
    lwd = 2,
    lty = 2,
    col = colorscheme(1)[1]
  )
  # plot prognosis band, lower boundary
  points(
    x,
    y_progn_low,
    type = "l",
    lwd = 2,
    lty = 3,
    col = colorscheme(1)[2]
  )
  # plot prognosis band, upper boundary
  points(
    x,
    y_progn_up,
    type = "l",
    lwd = 2,
    lty = 3,
    col = colorscheme(1)[2]
  )
  
  legend(
    "topleft",
    horiz = FALSE,
    text.width = 0.75,
    c("regr. line", "confidence band", "prognosis band"),
    lwd = 2,
    # line width
    col = c(colorscheme(2)[1], colorscheme(1)[1], colorscheme(1)[2]),
    lty = c(1, 2, 3),
    # line types of legend
    bty = "n",
    # no box around legend
    cex = 0.8 # reduces the legend size
  )
  
  
  s <- summary(reg)
  
  conf_intervall_regression <- confint(reg, level = conf.level) # conf.int confidence interval of slope and intercept
  
  AD <- ad.test(rstandard(reg))
  
  SH <- shapiro.test(rstandard(reg))
  
  mtext(
    paste(
      "y = a*x +b, confidence level = ",
      conf.level,
      ", adjusted R2 = ",
      signif(s$adj.r.squared, 2),
      " \n slope a = ",
      signif(reg$coefficients[2], 2),
      ", conf. interval [",
      signif(conf_intervall_regression[2, 1], 2),
      ",",
      signif(conf_intervall_regression[2, 2], 2),
      "]",
      ", p = ",
      signif(s$coefficients[2, 4], 2),
      "\n intercept b = ",
      signif(reg$coefficients[1], 2),
      ", conf. interval [",
      signif(conf_intervall_regression[1, 1], 2),
      ",",
      signif(conf_intervall_regression[1, 2], 2),
      "]",
      ", p = ",
      signif(s$coefficients[1, 4], 2)
    ),
    outer = TRUE
  )
  
  my_list <- list(
    "independent variable x" = name_of_factor,
    "dependent variable y" = name_of_sample,
    "summary_regression" = resreg,
    "shapiro_test_residuals" = SH,
    "anderson_darling_test_residuals" = AD,
    "error_bands" = error_bands
  )
  
  return(my_list)
}
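
# Usage sketch (illustrative, not run): regress stopping distance on speed.
#   res <- vis_regression(y = cars$dist, x = cars$speed, conf.level = 0.95,
#                         name_of_factor = "speed [mph]",
#                         name_of_sample = "stopping distance [ft]")
#   res$summary_regression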


# Mosaic plots-----
vis_mosaic <- function(samples,
                       fact,
                       name_of_sample = character(),
                       name_of_factor = character(),
                       minperc = 0.05,
                       numbers = TRUE) {
  oldparmosaic <- par(no.readonly = TRUE)
  oldparmosaic$new <- FALSE
  on.exit(par(oldparmosaic))
  
  if (missing(minperc)) {
    # minperc is the minimum percentage a column has to contribute to be displayed
    minperc <- 0.05
  }
  if (missing(numbers)) {
    # numbers are shown in rectangle of category
    numbers <- TRUE
  }
  
  counts <- makeTable(samples, fact, name_of_sample, name_of_factor)
  check_assumptions <- check_assumptions_count_data(samples, fact)
  if (check_assumptions == FALSE) {
    my_list <- counts
    return(my_list)
  } else {
    ## Mosaic plot
    ## The height of a box is the same for all boxes in the same row and
    ## is equal to the total count in that row.
    ## The width of a box is the proportion of individuals in the row which fall into that cell.
    ## Full mosaic plot with all data only if the unique numbers of samples and fact are below threshold
    maxfactors <- max(length(unique(samples)), length(unique(fact)))
    threshold <- 6
    
    if (length(unique(samples)) < threshold &
        length(unique(fact)) < threshold) {
      res <- mosaic(
        counts,
        shade = TRUE,
        legend = TRUE,
        # shows pearsons residual
        pop = F
        # ,main = titletext
      )
      
      tab <-
        as.table(ifelse(counts < 0.005 * sum(counts), NA, counts))
      # puts numbers on count
      if (numbers == TRUE) {
        labeling_cells(text = tab, margin = 0)(counts)
      }
    } else {
      #
      ## Eliminate rows and columns contributing less than minperc of the total number of counts
      rowSum <- rowSums(counts)
      colSum <- colSums(counts)
      total <- sum(counts)
      
      
      countscolumn_row_reduced <- as.table(counts[which(rowSum > minperc * total), which(colSum > minperc * total)])
      
      # check dimensions after reduction: must be a contingency table
      test <- dim(as.table(countscolumn_row_reduced))
      if (is.na(test[2])) {
        countsreduced <- counts
      } else {
        countsreduced <- countscolumn_row_reduced
      }
      res <- mosaic(
        countsreduced,
        shade = TRUE,
        legend = TRUE,
        cex.axis = 50 / maxfactors,
        labeling_args = list(gp_labels = (gpar(
          fontsize = 70 / maxfactors
        ))),
        # main = titletext,
        pop = F
      )
      if (numbers == TRUE) {
        labeling_cells(text = countsreduced, margin = 0)(countsreduced)
      }
    }
    my_list <-
      list("mosaic_stats" = res)
    
    return(my_list)
  }
}
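
# Usage sketch (illustrative, not run; mosaic(), labeling_cells() and gpar()
# come from the vcd and grid packages):
#   library(vcd)
#   vis_mosaic(factor(mtcars$cyl), factor(mtcars$gear),
#              name_of_sample = "cylinders", name_of_factor = "gears")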




# Helper functions--------------------------------------

# Check for type of samples and fact
type_sample_fact <- function(samples, fact) {
  typesample <- class(samples)
  typefactor <- class(fact)
  listsf <- list("typesample" = typesample, "typefactor" = typefactor)
  return(listsf)
}

# helper function odds ratio
# calculation of odds ratio
odds_ratio <- function(a, b, c, d, alpha, zerocorrect) {
  attr(odds_ratio, "help") <-
    "odds_ratio calculates odds ratio OR=(a/b)/(c/d) and corresponding upper and lower confidence intervalls\n INPUT: a = group 1 positive, c = group 2 positive, b=group 1 non positive, d = group 2 non positive, 1-alpha: confidence level, default alpha=0.05"
  
  # "odds_ratio calculates odds ratio OR=(a/b)/(c/d) and corresponding upper and lower confidence intervalls\n
  # INPUT: a=number of positives in  group 1, c=group 2 positive, b=group 1 non positive, d =group 2 non positive,default alpha=0.05, OR=(a/b)/(c/d)"\n
  # a,b,c,d can be vectors, elementwise calculation
  #
  
  
  if (missing(alpha)) {
    alpha <- 0.05
  }
  if (missing(zerocorrect)) {
    zerocorrect <- TRUE
  }
  # odds ratio: OR = (a/b)/(c/d)
  
  if (zerocorrect == TRUE) {
    # eliminate columns with zeros, if
    # a=c=0 or b=d=0: no positive or no control cases in BOTH groups
    # Higgins and Green 2011:
    doublezero <- which(a == 0 &
                          c == 0 | b == 0 & d == 0, arr.ind = T)
    a[doublezero] <- NaN
    b[doublezero] <- NaN
    c[doublezero] <- NaN
    d[doublezero] <- NaN
    # Where zeros cause problems with computation of effects or standard errors, 0.5 is added to all cells (a, b, c, d)
    singlezero <- which(a == 0 |
                          b == 0 | c == 0 | d == 0, arr.ind = T)
    a[singlezero] <- a[singlezero] + 0.5
    b[singlezero] <- b[singlezero] + 0.5
    c[singlezero] <- c[singlezero] + 0.5
    d[singlezero] <- d[singlezero] + 0.5
  }
  
  oddA <- a / b
  oddB <- c / d
  
  OR <- oddA / oddB
  
  # confidence interval
  # SE of ln(OR)
  SE <- sqrt(1 / a + 1 / b + 1 / c + 1 / d)
  zalph <- qnorm(1 - alpha / 2)
  
  log_low <- log(OR) - zalph * SE
  log_up <- log(OR) + zalph * SE
  
  lowconf <- exp(log_low) # lower confidence
  upconf <- exp(log_up)
  
  output <- rbind(OR, lowconf, upconf, SE)
  my_list <- ("odds_ratio_statistics" <- output)
  return(my_list)
}
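
# Worked sketch (illustrative): 10 of 100 exposed vs. 5 of 100 controls positive,
# so OR = (10/90)/(5/95) ~ 2.11, with a Wald confidence interval on log(OR):
#   odds_ratio(a = 10, b = 90, c = 5, d = 95, alpha = 0.05)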

# create sorted table
makeTable <- function(samples, fact, samplename, factorname) {
  counts <- data.frame(fact, samples)
  colnames(counts) <- c(factorname, samplename)
  counts2 <- table(counts)
  # sort by column sums
  counts3 <- counts2[, order(colSums(counts2), decreasing = T)]
  # sort by row sums
  counts4 <- counts3[order(rowSums(counts3), decreasing = T), ]
  # remove columnns with all entries zero
  counts4 <- counts4[, colSums(counts4 != 0) > 0]
  
  return(counts4)
}

fisher_chi <- function(counts) {
  # If Cochran's requirements for the chi-squared test are not met, Fisher's exact
  # test is performed instead: i.e. if more than 20% of cells have an EXPECTED
  # count smaller than 5, or at least one cell has an expected count smaller than 1
  
  suppressWarnings(chisq <- chisq.test(counts))
  expected_counts <- chisq$expected
  
  if ((any(expected_counts < 1) # at least one cell with expected count smaller than 1
       |
       sum(expected_counts < 5) / length(expected_counts) > 0.2) # more than 20% of cells with expected count smaller than 5
      &
      # Fisher's exact test breaks down for too large tables
      dim(counts)[2] < 7) {
    # fisher.test
    testFisherChi <- fisher.test(
      counts,
      workspace = 1e9,
      simulate.p.value = T,
      hybrid = F,
      B = 1e5
    )
  } else {
    testFisherChi <- chisq
  }
  
  return(testFisherChi)
}

side_of_nh <- function(alternative) {
  if (alternative == "less") {
    compare <- c(">=")
  } else if (alternative == "greater") {
    compare <- c("<=")
  } else {
    compare <- c("equals")
  }
  return(compare)
}

create_two_samples_vector <- function(samples, fact) {
  # Creates column vector built out of two samples
  # samples all in one column and sorted
  levels <- unique(sort(fact))
  # two levels
  if (length(levels) > 2) {
    return(warning(
      "warning: create_two_samples_vector: only two level input allowed"
    ))
  } else {
    samples1 <- samples[fact == levels[1]]
    samples1 <- samples1[!is.na(samples1)]
    if (length(samples1) == 0) {
      return(warning("each group needs at least one entry"))
    } else {
      samples2 <- samples[fact == levels[2]]
      samples2 <- samples2[!is.na(samples2)]
      if (length(samples2) == 0) {
        return(warning("each group needs at least one entry"))
      } else {
        x <- c(samples1, samples2)
        my_list <- list(
          "sample1" = samples1,
          "sample2" = samples2,
          "sample1and2" = x
        )
        return(my_list)
      }
    }
  }
}

calc_min_max_of_y_axis <- function(samples,
                                   lowerExtramargin,
                                   upperExtramargin) {
  maximum <- max(samples, na.rm = T)
  minimum <- min(samples, na.rm = T)
  spread <- maximum - minimum
  min_y_axis <- minimum - lowerExtramargin * spread
  max_y_axis <- maximum + upperExtramargin * spread
  return(list(min_y_axis, max_y_axis))
}


check_assumptions_shapiro <- function(x) {
  x <- sort(x[complete.cases(x)])
  n <- length(x)
  rng <- x[n] - x[1L] # 1L is integer
  checkSize <- !(is.na(n) || n < 3L || n > 5000L) # FALSE or TRUE
  if (checkSize == FALSE) {
    warning("sample size must be between 3 and 5000")
    return(FALSE)
  }
  if (rng == 0) {
    warning("all 'x' values are identical")
    return(FALSE)
  }
  
  
  
  return(TRUE)
}

check_assumption_shapiro_size_range_two_samples <- function(x1, x2) {
  boolean1 <- check_assumptions_shapiro(x1)
  boolean2 <- check_assumptions_shapiro(x2)
  if (boolean1 == TRUE & boolean2 == TRUE) {
    return(TRUE)
  } else {
    return(FALSE)
  }
}


check_assumptions_count_data <- function(samples, fact) {
  counts <- table(samples, fact)
  sr <- rowSums(counts)
  sc <- colSums(counts)
  counts <- counts[sr > 0, sc > 0, drop = FALSE]
  nr <- as.integer(nrow(counts))
  nc <- as.integer(ncol(counts))
  if (is.null(dim(counts))) {
    warning("no entries in count table ")
    return(FALSE)
  } else if (is.na(nr) || is.na(nc) || is.na(nr * nc)) {
    warning("invalid nrow  or ncol in count data ", domain = NA)
    return(FALSE)
  } else if (nr <= 1L) {
    warning("need 2 or more non-zero row marginals")
    return(FALSE)
  } else if (nc <= 1L) {
    warning("need 2 or more non-zero column marginals")
    return(FALSE)
  } else {
    return(TRUE)
  }
}

sig_diffs_nongauss <- function(samples, fact, conf.level = conf.level) {
  # function to produce a table similar to that produced for TukeyHSD,
  # but for non-normally distributed data
  # calculate p values for each data classification based on pairwise.wilcox.test
  
  if (missing(conf.level)) {
    conf.level <- 0.95
  }
  
  ufactor <- levels(fact)
  pwt <- pairwise.wilcox.test(samples, fact, conf.level = conf.level)
  factormeans <- matrix(0, length(ufactor), 1)
  for (ii in 1:length(ufactor)) {
    pos <- which(fact == ufactor[ii])
    
    factormeans[ii] <- mean(samples[pos])
  }
  
  # make a matrix with a row for every possible combination of
  # 2 data classifications and populate it with the calculated
  # p values
  
  xcomb <- combn(length(ufactor), 2)
  tukeylike <- matrix(0, ncol(xcomb), 4)
  colnames(tukeylike) <- c("diff", "lwr", "upr", "p adj")
  tukeynames <- vector("list", ncol(xcomb))
  for (ii in 1:ncol(xcomb)) {
    tukeynames[ii] <-
      paste(ufactor[xcomb[2, ii]], "-", ufactor[xcomb[1, ii]], sep = "")
    
    p_value <- pwt$p.value[xcomb[2, ii] - 1, xcomb[1, ii]]
    
    if (is.na(p_value)) {
      p_value <- 1
    }
    
    tukeylike[ii, 4] <- p_value
    tukeylike[ii, 1] <- NA
    tukeylike[ii, 2] <- NA
    tukeylike[ii, 3] <- NA
  }
  rownames(tukeylike) <- tukeynames
  
  # re-format the table slightly so it is the same as that produced
  # by TukeyHSD and output
  
  tukeylike2 <- list(tukeylike)
  
  return(tukeylike2)
}


conf_band <- function(x, reg, conf.level = conf.level, up) {
  # reg: result of linear regression lm
  # up: fact plus or minus
  if (missing(conf.level)) {
    conf.level <- 0.95
  }
  
  if (missing(up)) {
    up <- 1
  }
  alpha <- 1 - conf.level
  a <- reg$coefficients[2] # slope
  b <- reg$coefficients[1] # constant
  md <- x - mean(x)
  
  result <- x # initialization
  
  # formula standard error of the regression line at point x:
  # https://stats.stackexchange.com/questions/101318/understanding-shape-and-calculation-of-confidence-bands-in-linear-regression
  # See also statistics script, page 124: confidence interval
  
  # http://www.sthda.com/english/articles/40-regression-analysis/166-predict-in-r-model-predictions-and-confidence-intervals/
  for (i in 1:length(x)) {
    result[i] <- a * x[i] + b +
      up * qt(1 - alpha / 2, length(x) - 2) *
      # Standard error of the estimate
      sqrt(sum(reg$resid * reg$resid) / (length(x) - 2)) *
      #
      sqrt(1 / (length(x)) + md[i]^2 / sum(md * md))
  }
  return(result)
}
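
# Consistency sketch (illustrative, not run): for a simple linear regression the
# band from conf_band() should agree with predict(..., interval = "confidence"):
#   set.seed(1)
#   x <- sort(runif(30)); y <- 2 * x + rnorm(30, sd = 0.2)
#   reg <- lm(y ~ x)
#   lo1 <- conf_band(x, reg, conf.level = 0.95, up = -1)
#   lo2 <- predict(reg, interval = "confidence", level = 0.95)[, "lwr"]
#   all.equal(unname(lo1), unname(lo2)) # TRUE up to numerical error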

progn_band <- function(x, reg, conf.level, up) {
  if (missing(conf.level)) {
    conf.level <- 0.95
  }
  alpha <- 1 - conf.level
  if (missing(up)) {
    up <- 1
  }
  a <- reg$coefficients[2]
  b <- reg$coefficients[1]
  md <- x - mean(x)
  
  result <- x
  
  for (i in 1:length(x)) {
    result[i] <- a * x[i] + b +
      up * qt(1 - alpha / 2, length(x) - 2) *
      # Standard error of the estimate
      sqrt(sum(reg$resid * reg$resid) / (length(x) - 2)) *
      #
      sqrt(1 + 1 / (length(x)) + md[i]^2 / sum(md * md))
  }
  return(result)
}
calculate_comparepvalue <- function(p_value, conf.level) {
  if (p_value < 1 - conf.level) {
    comparepvalue <- "<"
  } else {
    comparepvalue <- ">"
  }
  return(comparepvalue)
}

# Check for normality with Shapiro-Wilk test without visualization----
test_norm <- function(x) {
  # Remove NA from x
  x <- x[!is.na(x)]
  #  KS = ks.test(x, pnorm, mean(x), sd(x))
  shapiro_wilk_test <- shapiro.test(x)
  # my_list = list("Kolmogorov-Smirnoff" = KS, "Shapiro" =SH)
  return(shapiro_wilk_test)
}

# Check length of distributions for t-test----
check_assumption_sample_size_t_test <- function(x1, x2, minimum_size) {
  # x1 sample 1
  # x2 sample 2
  # minimum_size:return TRUE if length> minimum_size
  if (length(x1) > minimum_size & length(x2) > minimum_size) {
    return(TRUE)
  } else {
    return(FALSE)
  }
}

# Define color scheme-----

#' \code{colorscheme(x)} selects the color scheme of the graphical output. Function parameter NULL lists all available color schemes, 1 a color tuple of green and blue,
#' 2 a color tuple of dark green and turquoise, 3 a color palette as defined by RColorBrewer
#'
#' @param colorcode selects the color scheme. NULL: list of all available color schemes, 1: colortuple, 2: colortuple2, 3: ColorPalette
#' @return selected color scheme; colors are given by their hex codes #RRGGBB
#' @noRd

colorscheme <- function(colorcode = NULL) {
  browserLightGreen <- "#B8E0B8" # matched part group0
  browserLightBlue <- "#B3D1EF" # matched part group1
  browserLightTurquois <- "#B3E1EF" # light turquois
  browserDarkGreen <- "#5CB85C" # dark green
  colortuple <- c(browserLightGreen, browserLightBlue)
  colortuple2 <- c(browserDarkGreen, browserLightTurquois)
  # from package RColorBrewer Set 3
  ColorPalette <- c(
    "#8DD3C7",
    "#FFFFB3",
    "#BEBADA",
    "#FB8072",
    "#80B1D3",
    "#FDB462",
    "#B3DE69",
    "#FCCDE5",
    "#D9D9D9",
    "#BC80BD",
    "#CCEBC5",
    "#FFED6F"
  )
  
  
  my_list <- list(
    "colortuple" = colortuple,
    "colortuple2" = colortuple2,
    "ColorPalette" = ColorPalette
  )
  
  if (is.null(colorcode)) {
    return(my_list)
  } else if (colorcode == 1) {
    return(colortuple)
  } else if (colorcode == 2) {
    return(colortuple2)
  } else if (colorcode == 3) {
    return(ColorPalette)
  } else {
    message("Choose valid parameter: NULL, 1,2 or 3")
  }
}
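
# Usage sketch (illustrative, not run):
#   colorscheme(1)            # green/blue tuple used for box plot fills
#   colorscheme(3)[1:4]       # first four colors of the RColorBrewer Set3 palette
#   names(colorscheme(NULL))  # "colortuple" "colortuple2" "ColorPalette"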

resetPar <- function() {
  dev.new() # open a fresh device (the original `dev.new` without parentheses was a no-op)
  while (!is.null(dev.list())) {
    dev.off() # close all open devices; restores default values
  }
  oldpar <- par(no.readonly = TRUE)
  return(oldpar)
}
