R/function.Summary_Measures.R
In EQUALSTATS: Algorithm Driven Statistical Analysis for Researchers without Coding Skills

Documented in function.Summary_Measures

function.Summary_Measures <- function(Predefined_lists, rv){
  # Lists
  plan <- {cbind.data.frame(
    analysis_number = paste0("AN", formatC((length(rv$plan) + 1), width = 4, format = "d", flag = 0)),
    first_menu_choice = rv$first_menu_choice,
    second_menu_choice = rv$second_menu_choice,
    entry_1 = paste0(rv$entry[[1]], collapse = "%_%"),
    entry_2 = paste0(rv$entry[[2]], collapse = "%_%"),
    entry_3 = paste0(rv$entry[[3]], collapse = "%_%"),
    entry_4 = paste0(rv$entry[[4]], collapse = "%_%"),
    entry_5 = paste0(rv$entry[[5]], collapse = "%_%"),
    entry_6 = paste0(rv$entry[[6]], collapse = "%_%"),
    entry_7 = paste0(rv$entry[[7]], collapse = "%_%"),
    entry_8 = paste0(rv$entry[[8]], collapse = "%_%"),
    entry_9 = paste0(rv$entry[[9]], collapse = "%_%"),
    entry_10 = paste0(rv$entry[[10]], collapse = "%_%"),
    entry_11 = paste0(rv$entry[[11]], collapse = "%_%"),
    entry_12 = paste0(rv$entry[[12]], collapse = "%_%"),
    entry_13 = paste0(rv$entry[[13]], collapse = "%_%"),
    entry_14 = paste0(rv$entry[[14]], collapse = "%_%"),
    entry_15 = paste0(rv$entry[[15]], collapse = "%_%"),
    same_row_different_row = ""
  )}
  selections <- {paste0(
    '<b>entry_1: </b>', paste0(rv$entry[[1]], collapse = "; "), '<br>',
    '<b>entry_2: </b>', paste0(rv$entry[[2]], collapse = "; "), '<br>',
    '<b>entry_3: </b>', paste0(rv$entry[[3]], collapse = "; "), '<br>'
  )}
  code <- {paste0(
    '# AN', formatC((length(rv$plan) + 1), width = 4, format = "d", flag = 0), '\n',
    'rv$first_menu_choice <- "', rv$first_menu_choice, '"\n',
    'rv$second_menu_choice <- ', ifelse(is.na(rv$second_menu_choice),NA,paste0('"',rv$second_menu_choice, '"')), '\n',
    'rv$entry[[1]] <- ', ifelse(length(rv$entry[[1]]) > 1,
                                paste0('c("', paste0(rv$entry[[1]], collapse = '", "'), '")'),
                                paste0('"',rv$entry[[1]],'"')), '\n',
    'rv$entry[[2]] <- ', ifelse(length(rv$entry[[2]]) > 1,
                                paste0('c("', paste0(rv$entry[[2]], collapse = '", "'), '")'),
                                paste0('"',rv$entry[[2]],'"')), '\n',
    'rv$entry[[3]] <- ', ifelse(length(rv$entry[[3]]) > 1,
                                paste0('c("', paste0(rv$entry[[3]], collapse = '", "'), '")'),
                                paste0('"',rv$entry[[3]],'"')), '\n',
    'AN', formatC((length(rv$plan) + 1), width = 4, format = "d", flag = 0), '_results <- function.',rv$first_menu_choice,'(Predefined_lists, rv)', '\n',
    if(length(rv$plan) == 0){
      'if (TRUE %in% (AN0001_results$plots_list != "")) {invisible(file.rename(AN0001_results$plots_list, paste0(AN0001_results$plots_list,"_copy")))}
'
    } else {
      paste0(
        'AN', formatC((length(rv$plan) + 1), width = 4, format = "d", flag = 0),'_results$results[2,1] <- "AN',formatC((length(rv$plan) + 1), width = 4, format = "d", flag = 0),'"', '\n',
        'if (TRUE %in% (AN', formatC((length(rv$plan) + 1), width = 4, format = "d", flag = 0),'_results$plots_list != "")) {invisible(file.rename(AN', formatC((length(rv$plan) + 1), width = 4, format = "d", flag = 0),'_results$plots_list, str_replace_all(AN', formatC((length(rv$plan) + 1), width = 4, format = "d", flag = 0),'_results$plots_list, "/AN0001_", "/AN', formatC((length(rv$plan) + 1), width = 4, format = "d", flag = 0),'_")))}', '\n')
    },
    'write.table(x = AN', formatC((length(rv$plan) + 1), width = 4, format = "d", flag = 0),'_results$results, append = TRUE, file = paste0(rv$StorageFolder, "/results.csv"), sep = ",", row.names = FALSE, col.names = FALSE, na = "", quote = FALSE, qmethod = "double")', '\n'
  )}
  report_preferences <- {list(
    categorical = list(
      mandatory_1 = c("Included rows"),
      mandatory_2 = c(paste0(rv$entry[[1]], ": Categories")),
      EQUAL_STATS_choice = c("Total observations", "Missing observations", "Available observations", "Proportion", "Proportion - LCI", "Proportion - UCI")
    ),
    quantitative = list(
      EQUAL_STATS_choice_1 = c("Total observations", "Missing observations", "Available observations"),
      EQUAL_STATS_choice_2 = c("Min.", "Max."),
      EQUAL_STATS_choice_4 = c("Kurtosis", "Kurtosis - LCI", "Kurtosis - UCI", "Skewness", "Skewness - LCI", "Skewness - UCI", "Shapiro-Wilk W-statistic", "Shapiro-Wilk p-value", "Kolmogrov-Smirnov D-statistic", "Kolmogrov-Smirnov p-value")
    )
  )}
  # Functions for summary
  func.categorical <- function(variable, variable_name) {
    summary <- list()
    summary$total <- length(variable)
    summary$missing <- length(variable[is.na(variable)])
    if (summary$total > summary$missing) {
      if(nlevels(variable) == 2) {
        summary$summary <- data.frame(sapply(
          1:nlevels(variable), function(x) {
            as.numeric(BinomCI(length(variable[(!is.na(variable)) & variable == levels(variable)[x]]), length(variable[! is.na(variable)]), conf.level = 0.95))
          }
        ), check.names = FALSE)
        colnames(summary$summary) <- levels(variable)
        row.names(summary$summary) <- c("Proportion", "Proportion - LCI", "Proportion - UCI")
      } else {
        summary$summary <- data.frame(t(MultinomCI(table(variable), conf.level = 0.95)),row.names = c("Proportion", "Proportion - LCI", "Proportion - UCI"), check.names = FALSE)
      }
    } else {
      summary$summary <- data.frame(
        `All levels` = c(NA, NA, NA),
        check.names = FALSE,
        row.names = c("Proportion", "Proportion - LCI", "Proportion - UCI"),
        check.rows = FALSE
      )
    }
    results <- data.frame(
      row.names = c("Total observations", "Missing observations", "Available observations"),
      c(summary$total, summary$missing, (summary$total - summary$missing)),
      check.names = FALSE
    )
    colnames(results) <- variable_name
    results <- function.rbind_different_column_numbers(list(results, summary$summary))
    return(results)
  }
  func.quantitative <- function(variable) {
    summary <- list()
    summary$total <- length(variable)
    summary$missing <- length(variable[is.na(variable)])
    if ((summary$total - summary$missing) > 1) {
      summary$summary <- summary(variable[!is.na(variable)])
      summary$sd <- sd(variable[!is.na(variable)])
      summary$mean_ci <- suppressWarnings(try(MeanCI(variable[!is.na(variable)], conf.level = 0.95)[2:3], silent = TRUE))
      if (length(summary$mean_ci) < 2) {
        summary$mean_ci <- c(NA, NA)
      }
      summary$median_ci <- suppressWarnings(try(MedianCI(variable[!is.na(variable)], conf.level = 0.95)[2:3], silent = TRUE))
      if (length(summary$median_ci) < 2) {
        summary$median_ci <- c(NA, NA)
      }
      summary$kurtosis <- suppressWarnings(try(Kurt(variable[!is.na(variable)], conf.level = 0.95), silent = TRUE))
      if (str_detect(summary$kurtosis[[1]][1], "Error")) {
        summary$kurtosis <- c(NA, NA, NA)
      } else {
        summary$kurtosis <- summary$kurtosis + 3 # kurtosis provided is excess kurtosis. To get the actual kurtosis add 3
      }
      summary$skewness <- suppressWarnings(try(Skew(variable[!is.na(variable)], conf.level = 0.95), silent = TRUE))
      if (str_detect(summary$skewness[[1]][1], "Error")) {
        summary$skewness <- c(NA, NA, NA)
      }
      summary$shapiro <- suppressWarnings(try(shapiro.test(variable[!is.na(variable)]), silent = TRUE))
      if (str_detect(summary$shapiro[[1]][1], "Error")) {
        summary$shapiro <- c(NA, NA)
      } else {
        summary$shapiro <- c(summary$shapiro$statistic, summary$shapiro$p.value)
      }
      summary$ks <- suppressWarnings(ks.test(variable[!is.na(variable)],
                                             "pnorm",
                                             mean=mean(variable[!is.na(variable)]),
                                             sd=sd(variable[!is.na(variable)])
      ))
      summary$ks <- c(summary$ks$statistic, summary$ks$p.value)
    } else {
      if ((summary$total == summary$missing)) {
        summary$summary <- rep(NA, 6)
      } else {
        summary$summary <- c(variable[! is.na(variable)], NA, variable[! is.na(variable)], variable[! is.na(variable)], NA, variable[! is.na(variable)])
      }
      names(summary$summary) <- c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.")
      summary$sd <- NA
      summary$mean_ci <- c(NA, NA)
      summary$median_ci <- c(NA, NA)
      summary$kurtosis <- c(NA, NA, NA)
      summary$skewness <- c(NA, NA, NA)
      summary$shapiro <- c(NA, NA)
      summary$ks <- c(NA, NA)
    }
    results <- data.frame(
      row.names = c("Total observations", "Missing observations", "Available observations", names(summary$summary),
                    "Standard deviation","Mean - LCI", "Mean - UCI", "Median - LCI", "Median - UCI",
                    "Kurtosis", "Kurtosis - LCI", "Kurtosis - UCI", "Skewness", "Skewness - LCI", "Skewness - UCI",
                    "Shapiro-Wilk W-statistic", "Shapiro-Wilk p-value", "Kolmogrov-Smirnov D-statistic", "Kolmogrov-Smirnov p-value"),
      summary = c(summary$total, summary$missing, (summary$total - summary$missing), as.numeric(summary$summary),
                  summary$sd, as.numeric(summary$mean_ci),as.numeric(summary$median_ci),
                  as.numeric(summary$kurtosis), as.numeric(summary$skewness),
                  as.numeric(summary$shapiro), as.numeric(summary$ks)
      )
    )
    return(results)
  }
  # Some generic processing of data if there is a second variable
  if (rv$entry[[2]] != "") {
    eachgroup <- list()
    data <- rv$import_data$data[,c(rv$entry[[1]], rv$entry[[2]])]
    factors_in_variable_2 <- levels(data[,2])
    is.ordinal <- (rv$entry[[2]] %in%  rv$import_data$ordinal)
    if (TRUE %in% (is.na(data[,2]))) {
      factors_in_variable_2 <- c(levels(data[,2]), "missing")
      data[,2] <- as.character(data[,2])
      data[,2][is.na(data[,2])] <- "missing"
      data[,2] <- factor(data[,2], levels = factors_in_variable_2, ordered = is.ordinal)
    }
  }
  # Find if the variable is categorical or quantitative
  if (rv$entry[[1]] %in% rv$import_data$categorical) {
    overall <- func.categorical(variable = rv$import_data$data[,rv$entry[[1]]], variable_name = "All")
    if (rv$entry[[2]] != "") {
      for (i in 1:nlevels(data[,2])) {
        eachgroup[[i]] <- func.categorical(variable = data[data[,2] == levels(data[,2])[i],1],
                                           variable_name = paste0(rv$entry[[2]], " = ", levels(data[,2])[i]))
      }
      results <- cbind.data.frame(
        overall,
        do.call(cbind.data.frame, eachgroup)
      )
    } else {
      results <- overall
    }
    row.names(results)[row.names(results) == "Column_names_1"] <- "Included rows"
    row.names(results)[row.names(results) == "Column_names_2"] <- paste0(rv$entry[[1]], ": Categories")
    # Keep only the necessary rows
    if (!("EQUAL-STATS choice" %in% rv$entry[[3]])){
      rows_to_keep <- as.vector(c(report_preferences$categorical$mandatory_1,
                                  if("Total observations" %in% rv$entry[[3]]){"Total observations"},
                                  if("Missing observations" %in% rv$entry[[3]]){"Missing observations"},
                                  if("Available observations" %in% rv$entry[[3]]){"Available observations"},
                                  report_preferences$categorical$mandatory_2,
                                  if("Proportions" %in% rv$entry[[3]]){"Proportion"},
                                  if("Confidence intervals for proportions" %in% rv$entry[[3]]){c("Proportion - LCI", "Proportion - UCI")}
      ))
      results <- results[rows_to_keep,]
    }
  } else {
    overall <- func.quantitative(variable = rv$import_data$data[,rv$entry[[1]]])
    colnames(overall) <- "All"
    if (rv$entry[[2]] != "") {
      for (i in 1:nlevels(data[,2])) {
        eachgroup[[i]] <- func.quantitative(variable = data[data[,2] == levels(data[,2])[i],1])
        colnames(eachgroup[[i]]) <- paste0(rv$entry[[2]], " = ", levels(data[,2])[i])
      }
      results <- cbind.data.frame(
        overall,
        do.call(cbind.data.frame, eachgroup)
      )
    } else {
      results <- overall
    }
    # Keep only the necessary rows
    if (!("EQUAL-STATS choice" %in% rv$entry[[3]])){
      rows_to_keep <- as.vector(c(
        if("Total observations" %in% rv$entry[[3]]){"Total observations"},
        if("Missing observations" %in% rv$entry[[3]]){"Missing observations"},
        if("Available observations" %in% rv$entry[[3]]){"Available observations"},
        if("Mean" %in% rv$entry[[3]]){"Mean"},
        if("Standard deviation" %in% rv$entry[[3]]){"Standard deviation"},
        if("Confidence intervals for mean" %in% rv$entry[[3]]){c("Mean - LCI", "Mean - UCI")},
        if("Median" %in% rv$entry[[3]]){"Median"},
        if("Quartiles" %in% rv$entry[[3]]){c("1st Qu.", "3rd Qu.")},
        if("Confidence intervals for median" %in% rv$entry[[3]]){c("Median - LCI", "Median - UCI")},
        if("Minimum" %in% rv$entry[[3]]){"Min."},
        if("Maximum" %in% rv$entry[[3]]){"Max."},
        if("Kurtosis" %in% rv$entry[[3]]){c("Kurtosis", "Kurtosis - LCI", "Kurtosis - UCI")},
        if("Skewness" %in% rv$entry[[3]]){c("Skewness", "Skewness - LCI", "Skewness - UCI")},
        if("Shapiro-Wilk test for normality" %in% rv$entry[[3]]){c("Shapiro-Wilk W-statistic", "Shapiro-Wilk p-value")},
        if("Kolmogrov-Smirnov test for normality" %in% rv$entry[[3]]){c("Kolmogrov-Smirnov D-statistic", "Kolmogrov-Smirnov p-value")}
      ))
    } else {
      # Test normality through skewness, kurtosis, Shapiro-Wilk test, and Kolmogrov-Smirnov test
      # Do this only for the main observations and not each subgroup
      # Skewness - the confidence intervals do not overlap (-0.5) to 0.5
      skewness_CI <- results[c("Skewness - LCI", "Skewness - UCI"),"All"]
      if ((is.na(skewness_CI[1])) | (is.na(skewness_CI[2]))) {
        skewness <- "Not possible to determine"
      } else if ((skewness_CI[2] < (-0.5)) | (skewness_CI[1] > 0.5)) {
        skewness <- "Non-normal"
      } else {
        skewness <- "No evidence that it is non-normal"
      }
      kurtosis_CI <- results[c("Kurtosis - LCI", "Kurtosis - UCI"),"All"]
      # Kurtosis (this is not excess kurtosis) does not overlap 3 (and entirely to the right of 3)
      if ((is.na(kurtosis_CI[1])) | (is.na(kurtosis_CI[2]))) {
        kurtosis <- "Not possible to determine"
      } else if (kurtosis_CI[1] > 3) {
        kurtosis <- "Non-normal"
      } else {
        kurtosis <- "No evidence that it is non-normal"
      }
      # Shapiro-Wilk test: This may not work for more than 5000 observations
      if (is.na(results["Shapiro-Wilk p-value","All"])) {
        if (results["Available observations", "All"] > 5000) {
          shapiro.wilk <- "No evidence that it is non-normal"
        } else {
          shapiro.wilk <- "Not possible to determine"
        }
      } else if (results["Shapiro-Wilk p-value","All"] <= 0.10) {
        shapiro.wilk <- "Non-normal"
      } else {
        shapiro.wilk <- "No evidence that it is non-normal"
      }
      # Shapiro-Wilk test: This may not work for more than 5000 observations
      if (is.na(results["Kolmogrov-Smirnov p-value","All"])) {
        ks <- "Not possible to determine"
      } else if (results["Kolmogrov-Smirnov p-value","All"] <= 0.10) {
        ks <- "Non-normal"
      } else {
        ks <- "No evidence that it is non-normal"
      }
      normality_results <- c(skewness, kurtosis, shapiro.wilk, ks)
      table(normality_results)
      # Now at least one is "Non-normal" or at least three have "Not possible to determine"
      if (
        ("Non-normal" %in% normality_results) |
        (length(normality_results[normality_results == "Not possible to determine"]) > 2)
      ) {
        distribution <- "Non-normal"
      } else {
        distribution <- "No evidence that it is non-normal"
      }
      rows_to_keep <- as.vector(c(
        report_preferences$quantitative$EQUAL_STATS_choice_1,
        report_preferences$quantitative$EQUAL_STATS_choice_2,
        if (distribution == "Non-normal") {
          c("Median", "1st Qu.", "3rd Qu.")
        } else {
          c("Mean", "Standard deviation")
        },
        report_preferences$quantitative$EQUAL_STATS_choice_4
      ))
    }
    if (ncol(results) == 1) {
      results <- data.frame(All = results[rows_to_keep,], row.names = rows_to_keep)
    } else {
      results <- results[rows_to_keep,]
    }
  }
  results <- rbind.data.frame(
    c(paste0("AN",formatC((length(rv$plan) + 1), width = 4, format = "d", flag = 0)), rep(NA, (ncol(results)-1))),
    c(paste0(Predefined_lists$main_menu[Predefined_lists$menu_short ==  rv$first_menu_choice], ifelse(! is.na(rv$second_menu_choice), paste0(": ", rv$second_menu_choice), "")), rep(NA, (ncol(results)-1))),
    c("Successful", rep(NA, (ncol(results)-1))),
    c(rv$entry[[1]], rep(NA, (ncol(results)-1))),
    if (rv$entry[[1]] %in% rv$import_data$quantitative) {
      colnames(results)
    },
    results
  )
  row.names(results)[1:4] <- c("Analysis number" , "Analysis type" , "Analysis outcome" , "Variable name")
  if ((rv$entry[[1]] %in% rv$import_data$quantitative)) {row.names(results)[5] <- "Included rows"}
  results <- cbind.data.frame(
    parameters = row.names(results),
    results
  )
  results$parameters[substr(results$parameters,1, 7) == "Header_"] <- ""
  results$parameters[substr(results$parameters,1, 13) == "Column_names_"] <- ""
  results <- t(results)
  # For display in R-shiny app some modifications are necessary
  results_display <- data.frame(results, check.names = FALSE, check.rows = FALSE)
  results_display <- results_display[2:nrow(results_display),]
  numeric_rows <- setdiff(colnames(results_display), c("Analysis number", "Analysis outcome", "Analysis type","Variable name", "Included rows", report_preferences$categorical$mandatory_2))
  results_display[,numeric_rows] <- sapply(results_display[,numeric_rows], as.numeric)
  # List of plots
  # Nothing for this function
  plots_list <- ""
  plots_list_display <- ""
  analysis_outcome <- "Successful"
  display_table <- TRUE
  display_plot <- FALSE
  function_output <- list(analysis_outcome = analysis_outcome, plan = plan, code = code, results = results, results_display = results_display, plots_list = plots_list, plots_list_display = plots_list_display, selections = selections, display_table = display_table, display_plot = display_plot)
  return(function_output)
}
Any scripts or data that you put into this service are public.
EQUALSTATS documentation built on Aug. 22, 2025, 5:10 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
EQUALSTATS
Algorithm Driven Statistical Analysis for Researchers without Coding Skills

R/function.Summary_Measures.R
In EQUALSTATS: Algorithm Driven Statistical Analysis for Researchers without Coding Skills

Defines functions function.Summary_Measures

Documented in function.Summary_Measures

Try the EQUALSTATS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EQUALSTATS Algorithm Driven Statistical Analysis for Researchers without Coding Skills

R/function.Summary_Measures.R In EQUALSTATS: Algorithm Driven Statistical Analysis for Researchers without Coding Skills

Defines functions function.Summary_Measures

Documented in function.Summary_Measures

Try the EQUALSTATS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EQUALSTATS
Algorithm Driven Statistical Analysis for Researchers without Coding Skills

R/function.Summary_Measures.R
In EQUALSTATS: Algorithm Driven Statistical Analysis for Researchers without Coding Skills