DBERlibR: DBERlibR

Documented in independentsamples itemanalysis loadpackages onewayancova pairedsamples twowayrepeatedanova

###############################################################################
#' Load Dependencies
#'
#' This function downloads, installs, and loads all dependencies required to use the functions in this package.
#'
#' @export

loadpackages <- function() {

if (!require(tidyverse)) install.packages('tidyverse')
library(tidyverse)
if (!require(readr)) install.packages('readr')
library(readr)
if (!require(dplyr)) install.packages('dplyr')
library(dplyr)
if (!require(broom)) install.packages('broom')
library(broom)
if (!require(car)) install.packages('car')
library(car)
if (!require(corrplot)) install.packages('corrplot')
library(corrplot)
if (!require(ggplot2)) install.packages('ggplot2')
library(ggplot2)
if (!require(ggpubr)) install.packages('ggpubr')
library(ggpubr)
if (!require(seriation)) install.packages('seriation')
library(seriation)
if (!require(PerformanceAnalytics)) install.packages('PerformanceAnalytics')
library(PerformanceAnalytics)
if (!require(naniar)) install.packages('naniar')
library(naniar)
if (!require(reshape)) install.packages('reshape')
library(reshape)
if (!require(openxlsx)) install.packages('openxlsx')
library(openxlsx)
if (!require(ggrepel)) install.packages('ggrepel')
library(ggrepel)
if (!require(rstatix)) install.packages('rstatix')
library(rstatix)
if (!require(emmeans)) install.packages('emmeans')
library(emmeans)
if (!require(stargazer)) install.packages('stargazer')
library(stargazer)
if (!require(psych)) install.packages('psych')
library(psych)

}

###############################################################################
#' Item Analysis
#'
#' This function automatically reads and cleans the data (e.g., converting missing values to "0"), calculates difficulty and discriminant scores, and exports the results to an Excel file, which is saved in the working directory. The Excel file name follows the data file name used for item analysis; for example, if you've input "data_treat_pre.csv" in the function, then the output Excel file name contains "treat_pre." The function also generates plots in the jpeg file format to visualize the results; the jpeg file name follows the data file name as well so that you can easily find the results of the item analysis you've just run. Find the output files of which name begins with "itemanalysis" instantly generated by this function in the working directory.
#'
#' @import tidyverse
#' @import readr
#' @importFrom dplyr select
#' @importFrom dplyr summarize
#' @import broom
#' @import ggplot2
#' @import corrplot
#' @import ggpubr
#' @import car
#' @import seriation
#' @import PerformanceAnalytics
#' @import naniar
#' @import reshape
#' @import openxlsx
#' @import ggrepel
#' @import rstatix
#' @import emmeans
#' @import stargazer
#' @importFrom psych describe
#' @importFrom psych describeBy
#'
#' @param csv_data This function requires a csv data file name: "data_treat_pre.csv", "data_treat_post.csv", "data_ctrl_pre.csv", or "data_ctrl_post.csv".
#'
#' @export
#'

itemanalysis <- function(csv_data) {

options(warn=-1)

  # Selecting types
  without_any_punc_filename <- gsub("[[:punct:]]", " ",tools::file_path_sans_ext(csv_data))
  if (grepl("\\bpre\\b", without_any_punc_filename, ignore.case=TRUE) ) {
    file_name="pre"
  } else if (grepl("\\bpost\\b", without_any_punc_filename, ignore.case=TRUE)) {
    file_name="post"
  } else { print("unavailable filename with pre and post")}

  # Selecting types
  if (grepl("\\bctrl\\b", without_any_punc_filename, ignore.case=TRUE) ) {
    group_name="ctrl"
  } else if (grepl("\\btreat\\b", without_any_punc_filename, ignore.case=TRUE) ) {
    group_name="treat"
  } else {
    print("unavailable filename with ctrl and treat ")}

  # Reading
  data_original <- read_csv(csv_data,col_types = cols())

  # Deleting students with too many skipped answers-----------------
  nrow_all <- nrow(data_original)
  n <- as.numeric(length(data_original[,-1]))
  data_original <- data_original %>% mutate(m_rate = round(rowSums(is.na(data_original))/n,3))

  cat("", sep="\n")
  message("Skipped answers will be treated as incorrect in this package, and too many skipped answers may skew the results of data analysis. By default, students with more than 15% of skipped answers will be excluded from data analysis to prevent skewed results. If you don't agree with this rate of 15%, you can provide a different rate below. If you're fine with 15%, just hit the 'Enter' key (i.e., no input) when you see a prompt 'Please enter an alternative rate...: ' below. If you want to apply an alternative rate, please put it below in a decimal format like 0.2 for 20%")
  cat("", sep="\n")
  m_rate_default <- readline(prompt="Please enter an alternative rate in a decimal format (e.g., 0.1 for 10%, 0.25 for 25%): ")

  if (m_rate_default > 0) {
    data_original <- subset(data_original, m_rate < m_rate_default)
    nrow_subset <- nrow(data_original)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."), sep="\n")
    } else {
      cat("", sep="\n")
      cat(paste0("The number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."), sep="\n")
    }
  } else {
    data_original <- subset(data_original, m_rate < 0.15)
    nrow_subset <- nrow(data_original)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."), sep="\n")
    } else {
      cat("", sep="\n")
      cat(paste0("The number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."), sep="\n")
    }
  }
  #----------------------------------------------------------------

  # Delete the column of "m_rate" to exclude from the analysis down the road.
  data_original$m_rate=NULL
  cat("", sep="\n")

  # Calculate average scores
  data_original[is.na(data_original)]= 0
  colnames(data_original) <- paste( colnames(data_original), file_name, sep = "_")
  data_original <- data_original %>% mutate(avg_score = rowMeans(data_original[,-1]))

  # ggplots: difficulty index
  difficulty_data <- data_original[,c(-1)] %>% colMeans() %>% t() %>% as.data.frame() %>% t()
  difficulty_data <- reshape::melt(round(difficulty_data,2), id.vars=c("Q_id"))  %>% dplyr::select(-X2)
  colnames(difficulty_data) = c("q.number","difficulty_index")
  plot_difficulty<- ggplot(difficulty_data, aes(x= difficulty_index , y= reorder(q.number,difficulty_index))) +
    geom_point(alpha=0.9) +
    geom_vline(xintercept = 0.2, color ="red")+
    geom_hline(yintercept = difficulty_data$q.number[length(data_original)-1],color="blue") +
    ggtitle("Difficulty Plot")+
    xlab("Proportion")+ ylab("Question Item Number\n\n\n\n\n") +theme_minimal() + geom_text_repel(aes(label = round(difficulty_index,2)))

  jpeg(paste0("itemanalysis_difficulty_plot_", group_name,"_",file_name,".jpeg"))
  print(plot_difficulty)
  dev.off()

  # Create data subset: difficulty index lower than 0.2
  toodifficultitems <- subset(difficulty_data, difficulty_index < 0.2)
  toodifficultitems_nrow <- nrow(toodifficultitems)

  # ggplots: discrimination index
  discrimination_data <- cor(data_original[,-1]) %>% as.data.frame()
  plot_discrimination <- ggplot(discrimination_data, aes(x=avg_score , y= reorder (colnames(discrimination_data),avg_score))) + geom_point() +
    geom_vline(xintercept = 0.2, color ="red")+
    xlab("Relationship Coefficient") +
    ylab("Question Item Number") + ggtitle("Discrimination Plot") +
    theme_minimal() + geom_text_repel(aes(label = round(avg_score,2)))

  jpeg(paste0("itemanalysis_discrimination_plot_", group_name,"_",file_name,".jpeg"))
  print(plot_discrimination)
  dev.off()

  # Create data subset: discrimination index lower than 0.2
  qnumber <- colnames(discrimination_data)
  discrimination_index <- round(discrimination_data$avg_score,2)
  discrimination_df <- data.frame(qnumber,discrimination_index)
  nondiscriminantitems <- subset(discrimination_df, discrimination_index < 0.2)
  nondiscriminantitems_nrow <- nrow(nondiscriminantitems)

  # Print difficulty data results
  cat("", sep="\n")
  cat("==================================", sep="\n")
  cat("Item Analysis Results - Difficulty", sep="\n")
  cat("==================================", sep="\n")
  print(difficulty_data)
  cat("", sep="\n")
  message("The result above has been exported to an Excel file, of which file name includes 'itemanalysis_outputs,' in the working directory.")
  plot(plot_difficulty)
  cat("", sep="\n")
  message("Refer to 'Difficulty Plot' in the 'Plots' panel. The plot has also been exported to an image file, of which file name includes 'itemanalysis_difficulty_plot,' in the working directory.")
  if (toodifficultitems_nrow > 0) {
    cat("", sep="\n")
    cat("As you can see in the plot, the question items of which difficulty index turned out to be lower than 0.2 are as follows:")
    print(toodifficultitems$Q_id)
  } else {
    cat("", sep="\n")
    cat("As you can see in the plot, none of the difficulty indixes was found to be lower than 0.2", sep="\n")
  }

  # Print discrimination data results
  cat("", sep="\n")
  cat("======================================", sep="\n")
  cat("Item Analysis Results - Discrimination", sep="\n")
  cat("======================================", sep="\n")
  print(discrimination_df)
  cat("", sep="\n")
  message("The result above has been exported to an Excel file, of which file name includes 'itemanalysis_outputs,' in the working directory.")
  plot(plot_discrimination)
  cat("", sep="\n")
  message("Refer to 'Discrimination Plot' in the 'Plots' panel. The plot has also been exported to an image file, of which file name includes 'itemanalysis_discrimination_plot,' in the working directory.")
  if (nondiscriminantitems_nrow > 0) {
    cat("", sep="\n")
    cat("As you can see in the plot, the question items of which discrimination index turned out to be lower than 0.2 are as follows:", sep="\n")
    print(nondiscriminantitems$qnumber)
  } else {
    cat("", sep="\n")
    cat("None of the discrimination indixes was found to be lower than 0.2", sep="\n")
  }

    # Export the results to Excel
  # Look for an Excel file name ending with the same data file name (e.g., itemanalysis_outputs_ctrl_pre.xlsx)
  wb <- createWorkbook()
  addWorksheet(wb, "difficulty_data")
  writeData(wb,"difficulty_data",difficulty_data)
  # addWorksheet(wb, "difficulty_plot")
  # insertImage(wb, "difficulty_plot",plot_difficulty)
  addWorksheet(wb, "discrimination_data")
  writeData(wb, "discrimination_data",discrimination_df)
  saveWorkbook(wb, file= paste0("itemanalysis_outputs_", group_name,"_",file_name,".xlsx"), overwrite=TRUE)

  # Export a summary of the results
  sink(paste0("itemanalysis_outputs_summary_", group_name,"_",file_name,".txt"))
  cat("==================================", sep="\n")
  cat("Item Analysis Results - Difficulty", sep="\n")
  cat("==================================", sep="\n")
  print(difficulty_data)
  cat("", sep="\n")
  cat("The result above has been exported to an Excel file, of which file name includes 'itemanalysis_outputs,' in the working directory.")
  cat("", sep="\n")
  cat("", sep="\n")
  cat("Refer to the difficulty plot that has been exported to an image file, of which file name includes 'itemanalysis_difficulty_plot,' in the working directory.", sep="\n")
  if (toodifficultitems_nrow > 0) {
    cat("", sep="\n")
    cat("As you can see in the plot (and the table above), the question items of which difficulty index turned out to be lower than 0.2 are as follows:")
    print(toodifficultitems$Q_id)
  } else {
    cat("", sep="\n")
    cat("As you can see in the plot (and the table above), none of the difficulty indixes was found to be lower than 0.2", sep="\n")
  }
  # Print discrimination data results
  cat("", sep="\n")
  cat("======================================", sep="\n")
  cat("Item Analysis Results - Discrimination", sep="\n")
  cat("======================================", sep="\n")
  print(discrimination_df)
  cat("", sep="\n")
  cat("The result above has been exported to an Excel file, of which file name includes 'itemanalysis_outputs,' in the working directory.", sep="\n")
  cat("", sep="\n")
  cat("Refer to the discrimination plot that has been exported to an image file, of which file name includes 'itemanalysis_discrimination_plot,' in the working directory.", sep="\n")
  if (nondiscriminantitems_nrow > 0) {
    cat("", sep="\n")
    cat("As you can see in the plot (and the table above), the question items of which discrimination index turned out to be lower than 0.2 are as follows:", sep="\n")
    print(nondiscriminantitems$qnumber)
  } else {
    cat("", sep="\n")
    cat("As you can see in the plot (and the table above), none of the discrimination indixes was found to be lower than 0.2", sep="\n")
  }
  sink()

options(warn=-1)
}

###############################################################################
#' Paired Samples Data Analysis
#'
#' This function automatically cleans the data sets (e.g., converting missing values to "0), merges pre-post Data Sets, runs the (parametric) Paired Samples T-test and (nonparametric) Wilcoxon Signed-Rank test, and then exports outputs to help you examine the difference between pre-post scores. Find the output files named as "pairedsamples_output.txt" and "pairedsamples_invidual_items.jpeg" instantly created by this function in the working directory.
#'
#' @export

pairedsamples <- function() {

  options(warn=-1)
  # Read pre-post data sets (of the treatment group)
  data_treat_pre <- read_csv("data_treat_pre.csv", show_col_types = FALSE)
  data_treat_post <- read_csv("data_treat_post.csv", show_col_types = FALSE)

  # Deleting students with too many skipped answers: data_treat_pre.csv-----------------
  nrow_all <- nrow(data_treat_pre)
  n <- as.numeric(length(data_treat_pre[,-1]))
  data_treat_pre <- data_treat_pre %>% mutate(m_rate = round(rowSums(is.na(data_treat_pre))/n,3))

  cat("", sep="\n")
  message("Skipped answers will be treated as incorrect in this package, and too many skipped answers may skew the results of data analysis. By default, students with more than 15% of skipped answers will be deleted from the data to prevent skewed results. If you don't agree with this rate of 15%, you need to provide a different rate below. If you're fine with 15%, just hit the 'Enter' key (i.e., no input) when you see a prompt 'Please enter an alternative rate...: ' below. If you want to apply an alternatie rate, please put it below in a decimal format like 0.2 for 20%")
  cat("", sep="\n")
  m_rate_default <- readline(prompt="Please enter an alternative rate in a decimal format (e.g., 0.1 for 10%, 0.25 for 25%): ")
  if (m_rate_default > 0) {
    data_treat_pre <- subset(data_treat_pre, m_rate < m_rate_default)
    nrow_subset <- nrow(data_treat_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted from the 'data_treat_pre.csv' file is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_treat_pre <- subset(data_treat_pre, m_rate < 0.15)
    nrow_subset <- nrow(data_treat_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  # Deleting students with too many skipped answers: data_treat_post.csv-----------------
  nrow_all <- nrow(data_treat_post)
  n <- as.numeric(length(data_treat_post[,-1]))
  data_treat_post <- data_treat_post %>% mutate(m_rate = round(rowSums(is.na(data_treat_post))/n,3))

  if (m_rate_default > 0) {
    data_treat_post <- subset(data_treat_post, m_rate < m_rate_default)
    nrow_subset <- nrow(data_treat_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_treat_post <- subset(data_treat_post, m_rate < 0.15)
    nrow_subset <- nrow(data_treat_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  #----------------------------------------------------------------

  n <- as.numeric(length(data_treat_pre[,-1]))
  n=n-1

  # Replace skipped answers with "0"
  data_treat_pre[is.na(data_treat_pre)]= 0
  data_treat_post[is.na(data_treat_post)]= 0

  # Change column names
  colnames(data_treat_pre) <- paste(colnames(data_treat_pre), "pre", sep = "_")
  colnames(data_treat_post) <- paste(colnames(data_treat_post), "post", sep = "_")

  # Calculate average scores
  data_treat_pre <- data_treat_pre %>% mutate(avg_score_pre = round(rowMeans(data_treat_pre[,-1]),3))
  data_treat_post <- data_treat_post %>% mutate(avg_score_post = round(rowMeans(data_treat_post[,-1]),3))

  # Merge pre/post data
  treat_data_merged<-merge(data_treat_pre, data_treat_post, by.x = "id_pre",  by.y = "id_post")
  names(treat_data_merged)[names(treat_data_merged) == 'id_pre'] <- "id"
  treat_data_merged <- treat_data_merged %>% mutate(avg_diff=avg_score_post-avg_score_pre)
  Mean_Differences <- treat_data_merged$avg_diff
  hist(Mean_Differences)
  jpeg("pairedsamples_histogram.jpeg")
  hist(Mean_Differences)
  dev.off()

  # Get descriptive statistics
  # Convert Data Frame to a Long Format & Define the Variable
  avg_score_df <- cbind(treat_data_merged$avg_score_pre, treat_data_merged$avg_score_post)
  df_Long <- melt(avg_score_df)
  names(df_Long) <- c("id", "Time", "Score")
  # Name Time(group) -> 1=Pre, 2=Post
  df_Long$Time <- factor(df_Long$Time,levels=c(1,2),labels=c("Pre", "Post"))
  # Descriptive Statistics
  descriptive <- df_Long %>%
    group_by(Time) %>%
    get_summary_stats(Score, type="common")
  boxplots <- ggboxplot(df_Long, x="Time", y="Score", add="point", title="Paired Samples - Boxplots")
  print(boxplots)
  jpeg("pairedsamples_boxplots.jpeg")
  print(boxplots)
  dev.off()

  #Check Assumptions
  #No Outliers
  Outliers <- treat_data_merged %>% identify_outliers(avg_diff)
  # Normality
  normality <- treat_data_merged %>% shapiro_test(avg_diff)
  qqplots <- ggqqplot(treat_data_merged, "avg_diff")
  print(qqplots)
  jpeg("pairedsamples_boxplots.jpeg")
  print(qqplots)
  dev.off()

  # Run paired samples t-test (two-sided)
  pairedsamplesttest <- t.test(treat_data_merged$avg_score_pre, treat_data_merged$avg_score_post, mu=0, alt="two.sided", paired=T, conf.level=0.95)

  # Run paired samples t-test (two-sided)
  wilcoxonsignedranktest <- wilcox.test(treat_data_merged$avg_score_pre, treat_data_merged$avg_score_post, paired=T, conf.level=0.95)

  # Run McNemar test to compare paired individual items
  qnumber <- numeric(n)
  pvalue <- numeric(n)
  for (i in 1:n) {
    contin <- treat_data_merged[c(paste0("Q",i,"_pre"),paste0("Q",i,"_post"))]
    contin = table(contin)
    mcnemar <- mcnemar.test(contin)
    qnumber[i] <- paste0("Q",i)
    pvalue[i] <- mcnemar$p.value
  }

  itemsmcnemar_df <- data.frame(qnumber,pvalue)
  itemsmcnemarplot <- ggplot(itemsmcnemar_df, aes(x= round(pvalue,3) , y= reorder(qnumber,pvalue))) +
    geom_point(alpha=0.9) +
    geom_vline(xintercept = 0.05, color ="red")+
    # geom_vline(xintercept = 0.8, color ="red")+
    geom_hline(yintercept = itemsmcnemar_df$qnumber[length(data_treat_pre)-1],color="blue") +
    ggtitle("McNemar Test Results",subtitle = "Ordered by P-value")+
    xlab("P-value")+ ylab("Test Item Number") +theme_minimal() +
    geom_text_repel(aes(label = round(pvalue,3)))

  jpeg(paste0("pairedsamples_individual_items.jpeg"))
  print(itemsmcnemarplot)
  dev.off()

  significant_items <- subset(itemsmcnemar_df, pvalue < 0.05)
  significant_items_nrow <- nrow(significant_items)

  # Print the results
  cat("", sep="\n")
  cat("", sep="\n")
  cat("=================================================", sep="\n")
  cat("Pre-test/Post-test Scores' Descriptive Statistics", sep="\n")
  cat("=================================================", sep="\n")
  message("Refer to the boxplot in the 'Plots' panel to visually examine the data.", sep="\n")
  print(descriptive)

  sample_size <- nrow(treat_data_merged)
  normality <- shapiro.test(treat_data_merged$avg_diff)

  cat("", sep="\n")
  cat("===================================", sep="\n")
  cat("Testing the Assumption of Normality", sep="\n")
  cat("===================================", sep="\n")
  print(normality)
  if (normality$p.value > 0.05) {
    cat("## Interpretation: the assumption of normality by group has been met (p>0.05)", sep="\n")
  } else {
    cat("## Interpretation: the assumption of normality by group has NOT been met (p<0.05)", sep="\n")
  }

  cat("", sep="\n")
  cat("=====================", sep="\n")
  cat("Paired T-Test Results", sep="\n")
  cat("=====================", sep="\n")
  print(pairedsamplesttest)

  cat("## Sample Interpretation of the outputs/results right above: (you can edit it per your preference)", sep="\n")
  if (pairedsamplesttest$p.value < 0.05) {
    cat(paste0("--> The average pre-test score was ",round(descriptive$mean[1],2),
                 " and the average post-test score was ",round(descriptive$mean[2],2),". ",
                 "The Paired Samples T-Test showed that the pre-post difference was statistically significant (p=",
                 round(pairedsamplesttest$p.value,3),")."))
    cat("", sep="\n")
  } else {
    cat(paste0("--> The average pre-test score was ",round(descriptive$mean[1],2),
                 " and the average post-test score was ",round(descriptive$mean[2],2),". ",
                 "The Paired Samples T-Test showed that the pre-post difference was NOT statistically significant (p=",
                 round(pairedsamplesttest$p.value,3),")."))
    cat("", sep="\n")
  }

  if (sample_size < 15 & normality$p.value < 0.05) {
    cat("", sep="\n")
    message(paste0("## The sample size of ", sample_size, " is small, and the Shapiro-Wilk test result shows a violation of normality assumption (p=",normality$p.value,"). Although the t-test is robust to a small sample size and a violation of the normality assumption, you may want to refer to the Wilcoxon signed rank test results below to be safe."))
  }
  if (sample_size < 15 & normality$p.value < 0.05) {
    cat("", sep="\n")
    message(paste0("## The sample size of ", sample_size, " may be too small to safely interpret the parametric t-test results above. Although the t-test is known to be robust to a small sample size you may want to refer to the Wilcoxon signed rank test results below to be safe."))
  }
  if (sample_size >= 15 & normality$p.value<0.05) {
    cat("", sep="\n")
    message(paste0("## The Shapiro-Wilk test result shows a violation of normality assumption (p=",round(normality$p.value,3),"). Although the t-test is known to be robust to a violation of the normality assumption, you may want to refer to the Wilcoxon signed rank test results below to be safe."))
  }

  if (sample_size < 15 | normality$p.value < 0.05) {
    cat("", sep="\n")
    cat("=====================================", sep="\n")
    cat("Wilcoxon Signed Rank Sum Test Results", sep="\n")
    cat("=====================================", sep="\n")
    print(wilcoxonsignedranktest)

    cat("## A sample interpretation of the Wilcoxon signed rand test results above: (you can edit it per your preference)", sep="\n")
    if (wilcoxonsignedranktest$p.value < 0.05) {
      cat(paste0("--> The Wilcoxon signed rank test results above show that the pre-post difference was statistically significant (p=",
               round(wilcoxonsignedranktest$p.value,3),")."))
    } else {
      cat(paste0("--> The Wilcoxon signed rank test results above show that the pre-post difference was NOT statistically significant (p=",
               round(wilcoxonsignedranktest$p.value,3),")."))
    }
  }

  cat("", sep="\n")
  cat("", sep="\n")
  cat("======================================================================", sep="\n")
  cat("McNemar Test Results: Pre-post difference of individual question items", sep="\n")
  cat("======================================================================", sep="\n")
  message("Refer to the plot of McNemar test results in the 'Plots' panel. It shows the significant levels (p-value) of individual quetion items' pre-post differences: the lower, the more significant.")
  plot(itemsmcnemarplot)
  if (significant_items_nrow > 0) {
    cat("", sep="\n")
    cat("As you can see in the plot, the question items of which pre-post difference turned out to be significant are as follows:")
    print(significant_items$qnumber)
  } else {
    cat("", sep="\n")
    cat("As you can see in the plot, no items showed a significant pre-post difference.")
  }


  # Export the results
  # Look for a text file named "pairedsamples_outputs.txt"
  sink("pairedsamples_outputs.txt")
  cat("=================================================", sep="\n")
  cat("Pre-test/Post-test Scores' Descriptive Statistics", sep="\n")
  cat("=================================================", sep="\n")
  cat("Refer to the boxplot in the 'Plots' panel to visually examine the data.", sep="\n")
  print(descriptive)

  cat("", sep="\n")
  cat("=====================", sep="\n")
  cat("Paired T-Test Results", sep="\n")
  cat("=====================", sep="\n")
  print(pairedsamplesttest)

  cat("## Sample Interpretation of the paired t-test results above: (you can edit it per your preference)", sep="\n")
  if (pairedsamplesttest$p.value < 0.05) {
    cat(paste0("--> The average pre-test score was ",round(descriptive$mean[1],2),
                 " and the average post-test score was ",round(descriptive$mean[2],2),". ",
                 "The Paired Samples T-Test showed that the pre-post difference was statistically significant (p=",
                 round(pairedsamplesttest$p.value,3),")."))
  } else {
    cat(paste0("--> The average pre-test score was ",round(descriptive$mean[1],2),
                 " and the average post-test score was ",round(descriptive$mean[2],2),". ",
                 "The Paired Samples T-Test showed that the pre-post difference was NOT statistically significant (p=",
                 round(pairedsamplesttest$p.value,3),")."))
  }

  cat("", sep="\n")
  if (sample_size < 15 & normality$p.value < 0.05) {
    cat("", sep="\n")
    cat(paste0("## The sample size of ", sample_size, " is small, and the Shapiro-Wilk test result shows a violation of normality assumption (p=",normality$p.value,"). Although the t-test is robust to a small sample size and a violation of the normality assumption, you may want to refer to the Wilcoxon signed rank test results below to be safe."))
  }
  if (sample_size < 15 & normality$p.value < 0.05) {
    cat("", sep="\n")
    cat(paste0("## The sample size of ", sample_size, " may be too small to safely interpret the parametric t-test results above. Although the t-test is known to be robust to a small sample size you may want to refer to the Wilcoxon signed rank test results below to be safe."))
  }
  if (sample_size >= 15 & normality$p.value<0.05) {
    cat("", sep="\n")
    cat(paste0("## The Shapiro-Wilk test result shows a violation of normality assumption (p=",round(normality$p.value,3),"). Although the t-test is known to be robust to a violation of the normality assumption, you may want to refer to the Wilcoxon signed rank test results below to be safe."))
  }

  if (sample_size < 15 | normality$p.value < 0.05) {
    cat("", sep="\n")
    cat("=====================================", sep="\n")
    cat("Wilcoxon Signed Rank Sum Test Results", sep="\n")
    cat("=====================================", sep="\n")
    print(wilcoxonsignedranktest)

    cat("## A sample interpretation of the Wilcoxon signed rand test results above: (you can edit it per your preference)", sep="\n")
    if (wilcoxonsignedranktest$p.value < 0.05) {
      cat(paste0("--> The Wilcoxon signed rank test results above show that the pre-post difference was statistically significant (p=",
                 round(wilcoxonsignedranktest$p.value,3),")."))
    } else {
      cat(paste0("--> The Wilcoxon signed rank test results above show that the pre-post difference was NOT statistically significant (p=",
                 round(wilcoxonsignedranktest$p.value,3),")."))
    }
  }
  cat("", sep="\n")
  cat("", sep="\n")
  cat("======================================================================", sep="\n")
  cat("McNemar Test Results: Pre-post difference of individual question items", sep="\n")
  cat("======================================================================", sep="\n")
  cat("Refer to the plot of McNemar test results in the plot that has been exported to an image file named 'pairedsamples_individual_items.jpeg' in the working directory. It shows the significant levels (p-value) of individual quetion items' pre-post differences: the lower, the more significant.", sep="\n")
  plot(itemsmcnemarplot)
  if (significant_items_nrow > 0) {
    cat("", sep="\n")
    cat("As you can see in the plot, the question items of which pre-post difference turned out to be significant are as follows:")
    print(significant_items$qnumber)
  } else {
    cat("", sep="\n")
    cat("As you can see in the plot, no items showed a significant pre-post difference.")
  }
  sink()
options(warn=0)
}

###############################################################################
#' Independent Samples Data Analysis
#'
#' This function automatically cleans the data sets (e.g., converting missing values to "0), binds treatment-control group Data Sets, runs the Independent Samples T-test (parametric) and Mann–Whitney U test (nonparametric), and then exports outputs to help you examine the difference between the groups. R scripts and their outputs are as follows (just pay attention to the outputs since the codes are automatically run back-end by the function). Find the output file named "independentsamples_output.txt" instantly generated by this function in the working directory.
#'
#' @export

independentsamples <- function() {

  # Read the treatment and control group post-test data sets
  data_treat_post<- read_csv("data_treat_post.csv", show_col_types = FALSE)
  data_ctrl_post<- read_csv("data_ctrl_post.csv", show_col_types = FALSE)

  # Deleting students with too many skipped answers: data_treat_post.csv-----------------
  nrow_all <- nrow(data_treat_post)
  n <- as.numeric(length(data_treat_post[,-1]))
  data_treat_post <- data_treat_post %>% mutate(m_rate = round(rowSums(is.na(data_treat_post))/n,3))

  cat("", sep="\n")
  message("Skipped answers will be treated as incorrect in this package, and too many skipped answers may skew the results of data analysis. By default, students with more than 15% of skipped answers will be deleted from the data to prevent skewed results. If you don't agree with this rate of 15%, you need to provide a different rate below. If you're fine with 15%, just hit the 'Enter' key (i.e., no input) when you see a prompt 'Please enter an alternative rate...: ' below. If you want to apply an alternatie rate, please put it below in a decimal format like 0.2 for 20%")
  cat("", sep="\n")
  m_rate_default <- readline(prompt="Please enter an alternative rate in a decimal format (e.g., 0.1 for 10%, 0.25 for 25%): ")
  if (m_rate_default > 0) {
    data_treat_post <- subset(data_treat_post, m_rate < m_rate_default)
    nrow_subset <- nrow(data_treat_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_post.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted from the 'data_treat_post.csv' file is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_post.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_treat_post <- subset(data_treat_post, m_rate < 0.15)
    nrow_subset <- nrow(data_treat_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_post.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_post.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  # Deleting students with too many skipped answers: data_ctrl_post.csv-----------------
  nrow_all <- nrow(data_ctrl_post)
  n <- as.numeric(length(data_ctrl_post[,-1]))
  data_ctrl_post <- data_ctrl_post %>% mutate(m_rate = round(rowSums(is.na(data_ctrl_post))/n,3))

  if (m_rate_default > 0) {
    data_ctrl_post <- subset(data_ctrl_post, m_rate < m_rate_default)
    nrow_subset <- nrow(data_ctrl_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_ctrl_post <- subset(data_ctrl_post, m_rate < 0.15)
    nrow_subset <- nrow(data_ctrl_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  #----------------------------------------------------------------

  # Replace skipped answers with "0"
  data_treat_post[is.na(data_treat_post)]= 0
  data_ctrl_post[is.na(data_ctrl_post)]= 0

  # Change column names
  colnames(data_treat_post) <- paste( colnames(data_treat_post), "post", sep = "_")
  colnames(data_ctrl_post) <- paste( colnames(data_ctrl_post), "post", sep = "_")

  # Calculate average scores and generate group variable
  data_treat_post <- data_treat_post %>%
    mutate(avg_score_post = round(rowMeans(data_treat_post[,-1]),3)) %>%
    mutate(datagroup=1)
  data_ctrl_post <- data_ctrl_post %>%
    mutate(avg_score_post = round(rowMeans(data_ctrl_post[,-1]),3)) %>%
    mutate(datagroup=0)

  treat_post_average <- describe(data_treat_post$avg_score_post)
  ctrl_post_average <- describe(data_ctrl_post$avg_score_post)

  # Bind treat/control data
  group_data_binded <- rbind(data_treat_post, data_ctrl_post)

  # Name datagroup -> 0=control, 1=treatment
  group_data_binded$datagroup<-factor(group_data_binded$datagroup,levels=c(0,1),labels=c("Control", "Treatment"))

  # Get descriptive statistics
  # group_means <- describeBy(group_data_binded$avg_score_post, group_data_binded$datagroup)
  group_means <- group_data_binded %>% group_by(datagroup) %>% get_summary_stats(avg_score_post, type = "mean_sd")

  boxplots <- ggboxplot(group_data_binded, x="datagroup", y="avg_score_post", add="point", title="Independent Samples - Boxplots")
  print(boxplots)
  jpeg("independentsamples_boxplots.jpeg")
  print(boxplots)
  dev.off()

  # Check Assumptions
  # No Outliers
  Outliers <- group_data_binded %>% group_by(datagroup) %>% identify_outliers(avg_score_post)

  # Normality
  shapirotest <- group_data_binded %>% group_by(datagroup) %>% shapiro_test(avg_score_post)
  qqplots <- ggqqplot(group_data_binded, x = "avg_score_post", facet.by = "datagroup")
  print(qqplots)
  jpeg("independentsamples_qqplots.jpeg")
  print(qqplots)
  dev.off()

  # Run independent samples t-test (two-sided)
  independentsamplesttest_e <- t.test(group_data_binded$avg_score_post~group_data_binded$datagroup, mu=0, alt="two.sided", conf=0.95, var.eq=T, paired=F)
  independentsamplesttest_ne <- t.test(group_data_binded$avg_score_post~group_data_binded$datagroup, mu=0, alt="two.sided", conf=0.95, var.eq=F, paired=F)

  # Run Mann-Whitney U Test
  mannwhitneyutest <- wilcox.test(group_data_binded$avg_score_post~group_data_binded$datagroup)

  # Print the results
  cat("", sep="\n")
  cat("", sep="\n")
  cat("======================", sep="\n")
  cat("Descriptive Statistics", sep="\n")
  cat("======================", sep="\n")
  message("Refer to the boxplots in the 'Plots' panel for visual inspection of the descriptive statistics. The plots have also been exported to the working directory.", sep="\n")
  print(group_means)
  cat("", sep="\n")

  cat("====================", sep="\n")
  cat("Checking Assumptions", sep="\n")
  cat("====================", sep="\n")
  cat("", sep="\n")
  cat("The Equality of Variances", sep="\n")
  equalvariance <- group_data_binded %>% levene_test(avg_score_post ~ datagroup)
  print(equalvariance)
  if (equalvariance$p > 0.05) {
    cat("## Interpretation: the assumption of equality of variances has been met (p>0.05)", sep="\n")
  } else {
    cat("## Interpretation: the assumption of equality of variances has NOT been met (p<0.05)", sep="\n")
  }

  cat("", sep="\n")
  cat("----------------------", sep="\n")
  cat("The Nomality by Groups", sep="\n")
  message("Refer to the Q-Q Plots in the 'Plots' panel for visual inspection of the normality. The plots have also been exported to the working directory.", sep="\n")
  if (shapirotest$p[2] >0.05 & shapirotest$p[1] >0.05) {
    cat("## Interpretation: the assumption of normality has been met (p>0.05 for each group)", sep="\n")
  } else {
    cat("## Interpretation: the assumption of equality of variances has NOT been met (p<0.05)", sep="\n")
  }

  cat("", sep="\n")
  cat("===============================================", sep="\n")
  cat("Independent Samples T-Test Results (Parametric)", sep="\n")
  cat("===============================================", sep="\n")
  if (equalvariance$p > 0.05) {
    print(independentsamplesttest_e)
    cat("## A sample interpretation of the t-test results above: (you can edit it per your preference)", sep="\n")
    if (independentsamplesttest_e$p.value < 0.05) {
      cat(paste0("--> The treatment group's average score was ",round(treat_post_average$mean,2),
                 " and the control group's average score was ",round(ctrl_post_average$mean,2),". ",
                 "The independent samples t-test results show that the pre-post difference is statistically significant
                 (p=", round(independentsamplesttest_e$p.value,3),"."))
    } else {
      cat(paste0("--> The treatment group's average score was ",round(treat_post_average$mean,2),
                 ", and the control group's average score was ",round(ctrl_post_average$mean,2),". ",
                 "The independent samples t-test results show that the group difference is NOT statistically significant (p=",
                 round(independentsamplesttest_e$p.value,3),"."))
    }
  } else {
    print(independentsamplesttest_ne)
    cat("## A sample interpretation of the t-test results above: (you can edit it per your preference)", sep="\n")
    if (independentsamplesttest_ne$p.value < 0.05) {
      cat(paste0("--> The treatment group's average score was ",round(treat_post_average$mean,2),
                 " and the control group's average score was ",round(ctrl_post_average$mean,2),". ",
                 "The independent samples t-test results show that the pre-post difference is statistically significant
                 (p=", round(independentsamplesttest_ne$p.value,3),"."))
    } else {
      cat(paste0("--> The treatment group's average score was ",round(treat_post_average$mean,2),
                 ", and the control group's average score was ",round(ctrl_post_average$mean,2),". ",
                 "The independent samples t-test results show that the group difference is NOT statistically significant (p=",
                 round(independentsamplesttest_ne$p.value,3),"."))
    }
  }

  sample_size_t <- nrow(data_treat_post)
  sample_size_c <- nrow(data_ctrl_post)

  if (sample_size_t < 15 | sample_size_c < 15 | shapirotest$p[2] < 0.05 | shapirotest$p[1] < 0.05) {
    cat("", sep="\n")
    message(paste0("## Either a sample size is too small or the data violates the assumption of eval variances (refer to the descriptive statistics and the Shapiro-Wilk test resutls). Although the t-test is known to be robust to a small sample size or violation of the normality assumption, you may want to refer to the Mann-Whitney U (Wilcoxon Rank Sum) test results below to be safe."))
    cat("===============================================================", sep="\n")
    cat("Mann-Whitney U (Wilcoxon Rank Sum) Test Results (Nonparametric)", sep="\n")
    cat("===============================================================", sep="\n")
    print(mannwhitneyutest)

    cat("## A sample interpretation of the Mann-Whitney U (Wilcoxon Rank Sum) test results above: (you can edit it per your preference)", sep="\n")
    if (mannwhitneyutest$p.value < 0.05) {
      cat(paste0("--> The the Mann-Whitney U (Wilcoxon Rank Sum) test results above show that the pre-post difference was statistically significant (p=",
               round(mannwhitneyutest$p.value,3),")."))
    } else {
      cat(paste0("--> The the Mann-Whitney U (Wilcoxon Rank Sum) test results above show that the pre-post difference was NOT statistically significant (p=",
               round(mannwhitneyutest$p.value,3),")."))
    }
  }

  # Export the results
  # Look for a text file named "independentsamplesttest_outputs.txt"
  sink("independentsamples_outputs.txt")
  cat("======================", sep="\n")
  cat("Descriptive Statistics", sep="\n")
  cat("======================", sep="\n")
  cat("Refer to the boxplots in the 'Plots' panel for visual inspection of the descriptive statistics. The plots have also been exported to the working directory.", sep="\n")
  print(group_means)
  cat("", sep="\n")

  cat("====================", sep="\n")
  cat("Checking Assumptions", sep="\n")
  cat("====================", sep="\n")
  cat("", sep="\n")
  cat("The Equality of Variances", sep="\n")
  equalvariance <- group_data_binded %>% levene_test(avg_score_post ~ datagroup)
  print(equalvariance)
  if (equalvariance$p > 0.05) {
    cat("## Interpretation: the assumption of equality of variances has been met (p>0.05)", sep="\n")
  } else {
    cat("## Interpretation: the assumption of equality of variances has NOT been met (p<0.05)", sep="\n")
  }

  cat("", sep="\n")
  cat("The Nomality by Groups", sep="\n")
  cat("Refer to the Q-Q Plots in the 'Plots' panel for visual inspection of the normality. The plots have also been exported to the working directory.", sep="\n")
  if (shapirotest$p[2] >0.05 & shapirotest$p[1] >0.05) {
    cat("## Interpretation: the assumption of normality has been met (p>0.05 for each group)", sep="\n")
  } else {
    cat("## Interpretation: the assumption of equality of variances has NOT been met (p<0.05)", sep="\n")
  }

  cat("", sep="\n")
  cat("===============================================", sep="\n")
  cat("Independent Samples T-Test Results (Parametric)", sep="\n")
  cat("===============================================", sep="\n")
  if (equalvariance$p > 0.05) {
    print(independentsamplesttest_e)
    cat("## A sample interpretation of the t-test results above: (you can edit it per your preference)", sep="\n")
    if (independentsamplesttest_e$p.value < 0.05) {
      cat(paste0("--> The treatment group's average score was ",round(treat_post_average$mean,2),
                 " and the control group's average score was ",round(ctrl_post_average$mean,2),". ",
                 "The independent samples t-test results show that the pre-post difference is statistically significant
                 (p=", round(independentsamplesttest_e$p.value,3),"."))
    } else {
      cat(paste0("--> The treatment group's average score was ",round(treat_post_average$mean,2),
                 ", and the control group's average score was ",round(ctrl_post_average$mean,2),". ",
                 "The independent samples t-test results show that the group difference is NOT statistically significant (p=",
                 round(independentsamplesttest_e$p.value,3),"."))
    }
  } else {
    print(independentsamplesttest_ne)
    cat("## A sample interpretation of the t-test results above: (you can edit it per your preference)", sep="\n")
    if (independentsamplesttest_ne$p.value < 0.05) {
      cat(paste0("--> The treatment group's average score was ",round(treat_post_average$mean,2),
                 " and the control group's average score was ",round(ctrl_post_average$mean,2),". ",
                 "The independent samples t-test results show that the pre-post difference is statistically significant
                 (p=", round(independentsamplesttest_ne$p.value,3),"."))
    } else {
      cat(paste0("--> The treatment group's average score was ",round(treat_post_average$mean,2),
                 ", and the control group's average score was ",round(ctrl_post_average$mean,2),". ",
                 "The independent samples t-test results show that the group difference is NOT statistically significant (p=",
                 round(independentsamplesttest_ne$p.value,3),"."))
    }
  }

  sample_size_t <- nrow(data_treat_post)
  sample_size_c <- nrow(data_ctrl_post)

  if (sample_size_t < 15 | sample_size_c < 15 | shapirotest$p[2] < 0.05 | shapirotest$p[1] < 0.05) {
    cat("", sep="\n")
    cat(paste0("## Either a sample size is too small or the data violates the assumption of eval variances (refer to the descriptive statistics and the Shapiro-Wilk test resutls). Although the t-test is known to be robust to a small sample size or violation of the normality assumption, you may want to refer to the Mann-Whitney U (Wilcoxon Rank Sum) test results below to be safe."))
    cat("===============================================================", sep="\n")
    cat("Mann-Whitney U (Wilcoxon Rank Sum) Test Results (Nonparametric)", sep="\n")
    cat("===============================================================", sep="\n")
    print(mannwhitneyutest)

    cat("## A sample interpretation of the Mann-Whitney U (Wilcoxon Rank Sum) test results above: (you can edit it per your preference)", sep="\n")
    if (mannwhitneyutest$p.value < 0.05) {
      cat(paste0("--> The the Mann-Whitney U (Wilcoxon Rank Sum) test results above show that the pre-post difference was statistically significant (p=",
                 round(mannwhitneyutest$p.value,3),")."))
    } else {
      cat(paste0("--> The the Mann-Whitney U (Wilcoxon Rank Sum) test results above show that the pre-post difference was NOT statistically significant (p=",
                 round(mannwhitneyutest$p.value,3),")."))
    }
  }
  sink()

}

###############################################################################
#' One-way ANCOVA
#'
#' This function automatically merges pre-post data sets, binds treatment-control data sets, and runs scripts to check assumptions of one-way ANCOVA, runs the main One-way ANCOVA, and then exports all outputs for you all at once. Please make sure to name data files accurately (i.e., “data_treat_pre.csv”, “data_treat_post.csv”, “data_ctrl_pre.csv”," data_ctrl_post.csv") and have them saved in the working directory. Find the output files "onewayancova_outputs.txt" and "onewayancova_linearity_check_scatter_plot.jpeg" instantly generated by this function in the working directory.
#'
#' @export

onewayancova <- function() {

options(warn=-1)

    # Read all data sets
  data_treat_pre <- read_csv("data_treat_pre.csv", show_col_types = FALSE)
  data_treat_post<- read_csv("data_treat_post.csv", show_col_types = FALSE)
  data_ctrl_pre <- read_csv("data_ctrl_pre.csv", show_col_types = FALSE)
  data_ctrl_post<- read_csv("data_ctrl_post.csv", show_col_types = FALSE)

  # Deleting students with too many skipped answers: data_treat_pre.csv-----------------
  nrow_all <- nrow(data_treat_pre)
  n <- as.numeric(length(data_treat_pre[,-1]))
  data_treat_pre <- data_treat_pre %>% mutate(m_rate = round(rowSums(is.na(data_treat_pre))/n,3))
  cat("", sep="\n")
  message("Skipped answers will be treated as incorrect in this package, and too many skipped answers may skew the results of data analysis. By default, students with more than 15% of skipped answers will be deleted from the data to prevent skewed results. If you don't agree with this rate of 15%, you need to provide a different rate below. If you're fine with 15%, just hit the 'Enter' key (i.e., no input) when you see a prompt 'Please enter an alternative rate...: ' below. If you want to apply an alternatie rate, please put it below in a decimal format like 0.2 for 20%")
  cat("", sep="\n")
  m_rate_default <- readline(prompt="Please enter an alternative rate in a decimal format (e.g., 0.1 for 10%, 0.25 for 25%): ")
  if (m_rate_default > 0) {
    data_treat_pre <- subset(data_treat_pre, m_rate < m_rate_default)
    nrow_subset <- nrow(data_treat_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted from the 'data_treat_pre.csv' file is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_treat_pre <- subset(data_treat_pre, m_rate < 0.15)
    nrow_subset <- nrow(data_treat_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  # Deleting students with too many skipped answers: data_treat_post.csv-----------------
  nrow_all <- nrow(data_treat_post)
  n <- as.numeric(length(data_treat_post[,-1]))
  data_treat_post <- data_treat_post %>% mutate(m_rate = round(rowSums(is.na(data_treat_post))/n,3))

  if (m_rate_default > 0) {
    data_treat_post <- subset(data_treat_post, m_rate < m_rate_default)
    nrow_subset <- nrow(data_treat_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_treat_post <- subset(data_treat_post, m_rate < 0.15)
    nrow_subset <- nrow(data_treat_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  # Deleting students with too many skipped answers: data_ctrl_pre.csv-----------------
  nrow_all <- nrow(data_ctrl_pre)
  n <- as.numeric(length(data_ctrl_pre[,-1]))
  data_ctrl_pre <- data_ctrl_pre %>% mutate(m_rate = round(rowSums(is.na(data_ctrl_pre))/n,3))

  if (m_rate_default > 0) {
    data_ctrl_pre <- subset(data_ctrl_pre, m_rate < m_rate_default)
    nrow_subset <- nrow(data_ctrl_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_pre.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_pre.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_ctrl_pre <- subset(data_ctrl_pre, m_rate < 0.15)
    nrow_subset <- nrow(data_ctrl_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_pre.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_pre.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  # Deleting students with too many skipped answers: data_ctrl_post.csv-----------------
  nrow_all <- nrow(data_ctrl_post)
  n <- as.numeric(length(data_ctrl_post[,-1]))
  data_ctrl_post <- data_ctrl_post %>% mutate(m_rate = round(rowSums(is.na(data_ctrl_post))/n,3))

  if (m_rate_default > 0) {
    data_ctrl_post <- subset(data_ctrl_post, m_rate < m_rate_default)
    nrow_subset <- nrow(data_ctrl_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_ctrl_post <- subset(data_ctrl_post, m_rate < 0.15)
    nrow_subset <- nrow(data_ctrl_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  #----------------------------------------------------------------


  # Replace skipped answers with "0"
  data_treat_pre[is.na(data_treat_pre)]= 0
  data_treat_post[is.na(data_treat_post)]= 0
  data_ctrl_pre[is.na(data_ctrl_pre)]= 0
  data_ctrl_post[is.na(data_ctrl_post)]= 0

  # Creat data sets for descriptive statistics
  data_c_pre <- data_ctrl_pre %>% mutate(avg_score = round(rowMeans(data_ctrl_pre[,-1]),3))
  data_c_post <- data_ctrl_post %>% mutate(avg_score = round(rowMeans(data_ctrl_post[,-1]),3))
  data_t_pre <- data_treat_pre %>% mutate(avg_score = round(rowMeans(data_treat_pre[,-1]),3))
  data_t_post <- data_treat_post %>% mutate(avg_score = round(rowMeans(data_treat_post[,-1]),3))

  # Change column names with their origin'sir
  colnames(data_treat_pre) <- paste( colnames(data_treat_pre), "pre", sep = "_")
  colnames(data_treat_post) <- paste( colnames(data_treat_post), "post", sep = "_")
  colnames(data_ctrl_pre) <- paste( colnames(data_ctrl_pre), "pre", sep = "_")
  colnames(data_ctrl_post) <- paste( colnames(data_ctrl_post), "post", sep = "_")

  # Calculate average scores
  data_treat_pre <- data_treat_pre %>% mutate(avg_score_pre = round(rowMeans(data_treat_pre[,-1]),3))
  data_treat_post <- data_treat_post %>% mutate(avg_score_post = round(rowMeans(data_treat_post[,-1]),3))
  data_ctrl_pre <- data_ctrl_pre %>% mutate(avg_score_pre = round(rowMeans(data_ctrl_pre[,-1]),3))
  data_ctrl_post <- data_ctrl_post %>% mutate(avg_score_post = round(rowMeans(data_ctrl_post[,-1]),3))

  # Descriptive Statistics

  # Merge pre/post data and generate group code (treat=1, control=0)
  treat_data_merged<-merge(data_treat_pre, data_treat_post, by.x = "id_pre",  by.y = "id_post")
  names(treat_data_merged)[names(treat_data_merged) == 'id_pre'] <- "id"
  treat_data_merged <- treat_data_merged %>% mutate(datagroup=1)

  ctrl_data_merged<-merge(data_ctrl_pre, data_ctrl_post, by.x = "id_pre",  by.y = "id_post")
  names(ctrl_data_merged)[names(ctrl_data_merged) == 'id_pre'] <- "id"
  ctrl_data_merged <- ctrl_data_merged %>% mutate(datagroup=0)

  # Bind treat/control data
  full_data_binded <- rbind(ctrl_data_merged, treat_data_merged)

  # Name datagroup -> 0=control, 1=treatment
  full_data_binded$datagroup<-factor(full_data_binded$datagroup,levels=c(0,1),labels=c("Control", "Treatment"))

  # Descriptive Statistics
  pre_descriptive <- group_by (full_data_binded, datagroup) %>%
    summarize(mean=mean(avg_score_pre), sd=sd(avg_score_pre), min=min(avg_score_pre), max=max(avg_score_pre))
  post_descriptive <- group_by (full_data_binded, datagroup) %>%
    summarize(mean=mean(avg_score_post), sd=sd(avg_score_post), min=min(avg_score_post), max=max(avg_score_post))

  data_c_pre <- data_c_pre %>% mutate(datagroup=1)
  data_c_post <- data_c_post %>% mutate(datagroup=2)
  data_t_pre <- data_t_pre %>% mutate(datagroup=3)
  data_t_post <- data_t_post %>% mutate(datagroup=4)

  df_Long <- rbind(data_c_pre, data_c_post, data_t_pre, data_t_post)
  df_Long <- df_Long %>% select(avg_score, datagroup)
  names(df_Long) <- c("Average_Score", "Group")
  # Name Time(group) -> 1=Control-Pre, 2=Control-Post, 3=Treatment-Pre, 4=Treatment-Post
  df_Long$Group <- factor(df_Long$Group,levels=c(1,2,3,4),labels=c("Control-Pre", "Control-Post", "Treatment-Pre", "Treatment-Post"))

  boxplots <- ggboxplot(df_Long, x="Group", y="Average_Score", add="point", title="Boxplots by Group" )
  print(boxplots)
  jpeg("onewayancova_boxplots.jpeg")
  print(boxplots)
  dev.off()

  # Check assumptions
  # Check linearity (visual inspection)
  plot_scatter <- ggscatter(full_data_binded, x="avg_score_pre",y= "avg_score_post", color = "datagroup",add = "reg.line" )+
    stat_regline_equation(aes(label =  paste(..eq.label.., ..rr.label.., sep = "~~~~"), color = datagroup)) +
    ggtitle("Scatter Plot to Check Linearity") + theme_minimal() +
    xlab("Pre-test Scores") + ylab("Post-test Scores")
  plot(plot_scatter)
  jpeg("onewayancova_linearity_check_scatter_plot.jpeg")
  print(plot_scatter)
  dev.off()

  # Check homogeneity of regression slopes
  # lineslopes <- lm(avg_score_post ~ avg_score_pre + datagroup + avg_score_pre:datagroup, data = full_data_binded)
  lineslopes <- full_data_binded %>% anova_test(avg_score_post ~ datagroup*avg_score_pre)

  # Inspect the model diagnostic metrics
  model <- lm(avg_score_post ~ avg_score_pre + datagroup, data = full_data_binded)
  model_metrics <- augment(model) %>% dplyr::select(-.hat, -.sigma, -.fitted)
  # print(head(model_metrics, 3))

  # Check normality of Residuals
  # shapirotest<- shapiro_test(model_metrics$.resid) %>% as.data.frame()
  norm.all.aov <- aov(avg_score_post ~ datagroup, data=full_data_binded)
  shapirotest <- shapiro.test(norm.all.aov$residuals)
  Residuals <- norm.all.aov$residuals
  hist(Residuals)
  jpeg("onewayancova_histogram.jpeg")
  hist(Residuals)
  dev.off()
  qqplots <- ggqqplot(residuals(norm.all.aov),title = "Normal Q-Q Plot of Residuals")
  print(qqplots)
  jpeg("onewayancova_qqplots.jpeg")
  print(qqplots)
  dev.off()

  # Check homogeneity of variances
  # levene_result <- model_metrics %>% levene_test(.resid ~ as.factor(datagroup)) %>% as.data.frame()
  levene_result <- leveneTest(avg_score_post ~ datagroup, full_data_binded)
  # Check outlier
  outlier_variables <- model_metrics %>%  dplyr::filter(abs(.std.resid) > 3)  %>% as.data.frame()

  # Run one-way ANCOVA
  res.aov <- full_data_binded %>%  rstatix::anova_test(avg_score_post ~ datagroup + avg_score_pre)

  # Display the adjusted means (a.k.a., estimated marginal means) for each group
  emms <- emmeans_test(full_data_binded,
                       avg_score_post ~ datagroup,
                       covariate = avg_score_pre,
                       p.adjust.method = "bonferroni",
                       conf.level=0.95,
                       detailed=TRUE
  )
  posthoc_emm <- get_emmeans(emms) %>% as.data.frame()

  # Print the results
  cat("", sep="\n")
  cat("", sep="\n")
  cat("======================", sep="\n")
  cat("Descriptive Statistics", sep="\n")
  cat("======================", sep="\n")
  cat("Pre-test scores by group", sep="\n")
  print(pre_descriptive)
  cat("-------------------------", sep="\n")
  cat("Post-test scores by group", sep="\n")
  print(post_descriptive)
  message("Refer to the boxplots in the 'Plots' panel to visually inspect the descriptive statistics")

  cat("", sep="\n")
  cat("===============================", sep="\n")
  cat("Results of Checking Assumptions", sep="\n")
  cat("===============================", sep="\n")
  cat("# Normality of Residuals:", sep="\n")
  print(shapirotest)
  if (shapirotest$p.value > 0.05) {
    cat("## Interpretation: the assumption of normality by group has been met (p>0.05).", sep="\n")
  } else {
    cat("## Interpretation: the assumption of normality by group has NOT been met (p<0.05). Although ANCOVA is robust to a violation of the assumption of normality of residuals, you may want to mention this violation in you report. For example, you can say: 'The data has slightly violated the assumption of normality of residuals, but ANCOVA is known to be robust to this violation (so it's not a serious issue).", sep="\n")
  }
  message("Refer to the histogram and the normal Q-Q plot in the 'Plots' panel to visually inspect the normality of residuals.")

  cat("", sep="\n")
    cat("---------------------------", sep="\n")
  cat("# Homogeneity of Variances:", sep="\n")
  print(levene_result)
  if (levene_result$`Pr(>F)` > 0.05) {
    cat("## Interpretation: the assumption of equality of error variances has been met (p>0.05).", sep="\n")
  } else {
    cat("## Interpretation: the assumption of equality of error variances has NOT been met (p<0.05). You need to transform your dependent variable (post-test average scores) to see if you can remove the heterogeneity in your ANCOVA model.", sep="\n")
  }

  cat("", sep="\n")
  cat("-----------", sep="\n")
  cat("# Linearity", sep="\n")
  message("Refer to the scatter plot to check linearity in the 'Plots' panel. The same plot has also been exported to an image file named 'ancova_linearity_check_scatter_plot.jpeg' in the working directory.", sep="\n")
  cat("## Interpretation: if you are seeing a liner relationship between the covariate (i.e., pre-test scores for this analysis) and dependent variable (i.e., post-test scores for this analysis) for both treatment and control group in the plot, then you can say this assumption has been met or the data has not violated this assumption of linearity. If your relationships are not linear, you have violated this assumption, and an ANCOVA is not a siutable analysis. However, you might be able to coax your data to have a linear relationship by transforming the covariate, and still be able to run an ANCOVA.", sep="\n")

  cat("", sep="\n")
  cat("----------------------------------------", sep="\n")
  cat("# Homogeneity of Regression Slopes:", sep="\n")
  print(lineslopes)
  if (lineslopes$p[3] > 0.05) {
    cat("## Interpretation: there was homogeneity of regression slopes as the interaction term (i.e., datagroup:avg_score_pre) was not statistically significant (p>0.05).", sep="\n")
  } else {
    cat("## Interpretation: the data has violated the assumption of homogeneity of regression slopes as the interaction term (i.e., datagroup:avg_score_pre) was statistically significant (p<0.05). You will not be able to undertake an ANCOVA analysis.", sep="\n")
  }

  cat("", sep="\n")
  if (nrow(outlier_variables) != 0) {
    cat("-----------", sep="\n")
    cat("# Outliers:", sep="\n")
    print(outlier_variables)
  } else {
    cat("----------", sep="\n")
    cat("# Outliers: No outlier has been found.", sep="\n")
  }
  cat("", sep="\n")
  cat("==================================", sep="\n")
  cat("Results of the main One-way ANCOVA", sep="\n")
  cat("==================================", sep="\n")
  print(res.aov)
  cat("--------------------------", sep="\n")
  cat("# Estimated Marginal Means", sep="\n")
  print(posthoc_emm)
  cat("", sep="\n")
  if (res.aov$p[1] < 0.05) {
    cat(paste0("--> A sample summary of the outputs/results above: The difference of post-test scores between the treatment and control groups turned out to be significant with pre-test scores being controlled: F(1,",res.aov$DFd[1],")=",res.aov$F[1],", p=",res.aov$p[1]," (effect size=",res.aov$ges[1],"). The adjusted marginal mean of post-test scores of the treatment group (",round(posthoc_emm$emmean[2],2),", SE=,",round(posthoc_emm$se[2],2),") was significantly different from that of the control group (",round(posthoc_emm$emmean[1],2),", SE=,",round(posthoc_emm$se[1],2),")."), sep="\n")
  } else {
    cat(paste0("--> A sample summary of the outputs/results above: The difference of post-test scores between the treatment and control groups turned out to be insignificant with pre-test scores being controlled: F(1,",res.aov$DFd[1],")=",res.aov$F[1],", p=",res.aov$p[1]," (effect size=",res.aov$ges[1],"). The adjusted marginal mean of post-test scores of the treatment group (",round(posthoc_emm$emmean[2],2),", SE=,",round(posthoc_emm$se[2],2),") was not significantly different from that of the control group (",round(posthoc_emm$emmean[1],2),", SE=,",round(posthoc_emm$se[1],2),")."), sep="\n")
  }

    # Export the main ANCOVA and post-hoc analysis results to report
  sink("onewayancova_outputs.txt")
  cat("======================", sep="\n")
  cat("Descriptive Statistics", sep="\n")
  cat("======================", sep="\n")
  cat("Pre-test scores by group", sep="\n")
  print(pre_descriptive)
  cat("-------------------------", sep="\n")
  cat("Post-test scores by group", sep="\n")
  print(post_descriptive)
  cat("Refer to the boxplots separately exported to the working directory to visually inspect the descriptive statistics", sep="\n")

  cat("", sep="\n")
  cat("===============================", sep="\n")
  cat("Results of Checking Assumptions", sep="\n")
  cat("===============================", sep="\n")
  cat("# Normality of Residuals:", sep="\n")
  print(shapirotest)
  if (shapirotest$p.value > 0.05) {
    cat("## Interpretation: the assumption of normality by group has been met (p>0.05).", sep="\n")
  } else {
    cat("## Interpretation: the assumption of normality by group has NOT been met (p<0.05). Although ANCOVA is robust to a violation of the assumption of normality of residuals, you may want to mention this violation in you report. For example, you can say: 'The data has slightly violated the assumption of normality of residuals, but ANCOVA is known to be robust to this violation (so it's not a serious issue).", sep="\n")
  }
  cat("Refer to the histogram and the normal Q-Q plot  separately exported to the working directory to visually inspect the normality of residuals.")

  cat("", sep="\n")
  cat("---------------------------", sep="\n")
  cat("# Homogeneity of Variances:", sep="\n")
  print(levene_result)
  if (levene_result$`Pr(>F)` > 0.05) {
    cat("## Interpretation: the assumption of equality of error variances has been met (p>0.05).", sep="\n")
  } else {
    cat("## Interpretation: the assumption of equality of error variances has NOT been met (p<0.05). You need to transform your dependent variable (post-test average scores) to see if you can remove the heterogeneity in your ANCOVA model.", sep="\n")
  }

  cat("", sep="\n")
  cat("-----------", sep="\n")
  cat("# Linearity", sep="\n")
  cat("Refer to the scatter plot to check linearity in the 'Plots' panel. The same plot has also been exported to an image file named 'ancova_linearity_check_scatter_plot.jpeg' in the working directory.", sep="\n")
  cat("## Interpretation: if you are seeing a liner relationship between the covariate (i.e., pre-test scores for this analysis) and dependent variable (i.e., post-test scores for this analysis) for both treatment and control group in the plot, then you can say this assumption has been met or the data has not violated this assumption of linearity. If your relationships are not linear, you have violated this assumption, and an ANCOVA is not a siutable analysis. However, you might be able to coax your data to have a linear relationship by transforming the covariate, and still be able to run an ANCOVA.", sep="\n")

  cat("", sep="\n")
  cat("----------------------------------------", sep="\n")
  cat("# Homogeneity of Regression Slopes:", sep="\n")
  print(lineslopes)
  if (lineslopes$p[3] > 0.05) {
    cat("## Interpretation: there was homogeneity of regression slopes as the interaction term (i.e., datagroup:avg_score_pre) was not statistically significant (p>0.05).", sep="\n")
  } else {
    cat("## Interpretation: the data has violated the assumption of homogeneity of regression slopes as the interaction term (i.e., datagroup:avg_score_pre) was statistically significant (p<0.05). You will not be able to undertake an ANCOVA analysis.", sep="\n")
  }

  cat("", sep="\n")
  if (nrow(outlier_variables) != 0) {
    cat("-----------", sep="\n")
    cat("# Outliers:", sep="\n")
    print(outlier_variables)
  } else {
    cat("----------", sep="\n")
    cat("# Outliers: No outlier has been found.", sep="\n")
  }
  cat("", sep="\n")
  cat("==================================", sep="\n")
  cat("Results of the main One-way ANCOVA", sep="\n")
  cat("==================================", sep="\n")
  print(res.aov)
  cat("--------------------------", sep="\n")
  cat("# Estimated Marginal Means", sep="\n")
  print(posthoc_emm)
  cat("", sep="\n")
  if (res.aov$p[1] < 0.05) {
    cat(paste0("--> A sample summary of the outputs/results above: The difference of post-test scores between the treatment and control groups turned out to be significant with pre-test scores being controlled: F(1,",res.aov$DFd[1],")=",res.aov$F[1],", p=",res.aov$p[1]," (effect size=",res.aov$ges[1],"). The adjusted marginal mean of post-test scores of the treatment group (",round(posthoc_emm$emmean[2],2),", SE=,",round(posthoc_emm$se[2],2),") was significantly different from that of the control group (",round(posthoc_emm$emmean[1],2),", SE=,",round(posthoc_emm$se[1],2),")."), sep="\n")
  } else {
    cat(paste0("--> A sample summary of the outputs/results above: The difference of post-test scores between the treatment and control groups turned out to be insignificant with pre-test scores being controlled: F(1,",res.aov$DFd[1],")=",res.aov$F[1],", p=",res.aov$p[1]," (effect size=",res.aov$ges[1],"). The adjusted marginal mean of post-test scores of the treatment group (",round(posthoc_emm$emmean[2],2),", SE=,",round(posthoc_emm$se[2],2),") was not significantly different from that of the control group (",round(posthoc_emm$emmean[1],2),", SE=,",round(posthoc_emm$se[1],2),")."), sep="\n")
  }
  sink()

options(warn=0)

}

###############################################################################
#' One-way Repeated Measures ANOVA
#'
#' This function automatically merges pre, post, and post2 data sets, runs the one-way repeated measures ANOVA with assumptions check, and then exports outputs for you all at once. Please make sure to name data files accurately (i.e., “data_treat_pre.csv”, “data_treat_post.csv”, and “data_treat_post2.csv”) and have them saved in the working directory. Find the output files "onewayrepeatedanova_outputs.txt" instantly generated by this function in the working directory.
#'
#' @export

onewayrepeatedanova <- function() {

  options(warn=-1)
  # Read all data sets
  data_treat_pre <- read_csv("data_treat_pre.csv", show_col_types = FALSE)
  data_treat_post<- read_csv("data_treat_post.csv", show_col_types = FALSE)
  data_treat_post2 <- read_csv("data_treat_post2.csv", show_col_types = FALSE)

  # Deleting students with too many skipped answers: data_treat_pre.csv-----------------
  nrow_all <- nrow(data_treat_pre)
  n <- as.numeric(length(data_treat_pre[,-1]))
  data_treat_pre <- data_treat_pre %>% mutate(m_rate = round(rowSums(is.na(data_treat_pre))/n,3))
  cat("", sep="\n")
  message("Skipped answers will be treated as incorrect in this package, and too many skipped answers may skew the results of data analysis. By default, students with more than 15% of skipped answers will be deleted from the data to prevent skewed results. If you don't agree with this rate of 15%, you need to provide a different rate below. If you're fine with 15%, just hit the 'Enter' key (i.e., no input) when you see a prompt 'Please enter an alternative rate...: ' below. If you want to apply an alternatie rate, please put it below in a decimal format like 0.2 for 20%")
  cat("", sep="\n")
  m_rate_default <- readline(prompt="Please enter an alternative rate in a decimal format (e.g., 0.1 for 10%, 0.25 for 25%): ")
  if (m_rate_default > 0) {
    data_treat_pre <- subset(data_treat_pre, m_rate < m_rate_default)
    nrow_subset <- nrow(data_treat_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted from the 'data_treat_pre.csv' file is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_treat_pre <- subset(data_treat_pre, m_rate < 0.15)
    nrow_subset <- nrow(data_treat_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  # Deleting students with too many skipped answers: data_treat_post.csv-----------------
  nrow_all <- nrow(data_treat_post)
  n <- as.numeric(length(data_treat_post[,-1]))
  data_treat_post <- data_treat_post %>% mutate(m_rate = round(rowSums(is.na(data_treat_post))/n,3))

  if (m_rate_default > 0) {
    data_treat_post <- subset(data_treat_post, m_rate < m_rate_default)
    nrow_subset <- nrow(data_treat_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_treat_post <- subset(data_treat_post, m_rate < 0.15)
    nrow_subset <- nrow(data_treat_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  # Deleting students with too many skipped answers: data_treat_post2.csv-----------------
  nrow_all <- nrow(data_treat_post2)
  n <- as.numeric(length(data_treat_post2[,-1]))
  data_treat_post2 <- data_treat_post2 %>% mutate(m_rate = round(rowSums(is.na(data_treat_post2))/n,3))

  if (m_rate_default > 0) {
    data_treat_post2 <- subset(data_treat_post2, m_rate < m_rate_default)
    nrow_subset <- nrow(data_treat_post2)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post2.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post2.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_treat_post2 <- subset(data_treat_post2, m_rate < 0.15)
    nrow_subset <- nrow(data_treat_post2)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post2.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post2.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  #----------------------------------------------------------------

  # Replace skipped answers with "0"
  data_treat_pre[is.na(data_treat_pre)]= 0
  data_treat_post[is.na(data_treat_post)]= 0
  data_treat_post2[is.na(data_treat_post2)]= 0

  # Change column names with their origin'sir
  colnames(data_treat_pre) <- paste( colnames(data_treat_pre), "pre", sep = "_")
  colnames(data_treat_post) <- paste( colnames(data_treat_post), "post", sep = "_")
  colnames(data_treat_post2) <- paste( colnames(data_treat_post2), "post2", sep = "_")

  # Calculate average scores
  data_treat_pre <- data_treat_pre %>% mutate(avg_score_pre = round(rowMeans(data_treat_pre[,-1]),3))
  data_treat_post <- data_treat_post %>% mutate(avg_score_post = round(rowMeans(data_treat_post[,-1]),3))
  data_treat_post2 <- data_treat_post2 %>% mutate(avg_score_post2 = round(rowMeans(data_treat_post2[,-1]),3))

  # Merge pre/post/post2 data
  data_treat_prepost<-merge(data_treat_pre, data_treat_post, by.x = "id_pre",  by.y = "id_post")
  treat_data_merged<-merge(data_treat_prepost, data_treat_post2, by.x = "id_pre",  by.y = "id_post2")
  names(treat_data_merged)[names(treat_data_merged) == 'id_pre'] <- "id"

  # Convert Data Frame to a Long Format & Define the Variable
  avg_score_df <- cbind(treat_data_merged$avg_score_pre, treat_data_merged$avg_score_post, treat_data_merged$avg_score_post2)
  df_Long <- melt(avg_score_df)
  names(df_Long) <- c("id", "Time", "Score")
  # Name Time(group) -> 1=Pre, 2=Post1, 3=Post2
  df_Long$id <- factor(df_Long$id)
  df_Long$Time <- factor(df_Long$Time,levels=c(1,2,3),labels=c("Pre", "Post1", "Post2"))

  # Descriptive Statistics
  descriptive <- df_Long %>%
    group_by(Time) %>%
    get_summary_stats(Score, type="common")
  boxplots <- ggboxplot(df_Long, x="Time", y="Score", add="point")
  print(boxplots)
  jpeg("onewayrepeatedanova_boxplots.jpeg")
  print(boxplots)
  dev.off()


  # Check Outliers
  outliers <- df_Long %>% group_by(Time) %>% identify_outliers(Score)

  # Check normality of Residuals
  res.aov <- aov(Score~Time, data=df_Long)
  shapirotest <- shapiro.test(resid(res.aov))
  Residuals <- res.aov$residuals
  hist(Residuals)
  jpeg("onewayrepeatedanova_histogram.jpeg")
  hist(Residuals)
  dev.off()
  qqplots <- ggqqplot(residuals(res.aov),title = "QQ Plots of Residuals")
  jpeg("onewayrepeatedanova_qqplots.jpeg")
  print(qqplots)
  dev.off()

  # One-way Repeated Measures ANOVA
  res.aov <- anova_test(data=df_Long, dv=Score, wid=id, within=Time)
  result <- get_anova_table(res.aov)

  # Pairwise Comparisons
  pwc <- df_Long %>% pairwise_t_test(Score~Time, paired=TRUE, p.adjust.method="bonferroni")

  # Friedman Test (Non-parametric)
  friedman_result <- friedman.test(avg_score_df)

  # Pairwise Comparisons - Friedman
  friedman_pwc <- df_Long %>%
    wilcox_test(Score~Time, paired=TRUE, p.adjust.method="bonferroni")


  # Print the results ###############################################
  cat("", sep="\n")
  cat("", sep="\n")
  cat("======================================", sep="\n")
  cat("Descriptive Statistics: Average Scores", sep="\n")
  cat("======================================", sep="\n")
  print(descriptive)
  message("Refer to the boxplots in the 'Plots' panel to visually inspect the descriptive statistics.")
  cat("", sep="\n")

  cat("==============================", sep="\n")
  cat("Results of Testing Assumptions", sep="\n")
  cat("==============================", sep="\n")
  cat("# Outliers:")
  print(outliers)
  if (outliers$is.extreme[1]==FALSE & outliers$is.extreme[2]==FALSE) {
    cat("## Interpretation: No extreme outlier was identified in your data.")
  } else {
    cat("## Interpretation: At least one extreme outlier was identified in your data, probably due to data entry errors, mesurement errors, or unusual values. you need to check the outlier(s) to understand them.You can still include the outlier(s) in the analysis if you don't believe the result will be substantially affected. This can be evaluated by comparing the result of the ANOVA with and without the outlier(s).", sep="\n")
  }

  cat("", sep="\n")
  cat("---------------------------", sep="\n")
  cat("# Normality:")
  print(shapirotest)
  if (shapirotest$p>0.05) {
    cat("--> Interpretation: the average test score was normally distributed at each time point, as assessed by Shapiro-Wilk test (p>0.05).", sep="\n")
  } else {
    cat("--> Interpretation: the assumption of normality has NOT been met; the average score was not normally distributed at least at one time point. Although the repeated measures ANOVA is known to be robust to a violation of the assumption of normality, you may want/need to refer to the result of the Friedman test, which is a non-parametric version of one-way repeated measures ANOVA in you report.", sep="\n")
  }
  message("If the sample size is greater than 50, it would be better refer to the QQ plots displayed in the 'Plots' panel to visually inspect the normality. This is because the Shapiro-Wilk test becomes very sensitive to a minor deviation from normality at a larger sample size (>50 in this case).", sep="\n")
  print(qqplots)
  cat("--> Interpretation: if all the points fall in the plots above approximately along the reference line, you can assume normality.", sep="\n")

  cat("---------------------------", sep="\n")
  cat("# Sphericity:")
  message("--> The assumption of sphericity has been checked during the computation of the ANOVA test (the Mauchly's test has been internally run to assess the sphericity assumption). Then, the Greenhouse-Geisser sphericity correction has been automatically applied to factors violating the sphericity of assumption.", sep="\n")

  cat("", sep="\n")
  cat("===============================================================", sep="\n")
  cat("Result of the main One-way Repeated Measures ANOVA (Parametric)", sep="\n")
  cat("===============================================================", sep="\n")
  print(result)
  if (result$p<0.001) {
    cat(paste0("--> Interpretation: the average test score turned out to be statistically significantly different at the different time points during the intervention: F(",result$DFn," ",result$DFd,")=",result$F,", p<0.001, eta2(g)=",result$ges,"."), sep="\n")
  } else if (result$p<0.01) {
    cat(paste0("--> Interpretation: the average test score turned out to be statistically significantly different at the different time points during the intervention: F(",result$DFn," ",result$DFd,")=",result$F,", p<0.01, eta2(g)=",result$ges,"."), sep="\n")
  } else if (result$p<0.05) {
    cat(paste0("--> Interpretation: the average test score turned out to be statistically significantly different at the different time points during the intervention: F(",result$DFn," ",result$DFd,")=",result$F,", p<0.05, eta2(g)=",result$ges,"."), sep="\n")
  } else {
    cat(paste0("--> Interpretation: the average test score didn't turn out to be statistically significantly different at the different time points during the intervention: F(",result$DFn," ",result$DFd,")=",result$F,", p>0.05, eta2(g)=",result$ges,"."), sep="\n")
  }

  cat("", sep="\n")
  cat("--------------------", sep="\n")
  cat("Pairwise Comparisons", sep="\n")
  print(pwc)
  # Between Pre and Post1
  if (pwc$p.adj[2]<0.001) {
    if((descriptive$mean[2]-descriptive$mean[1])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly greater than the average pre-test score (p.adj<0.001)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly smaller than the average pre-test score (p.adj<0.001)."), sep="\n")
    }
  } else if (pwc$p.adj[2]<0.01) {
    if((descriptive$mean[2]-descriptive$mean[1])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly greater than the average pre-test score (p.adj<0.01)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly smaller than the average pre-test score (p.adj<0.01)."), sep="\n")
    }
  } else if (pwc$p.adj[2]<0.05) {
    if((descriptive$mean[2]-descriptive$mean[1])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly greater than the average pre-test score (p.adj<0.05)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly smaller than the average pre-test score (p.adj<0.05)."), sep="\n")
    }
  } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average post1-test score (",descriptive$mean[2],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
  }
  # Between Post1 and Post2
  if (pwc$p.adj[1]<0.001) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average post1-test score (p.adj<0.001)."), sep="\n")
    } else {
    cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average post1-test score (p.adj<0.001)."), sep="\n")
    }
  } else if (pwc$p.adj[1]<0.01) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average post1-test score (p.adj<0.01)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average post1-test score (p.adj<0.01)."), sep="\n")
    }
  } else if (pwc$p.adj[1]<0.05) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average post1-test score (p.adj<0.05)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average post1-test score (p.adj<0.05)."), sep="\n")
    }
  } else {
    cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average post2-test score (",descriptive$mean[3],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
  }
  # Between Pre and Post2
  if (pwc$p.adj[3]<0.001) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average pre-test score (p.adj<0.001)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[2],") and the average post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average pre-test score (p.adj<0.001)."), sep="\n")
    }
  } else if (pwc$p.adj[3]<0.01) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average pre-test score (p.adj<0.01)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[2],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average pre-test score (p.adj<0.01)."), sep="\n")
    }
  } else if (pwc$p.adj[3]<0.05) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average pre-test score (p.adj<0.05)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[2],") and the average post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average pre-test score (p.adj<0.05)."), sep="\n")
    }
  } else {
    cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average post2-test score (",descriptive$mean[3],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
  }

  # Display the result of the Friedman test only when the assumption of normality is violated
  if (shapirotest$p<0.05) {
    cat("---------------------------------------------", sep="\n")
    cat("---------------------------------------------", sep="\n")
    message("As the result of the Shapiro-Wilk normality test show above, the assumption of normality of residuals has been violated (i.e., p-value from the Shapiro-Wilk test is less than 0.05). Although the repeated measures ANOVA is fairly 'robust' to violations of normality, you may want/need to use the Friedman test results presented below:")
    cat("", sep="\n")
    cat("===============================================", sep="\n")
    cat("Friedman Rank Sum Test - Result (Nonparametric)", sep="\n")
    cat("===============================================", sep="\n")
    print(friedman_result)
    if (friedman_result$p.value<0.001) {
      cat(paste0("--> Interpretation: the median test score turned out to be statistically significantly different at the different time points during the intervention (p<0.001)."), sep="\n")
    } else if (friedman_result$p.value<0.01) {
      cat(paste0("--> Interpretation: the median test score turned out to be statistically significantly different at the different time points during the intervention (p<0.01)."), sep="\n")
    } else if (friedman_result$p.value<0.05) {
      cat(paste0("--> Interpretation: the median test score turned out to be statistically significantly different at the different time points during the intervention (p<0.05)."), sep="\n")
    } else {
      cat(paste0("--> Interpretation: the median test score didn't turn out to be statistically significantly different at the different time points during the intervention (p>0.05)."), sep="\n")
    }

    cat("", sep="\n")
    cat("---------------------------------------------", sep="\n")
    cat("Friedman Rank Sum Test - Pairwise Comparisons", sep="\n")
    print(friedman_pwc)
    # Between Pre and Post1
    if (friedman_pwc$p.adj[2]<0.001) {
      if((descriptive$median[2]-descriptive$median[1])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly greater than the median pre-test score (p.adj<0.001)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly smaller than the median pre-test score (p.adj<0.001)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[2]<0.01) {
      if((descriptive$median[2]-descriptive$median[1])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly greater than the median pre-test score (p.adj<0.01)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly smaller than the median pre-test score (p.adj<0.01)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[2]<0.05) {
      if((descriptive$median[2]-descriptive$median[1])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly greater than the median pre-test score (p.adj<0.05)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly smaller than the median pre-test score (p.adj<0.05)."), sep="\n")
      }
    } else {
      cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
    }
    # Between Post1 and Post2
    if (friedman_pwc$p.adj[1]<0.001) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median post1-test score (p.adj<0.001)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median post1-test score (p.adj<0.001)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[1]<0.01) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median post1-test score (p.adj<0.01)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median post1-test score (p.adj<0.01)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[1]<0.05) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median post1-test score (p.adj<0.05)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median post1-test score (p.adj<0.05)."), sep="\n")
      }
    } else {
      cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
    }
    # Between Pre and Post2
    if (friedman_pwc$p.adj[3]<0.001) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median pre-test score (p.adj<0.001)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median pre-test score (p.adj<0.001)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[3]<0.01) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median pre-test score (p.adj<0.01)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median pre-test score (p.adj<0.01)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[3]<0.05) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median pre-test score (p.adj<0.05)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median pre-test score (p.adj<0.05)."), sep="\n")
      }
    } else {
      cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post2-test score (",descriptive$median[3],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
    }
  }

  # Export the main one-way repeated ANOVA and post-hoc analysis results to report
  sink("onewayrepeatedanova_outputs.txt")
  cat("======================================", sep="\n")
  cat("Descriptive Statistics: Average Scores", sep="\n")
  cat("======================================", sep="\n")
  print(descriptive)
  cat("Refer to the boxplots separately exported into the working directory to visually inspect the descriptive statistics.")
  cat("", sep="\n")

  cat("==============================", sep="\n")
  cat("Results of Testing Assumptions", sep="\n")
  cat("==============================", sep="\n")
  cat("# Outliers:")
  print(outliers)
  if (outliers$is.extreme[1]==FALSE & outliers$is.extreme[2]==FALSE) {
    cat("## Interpretation: No extreme outlier was identified in your data.")
  } else {
    cat("## Interpretation: At least one extreme outlier was identified in your data, probably due to data entry errors, mesurement errors, or unusual values. you need to check the outlier(s) to understand them.You can still include the outlier(s) in the analysis if you don't believe the result will be substantially affected. This can be evaluated by comparing the result of the ANOVA with and without the outlier(s).", sep="\n")
  }

  cat("", sep="\n")
  cat("---------------------------", sep="\n")
  cat("# Normality:")
  print(shapirotest)
  if (shapirotest$p>0.05) {
    cat("--> Interpretation: the average test score was normally distributed at each time point, as assessed by Shapiro-Wilk test (p>0.05).", sep="\n")
  } else {
    cat("--> Interpretation: the assumption of normality has NOT been met; the average score was not normally distributed at least at one time point. Although the repeated measures ANOVA is known to be robust to a violation of the assumption of normality, you may want/need to refer to the result of the Friedman test, which is a non-parametric version of one-way repeated measures ANOVA in you report.", sep="\n")
  }
    cat("If the sample size is greter than 50, you'd better refer to the QQ plots displayed in the 'Plots' panel to visually inspect the normality. This is because the Shapiro-Wilk test becomes very sensitive to a minor deviation from normality at a larger sample size (>50 in this case).", sep="\n")
    print(qqplots)
    cat("--> Interpretation: if all the points fall in the plots above approximately along the reference line, you can assume normality.", sep="\n")

  cat("---------------------------", sep="\n")
  cat("# Sphericity:")
  cat("--> The assumption of sphericity has been checked during the computation of the ANOVA test (the Mauchly's test has been internally run to assess the sphericity assumption). Then, the Greenhouse-Geisser sphericity correction has been automatically applied to factors violating the sphericity of assumption.", sep="\n")

  cat("", sep="\n")
  cat("===============================================================", sep="\n")
  cat("Result of the main One-way Repeated Measures ANOVA (Parametric)", sep="\n")
  cat("===============================================================", sep="\n")
  print(result)
  if (result$p<0.001) {
    cat(paste0("--> Interpretation: the average test score turned out to be statistically significantly different at the different time points during the intervention: F(",result$DFn," ",result$DFd,")=",result$F,", p<0.001, eta2(g)=",result$ges,"."), sep="\n")
  } else if (result$p<0.01) {
    cat(paste0("--> Interpretation: the average test score turned out to be statistically significantly different at the different time points during the intervention: F(",result$DFn," ",result$DFd,")=",result$F,", p<0.01, eta2(g)=",result$ges,"."), sep="\n")
  } else if (result$p<0.05) {
    cat(paste0("--> Interpretation: the average test score turned out to be statistically significantly different at the different time points during the intervention: F(",result$DFn," ",result$DFd,")=",result$F,", p<0.05, eta2(g)=",result$ges,"."), sep="\n")
  } else {
    cat(paste0("--> Interpretation: the average test score didn't turn out to be statistically significantly different at the different time points during the intervention: F(",result$DFn," ",result$DFd,")=",result$F,", p>0.05, eta2(g)=",result$ges,"."), sep="\n")
  }

  cat("", sep="\n")
  cat("--------------------", sep="\n")
  cat("Pairwise Comparisons", sep="\n")
  print(pwc)
  # Between Pre and Post1
  if (pwc$p.adj[2]<0.001) {
    if((descriptive$mean[2]-descriptive$mean[1])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average Post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly greater than the average pre-test score (p.adj<0.001)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average Post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly smaller than the average pre-test score (p.adj<0.001)."), sep="\n")
    }
  } else if (pwc$p.adj[2]<0.01) {
    if((descriptive$mean[2]-descriptive$mean[1])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average Post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly greater than the average pre-test score (p.adj<0.01)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average Post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly smaller than the average pre-test score (p.adj<0.01)."), sep="\n")
    }
  } else if (pwc$p.adj[2]<0.05) {
    if((descriptive$mean[2]-descriptive$mean[1])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average Post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly greater than the average pre-test score (p.adj<0.05)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average Post1-test score (",descriptive$mean[2],") turned out to be significant; that is, the average post1-test score was significantly smaller than the average pre-test score (p.adj<0.05)."), sep="\n")
    }
  } else {
    cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average Post1-test score (",descriptive$mean[2],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
  }
  # Between Post1 and Post2
  if (pwc$p.adj[1]<0.001) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average post1-test score (p.adj<0.001)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average post1-test score (p.adj<0.001)."), sep="\n")
    }
  } else if (pwc$p.adj[1]<0.01) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average post1-test score (p.adj<0.01)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average post1-test score (p.adj<0.01)."), sep="\n")
    }
  } else if (pwc$p.adj[1]<0.05) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average post1-test score (p.adj<0.05)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average post1-test score (p.adj<0.05)."), sep="\n")
    }
  } else {
    cat(paste0("--> The difference between the average post1-test score (",descriptive$mean[2],") and the average Post2-test score (",descriptive$mean[3],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
  }
  # Between Pre and Post2
  if (pwc$p.adj[3]<0.001) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average pre-test score (p.adj<0.001)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[2],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average pre-test score (p.adj<0.001)."), sep="\n")
    }
  } else if (pwc$p.adj[3]<0.01) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average pre-test score (p.adj<0.01)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[2],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average pre-test score (p.adj<0.01)."), sep="\n")
    }
  } else if (pwc$p.adj[3]<0.05) {
    if((descriptive$mean[3]-descriptive$mean[2])>0) {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly greater than the average pre-test score (p.adj<0.05)."), sep="\n")
    } else {
      cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[2],") and the average Post2-test score (",descriptive$mean[3],") turned out to be significant; that is, the average post2-test score was significantly smaller than the average pre-test score (p.adj<0.05)."), sep="\n")
    }
  } else {
    cat(paste0("--> The difference between the average pre-test score (",descriptive$mean[1],") and the average Post2-test score (",descriptive$mean[3],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
  }

  # Display the result of the Friedman test only when the assumption of normality is violated
  if (shapirotest$p<0.05) {
    cat("---------------------------------------------", sep="\n")
    cat("---------------------------------------------", sep="\n")
    cat("As the result of the Shapiro-Wilk normality test show above, the assumption of normality of residuals has been violated (i.e., p-value from the Shapiro-Wilk test is less than 0.05). Although the repeated measures ANOVA is fairly 'robust' to violations of normality, you may want/need to use the Friedman test results presented below:")
    cat("", sep="\n")
    cat("===============================================", sep="\n")
    cat("Friedman Rank Sum Test - Result (Nonparametric)", sep="\n")
    cat("===============================================", sep="\n")
    print(friedman_result)
    if (friedman_result$p.value<0.001) {
      cat(paste0("--> Interpretation: the median test score turned out to be statistically significantly different at the different time points during the intervention (p<0.001)."), sep="\n")
    } else if (friedman_result$p.value<0.01) {
      cat(paste0("--> Interpretation: the median test score turned out to be statistically significantly different at the different time points during the intervention (p<0.01)."), sep="\n")
    } else if (friedman_result$p.value<0.05) {
      cat(paste0("--> Interpretation: the median test score turned out to be statistically significantly different at the different time points during the intervention (p<0.05)."), sep="\n")
    } else {
      cat(paste0("--> Interpretation: the median test score didn't turn out to be statistically significantly different at the different time points during the intervention (p>0.05)."), sep="\n")
    }

    cat("", sep="\n")
    cat("---------------------------------------------", sep="\n")
    cat("Friedman Rank Sum Test - Pairwise Comparisons", sep="\n")
    print(friedman_pwc)
    # Between Pre and Post1
    if (friedman_pwc$p.adj[2]<0.001) {
      if((descriptive$median[2]-descriptive$median[1])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly greater than the median pre-test score (p.adj<0.001)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly smaller than the median pre-test score (p.adj<0.001)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[2]<0.01) {
      if((descriptive$median[2]-descriptive$median[1])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly greater than the median pre-test score (p.adj<0.01)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly smaller than the median pre-test score (p.adj<0.01)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[2]<0.05) {
      if((descriptive$median[2]-descriptive$median[1])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly greater than the median pre-test score (p.adj<0.05)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") turned out to be significant; that is, the median post1-test score was significantly smaller than the median pre-test score (p.adj<0.05)."), sep="\n")
      }
    } else {
      cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post1-test score (",descriptive$median[2],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
    }
    # Between Post1 and Post2
    if (friedman_pwc$p.adj[1]<0.001) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median post1-test score (p.adj<0.001)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median post1-test score (p.adj<0.001)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[1]<0.01) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median post1-test score (p.adj<0.01)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median post1-test score (p.adj<0.01)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[1]<0.05) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median post1-test score (p.adj<0.05)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median post1-test score (p.adj<0.05)."), sep="\n")
      }
    } else {
      cat(paste0("--> The difference between the median post1-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
    }
    # Between Pre and Post2
    if (friedman_pwc$p.adj[3]<0.001) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median pre-test score (p.adj<0.001)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median pre-test score (p.adj<0.001)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[3]<0.01) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median pre-test score (p.adj<0.01)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median pre-test score (p.adj<0.01)."), sep="\n")
      }
    } else if (friedman_pwc$p.adj[3]<0.05) {
      if((descriptive$median[3]-descriptive$median[2])>0) {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly greater than the median pre-test score (p.adj<0.05)."), sep="\n")
      } else {
        cat(paste0("--> The difference between the median pre-test score (",descriptive$median[2],") and the median post2-test score (",descriptive$median[3],") turned out to be significant; that is, the median post2-test score was significantly smaller than the median pre-test score (p.adj<0.05)."), sep="\n")
      }
    } else {
      cat(paste0("--> The difference between the median pre-test score (",descriptive$median[1],") and the median post2-test score (",descriptive$median[3],") didn't turn out to be significant (p.adj>0.05)."), sep="\n")
    }
  }
  sink()

options(warn=0)
}

###############################################################################
#' Demographic Group Differences
#'
#' This function automatically combines demographic variables to a data set, runs the analysis of variance (ANOVA) with assumptions check to examine demographic sub-group differences, and then exports outputs for you all at once. Please make sure to name data files accurately and have them saved in the working directory. Find the output files "subgroupdifferences_outputs.txt" instantly generated by this function in the working directory.
#'
#' @param csv_data This function requires a csv data file name: "data_treat_pre.csv", "data_treat_post.csv".
#'
#' @export

demogroupdiff <- function(csv_data) {

  options(warn=-1)

  # Selecting types
    without_any_punc_filename <- gsub("[[:punct:]]", " ",tools::file_path_sans_ext(csv_data))
    if (grepl("\\bpre\\b", without_any_punc_filename, ignore.case=TRUE) ) {
      file_name="pre"
    } else if (grepl("\\bpost\\b", without_any_punc_filename, ignore.case=TRUE)) {
      file_name="post"
    } else { print("unavailable filename with pre and post")}

    # Selecting types
    if (grepl("\\bctrl\\b", without_any_punc_filename, ignore.case=TRUE) ) {
      group_name="ctrl"
    } else if (grepl("\\btreat\\b", without_any_punc_filename, ignore.case=TRUE) ) {
      group_name="treat"
    } else {
      print("unavailable filename with ctrl and treat ")}

    # Reading
    data_original <- read_csv(csv_data,col_types = cols())
    demographic_data <- read_csv("demographic_data.csv", show_col_types = FALSE)

    # Deleting students with too many skipped answers: data_original.csv-----------------
    nrow_all <- nrow(data_original)
    n <- as.numeric(length(data_original[,-1]))
    data_original <- data_original %>% mutate(m_rate = round(rowSums(is.na(data_original))/n,3))
    cat("", sep="\n")
    message("Skipped answers will be treated as incorrect in this package, and too many skipped answers may skew the results of data analysis. By default, students with more than 15% of skipped answers will be deleted from the data to prevent skewed results. If you don't agree with this rate of 15%, you need to provide a different rate below. If you're fine with 15%, just hit the 'Enter' key (i.e., no input) when you see a prompt 'Please enter an alternative rate...: ' below. If you want to apply an alternatie rate, please put it below in a decimal format like 0.2 for 20%")
    cat("", sep="\n")
    m_rate_default <- readline(prompt="Please enter an alternative rate in a decimal format (e.g., 0.1 for 10%, 0.25 for 25%): ")
    if (m_rate_default > 0) {
      data_original <- subset(data_original, m_rate < m_rate_default)
      nrow_subset <- nrow(data_original)
      n_students_deleted <- nrow_all-nrow_subset
      if (n_students_deleted > 0) {
        cat("", sep="\n")
        cat(paste0("Students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted from the 'data_original.csv' file is ",n_students_deleted,"."))
      } else {
        cat("", sep="\n")
        cat(paste0("The number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
      }
    } else {
      data_original <- subset(data_original, m_rate < 0.15)
      nrow_subset <- nrow(data_original)
      n_students_deleted <- nrow_all-nrow_subset
      if (n_students_deleted > 0) {
        cat("", sep="\n")
        cat(paste0("Students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
      } else {
        cat("", sep="\n")
        cat(paste0("The number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."), sep="\n")
      }
    }
    #----------------------------------------------------------------

  data_original <- read.csv("data_treat_pre.csv")

  # Replace skipped answers with "0"
  data_original[is.na(data_original)]= 0

  # Calculate average scores
  data_original <- data_original %>% mutate(avg_score = round(rowMeans(data_original[,-1]),3))

  # Merge test data and demographic data
  data_original <- merge(data_original, demographic_data, by.x = "id",  by.y = "id")

  # Take demographic variable name and create a subset of the dataframe with average score and demographic data
  message("############")
  nd <- ncol(demographic_data)
  for (i in 2:nd) {
    message(paste0(colnames(demographic_data[i]),"=",i-1))
  }
  message("############")
  cat("Refer to the demographic variables listed right above.")
  dn <- as.numeric(readline(prompt="Enter the number assigned to the demographic variable name you want to analyze: "))
  group_name <- colnames(demographic_data[dn+1])
  data_original <- select(data_original, c('avg_score', all_of(group_name)))
  names(data_original) <- c("average_score", "group")
  data_original <- data_original[complete.cases(data_original),]

  # Descriptive statistics
  descriptive <- data_original %>% group_by(group) %>% get_summary_stats(average_score, type = "mean_sd")
  boxplots <- ggboxplot(data_original, x = "group", y = "average_score", add="point")
  jpeg("demogroupdiff_boxplots.jpeg")
  print(boxplots)
  dev.off()

  # Conduct one-way ANOVA and pairwise sub-group comparisons
  one_way <- aov(average_score ~ factor(group), data=data_original)
  pwc <- TukeyHSD(one_way)

  one_way_unequal <- data_original %>% welch_anova_test(average_score ~ group)
  pwc2 <- data_original %>% games_howell_test(average_score ~ group)

  # Check Outliers
  outliers <- data_original %>% group_by(group) %>% identify_outliers(average_score)

  # Check normality of residuals
  shapirotest <- shapiro.test(resid(one_way))
  qqplots <- ggqqplot(residuals(one_way),title = "QQ Plots")
  jpeg("demogroupdiff_qqplots.jpeg")
  print(qqplots)
  dev.off()
  hist(residuals(one_way))
  jpeg("demogroupdiff_histogram.jpeg")
  hist(residuals(one_way))
  dev.off()

  # Check homogeneity of variances
  levene_result <- leveneTest(average_score ~ factor(group), data=data_original)
  plot(one_way,1)
  jpeg("demogroupdiff_rfplots.jpeg")
  plot(one_way,1)
  dev.off()

  ## Kruskal-Wallis Test: Non-parametric version of one-way ANOVA
  np_one_way <- kruskal.test(average_score ~ group, data=data_original)
  np_pwc <- pairwise.wilcox.test(data_original$average_score, data_original$group,
                                 p.adjust.method = "BH")

  # Print the results
  cat("", sep="\n")
  cat("", sep="\n")
  cat("======================", sep="\n")
  cat("Descriptive Statistics", sep="\n")
  cat("======================", sep="\n")
  message("Refer to the boxplot in the 'Plots' panel. The boxplot has also been exported to the working directory.")
  print(descriptive)

  cat("", sep="\n")
  cat("==============================", sep="\n")
  cat("Results of Testing Assumptions", sep="\n")
  cat("==============================", sep="\n")
  cat("# Normality of Residuals:", sep="\n")
  message("Refer to the histogram and the normal Q-Q plot in the 'Plots' panel to visually inspect the normality of residuals. The histogram and Q-Q plot have also been exported to the working directory.")
  print(shapirotest)
  if (shapirotest$p.value > 0.05) {
    cat("## Interpretation: the assumption of normality by group has been met (p>0.05).", sep="\n")
  } else {
    cat("## Interpretation: the assumption of normality by group has NOT been met (p<0.05). Although ANCOVA is robust to a violation of the assumption of normality of residuals, you may want to mention this violation in you report. For example, you can say: 'The data has slightly violated the assumption of normality of residuals, but ANCOVA is known to be robust to this violation (so it's not a serious issue).", sep="\n")
  }

  cat("-------------------------", sep="\n")
  cat("Homogeneity of Variances:", sep="\n")
  message("Refer to the 'Residuals vs Fitted' plot in the 'Plots' panel. The plot has also been exported to the working directory.")
  print(levene_result)
  if (levene_result$`Pr(>F)`[1] > 0.05) {
    cat("## Interpretation: the assumption of equality of variances has been met (p>0.05).", sep="\n")
  } else {
    cat("## Interpretation: the assumption of equality of variances has NOT been met (p<0.05).", sep="\n")
  }

  cat("", sep="\n")
  if (levene_result$`Pr(>F)`[1]>0.05) {
  cat("===================================================================================", sep="\n")
  cat("Results of One-way ANOVA: Group Difference(s) (Parametric: Equal variances assumed)", sep="\n")
  cat("===================================================================================", sep="\n")
  print(summary(one_way))
  cat("----------------------------------------------", sep="\n")
  cat("Pairwide Comparisons (Equal variances assumed)", sep="\n")
  print(pwc)
  } else {
    message("Since the assumption of equal variances has NOT been satisfied, the Welch one-way test and the Games-Howell post-hoc test results are presented below.")
    cat("=====================================================================================", sep="\n")
    cat("Results of One-way ANOVA: Group Difference(s) (Parametric: Unequal variances assumed)")
    cat("=====================================================================================", sep="\n")
    print(summary(one_way2))
    cat("--------------------", sep="\n")
    cat("Pairwide Comparisons (Unequal variances assumed)", sep="\n")
    print(pwc2)
  }

  if (shapirotest$p.value < 0.05) {
    cat("----------------------------------------------------------", sep="\n")
    cat("----------------------------------------------------------", sep="\n")
    message("As shown above in the the result of the Shapiro-Wilk test, the assumption of normality is violated. Although ANOVA is known to be robust to a violation of normality, you may want/need to use the Kruskal-Wallis test result presented below")
    cat("===============================================", sep="\n")
    cat("Results of Kruskal_Wallis Test (Non-parametric)", sep="\n")
    cat("===============================================", sep="\n")
    print(np_one_way)
    print(np_pwc)
  }

  # Export a summary of the results
  sink(paste0("demogroupdiff_outputs_summary_", group_name,"_",file_name,".txt"))
  cat("======================", sep="\n")
  cat("Descriptive Statistics", sep="\n")
  cat("======================", sep="\n")
  cat("Refer to the boxplot in the 'Plots' panel. The boxplot has also been exported to the working directory.", sep="\n")
  print(descriptive)

  cat("", sep="\n")
  cat("==============================", sep="\n")
  cat("Results of Testing Assumptions", sep="\n")
  cat("==============================", sep="\n")
  cat("# Normality of Residuals:", sep="\n")
  cat("Refer to the histogram and the normal Q-Q plot in the 'Plots' panel to visually inspect the normality of residuals. The histogram and Q-Q plot have also been exported to the working directory.", sep="\n")
  print(shapirotest)
  if (shapirotest$p.value > 0.05) {
    cat("## Interpretation: the assumption of normality by group has been met (p>0.05).", sep="\n")
  } else {
    cat("## Interpretation: the assumption of normality by group has NOT been met (p<0.05). Although ANCOVA is robust to a violation of the assumption of normality of residuals, you may want to mention this violation in you report. For example, you can say: 'The data has slightly violated the assumption of normality of residuals, but ANCOVA is known to be robust to this violation (so it's not a serious issue).", sep="\n")
  }

  cat("-------------------------", sep="\n")
  cat("Homogeneity of Variances:", sep="\n")
  cat("Refer to the 'Residuals vs Fitted' plot in the 'Plots' panel. The plot has also been exported to the working directory.", sep="\n")
  print(levene_result)
  if (levene_result$`Pr(>F)`[1] > 0.05) {
    cat("## Interpretation: the assumption of equality of variances has been met (p>0.05).", sep="\n")
  } else {
    cat("## Interpretation: the assumption of equality of variances has NOT been met (p<0.05).", sep="\n")
  }

  cat("", sep="\n")
  if (levene_result$`Pr(>F)`[1]>0.05) {
    cat("===================================================================================", sep="\n")
    cat("Results of One-way ANOVA: Group Difference(s) (Parametric: Equal variances assumed)", sep="\n")
    cat("===================================================================================", sep="\n")
    print(summary(one_way))
    cat("----------------------------------------------", sep="\n")
    cat("Pairwide Comparisons (Equal variances assumed)", sep="\n")
    print(pwc)
  } else {
    cat("Since the assumption of equal variances has NOT been satisfied, the Welch one-way test and the Games-Howell post-hoc test results are presented below.", sep="\n")
    cat("=====================================================================================", sep="\n")
    cat("Results of One-way ANOVA: Group Difference(s) (Parametric: Unequal variances assumed)")
    cat("=====================================================================================", sep="\n")
    print(summary(one_way2))
    cat("--------------------", sep="\n")
    cat("Pairwide Comparisons (Unequal variances assumed)", sep="\n")
    print(pwc2)
  }

  if (shapirotest$p.value < 0.05) {
    cat("----------------------------------------------------------", sep="\n")
    cat("----------------------------------------------------------", sep="\n")
    cat("As shown above in the the result of the Shapiro-Wilk test, the assumption of normality is violated. Although ANOVA is known to be robust to a violation of normality, you may want/need to use the Kruskal-Wallis test result presented below.", sep="\n")
    cat("===============================================", sep="\n")
    cat("Results of Kruskal_Wallis Test (Non-parametric)", sep="\n")
    cat("===============================================", sep="\n")
    print(np_one_way)
    print(np_pwc)
  }
  sink()

  options(warn=0)

}


###### Fun(?) Time to Test It ######

# loadpackages()

# itemanalysis("data_ctrl_pre.csv")
# itemanalysis("data_ctrl_post.csv")
# itemanalysis("data_treat_pre.csv")
# itemanalysis("data_treat_post.csv")

# onewayancova()

# pairedsamples()

# independentsamples()

# onewayrepeatedanova()

# demogroupdiff("data_ctrl_pre.csv")
# demogroupdiff("data_ctrl_post.csv")
# demogroupdiff("data_treat_pre.csv")
# demogroupdiff("data_treat_post.csv")



###############################################################################
#' two-way Repeated Measures ANOVA
#'
#' This function automatically merges pre, post, and post2 data sets, binds intervention and control group data sets, runs the two-way repeated measures ANOVA with assumptions check, and then displays/exports outputs all at once. Please make sure to name data files accurately (i.e., “data_treat_pre.csv”, “data_treat_post.csv”, and “data_treat_post2.csv”, “data_ctrl_pre.csv”, “data_ctrl_post.csv”, and “data_ctrl_post2.csv”) and have them saved in the working directory. Find the output files "twowayrepeatedanova_outputs.txt" instantly generated by this function in the working directory.
#'
#' @export

twowayrepeatedanova <- function() {

  options(warn=-1)
  # Read all data sets
  data_treat_pre <- read_csv("data_treat_pre.csv", show_col_types = FALSE)
  data_treat_post<- read_csv("data_treat_post.csv", show_col_types = FALSE)
  data_treat_post2 <- read_csv("data_treat_post2.csv", show_col_types = FALSE)

  data_ctrl_pre <- read_csv("data_ctrl_pre.csv", show_col_types = FALSE)
  data_ctrl_post<- read_csv("data_ctrl_post.csv", show_col_types = FALSE)
  data_ctrl_post2 <- read_csv("data_ctrl_post2.csv", show_col_types = FALSE)

  # Deleting students with too many skipped answers: data_treat_pre.csv-----------------
  nrow_all <- nrow(data_treat_pre)
  n <- as.numeric(length(data_treat_pre[,-1]))
  data_treat_pre <- data_treat_pre %>% mutate(m_rate = round(rowSums(is.na(data_treat_pre))/n,3))
  cat("", sep="\n")
  message("Skipped answers will be treated as incorrect in this package, and too many skipped answers may skew the results of data analysis. By default, students with more than 15% of skipped answers will be deleted from the data to prevent skewed results. If you don't agree with this rate of 15%, you need to provide a different rate below. If you're fine with 15%, just hit the 'Enter' key (i.e., no input) when you see a prompt 'Please enter an alternative rate...: ' below. If you want to apply an alternatie rate, please put it below in a decimal format like 0.2 for 20%")
  cat("", sep="\n")
  m_rate_default <- readline(prompt="Please enter an alternative rate in a decimal format (e.g., 0.1 for 10%, 0.25 for 25%): ")
  if (m_rate_default > 0) {
    data_treat_pre <- subset(data_treat_pre, m_rate < m_rate_default)
    nrow_subset <- nrow(data_treat_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted from the 'data_treat_pre.csv' file is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_treat_pre <- subset(data_treat_pre, m_rate < 0.15)
    nrow_subset <- nrow(data_treat_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_treat_pre.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  # Deleting students with too many skipped answers: data_treat_post.csv-----------------
  nrow_all <- nrow(data_treat_post)
  n <- as.numeric(length(data_treat_post[,-1]))
  data_treat_post <- data_treat_post %>% mutate(m_rate = round(rowSums(is.na(data_treat_post))/n,3))

  if (m_rate_default > 0) {
    data_treat_post <- subset(data_treat_post, m_rate < m_rate_default)
    nrow_subset <- nrow(data_treat_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_treat_post <- subset(data_treat_post, m_rate < 0.15)
    nrow_subset <- nrow(data_treat_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  # Deleting students with too many skipped answers: data_treat_post2.csv-----------------
  nrow_all <- nrow(data_treat_post2)
  n <- as.numeric(length(data_treat_post2[,-1]))
  data_treat_post2 <- data_treat_post2 %>% mutate(m_rate = round(rowSums(is.na(data_treat_post2))/n,3))

  if (m_rate_default > 0) {
    data_treat_post2 <- subset(data_treat_post2, m_rate < m_rate_default)
    nrow_subset <- nrow(data_treat_post2)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post2.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post2.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_treat_post2 <- subset(data_treat_post2, m_rate < 0.15)
    nrow_subset <- nrow(data_treat_post2)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post2.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_treat_post2.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  #----------------------------------------------------------------

  # Deleting students with too many skipped answers: data_ctrl_pre.csv-----------------
  nrow_all <- nrow(data_ctrl_pre)
  n <- as.numeric(length(data_ctrl_pre[,-1]))
  data_ctrl_pre <- data_ctrl_pre %>% mutate(m_rate = round(rowSums(is.na(data_ctrl_pre))/n,3))
  if (m_rate_default > 0) {
    data_ctrl_pre <- subset(data_ctrl_pre, m_rate < m_rate_default)
    nrow_subset <- nrow(data_ctrl_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_ctrl_pre.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted from the 'data_ctrl_pre.csv' file is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_ctrl_pre.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_ctrl_pre <- subset(data_ctrl_pre, m_rate < 0.15)
    nrow_subset <- nrow(data_ctrl_pre)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_ctrl_pre.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("Regarding the 'data_ctrl_pre.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  # Deleting students with too many skipped answers: data_ctrl_post.csv-----------------
  nrow_all <- nrow(data_ctrl_post)
  n <- as.numeric(length(data_ctrl_post[,-1]))
  data_ctrl_post <- data_ctrl_post %>% mutate(m_rate = round(rowSums(is.na(data_ctrl_post))/n,3))

  if (m_rate_default > 0) {
    data_ctrl_post <- subset(data_ctrl_post, m_rate < m_rate_default)
    nrow_subset <- nrow(data_ctrl_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_ctrl_post <- subset(data_ctrl_post, m_rate < 0.15)
    nrow_subset <- nrow(data_ctrl_post)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  # Deleting students with too many skipped answers: data_ctrl_post2.csv-----------------
  nrow_all <- nrow(data_ctrl_post2)
  n <- as.numeric(length(data_ctrl_post2[,-1]))
  data_ctrl_post2 <- data_ctrl_post2 %>% mutate(m_rate = round(rowSums(is.na(data_ctrl_post2))/n,3))

  if (m_rate_default > 0) {
    data_ctrl_post2 <- subset(data_ctrl_post2, m_rate < m_rate_default)
    nrow_subset <- nrow(data_ctrl_post2)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post2.csv' file, students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post2.csv' file, the number of students with more than ",as.numeric(m_rate_default)*100,"% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  } else {
    data_ctrl_post2 <- subset(data_ctrl_post2, m_rate < 0.15)
    nrow_subset <- nrow(data_ctrl_post2)
    n_students_deleted <- nrow_all-nrow_subset
    if (n_students_deleted > 0) {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post2.csv' file, students with more than 15% of skipped answers have been deleted from the dataframe. The number of students deleted is ",n_students_deleted,"."))
    } else {
      cat("", sep="\n")
      cat(paste0("For the 'data_ctrl_post2.csv' file, the number of students with more than 15% of skipped answers is zero, so no student has been deleted from the dataframe."))
    }
  }
  #----------------------------------------------------------------

  # Replace skipped answers with "0"
  data_treat_pre[is.na(data_treat_pre)]= 0
  data_treat_post[is.na(data_treat_post)]= 0
  data_treat_post2[is.na(data_treat_post2)]= 0

  data_ctrl_pre[is.na(data_ctrl_pre)]= 0
  data_ctrl_post[is.na(data_ctrl_post)]= 0
  data_ctrl_post2[is.na(data_ctrl_post2)]= 0

  # Change column names with their origin'sir
  colnames(data_treat_pre) <- paste( colnames(data_treat_pre), "pre", sep = "_")
  colnames(data_treat_post) <- paste( colnames(data_treat_post), "post", sep = "_")
  colnames(data_treat_post2) <- paste( colnames(data_treat_post2), "post2", sep = "_")

  colnames(data_ctrl_pre) <- paste( colnames(data_ctrl_pre), "pre", sep = "_")
  colnames(data_ctrl_post) <- paste( colnames(data_ctrl_post), "post", sep = "_")
  colnames(data_ctrl_post2) <- paste( colnames(data_ctrl_post2), "post2", sep = "_")

  # Calculate average scores
  data_treat_pre <- data_treat_pre %>% mutate(avg_score_pre = round(rowMeans(data_treat_pre[,-1]),3))
  data_treat_post <- data_treat_post %>% mutate(avg_score_post = round(rowMeans(data_treat_post[,-1]),3))
  data_treat_post2 <- data_treat_post2 %>% mutate(avg_score_post2 = round(rowMeans(data_treat_post2[,-1]),3))

  data_ctrl_pre <- data_ctrl_pre %>% mutate(avg_score_pre = round(rowMeans(data_ctrl_pre[,-1]),3))
  data_ctrl_post <- data_ctrl_post %>% mutate(avg_score_post = round(rowMeans(data_ctrl_post[,-1]),3))
  data_ctrl_post2 <- data_ctrl_post2 %>% mutate(avg_score_post2 = round(rowMeans(data_ctrl_post2[,-1]),3))

  # Merge pre/post/post2 data
  data_treat_prepost<-merge(data_treat_pre, data_treat_post, by.x = "id_pre",  by.y = "id_post")
  treat_data_merged<-merge(data_treat_prepost, data_treat_post2, by.x = "id_pre",  by.y = "id_post2")
  names(treat_data_merged)[names(treat_data_merged) == 'id_pre'] <- "id"
  treat_data_merged <- treat_data_merged %>% mutate(datagroup=1)
  treat_data_merged <- treat_data_merged[ , c("id", "datagroup", "avg_score_pre", "avg_score_post", "avg_score_post2")]

  data_ctrl_prepost<-merge(data_ctrl_pre, data_ctrl_post, by.x = "id_pre",  by.y = "id_post")
  ctrl_data_merged<-merge(data_ctrl_prepost, data_ctrl_post2, by.x = "id_pre",  by.y = "id_post2")
  names(ctrl_data_merged)[names(ctrl_data_merged) == 'id_pre'] <- "id"
  ctrl_data_merged <- ctrl_data_merged %>% mutate(datagroup=0)
  ctrl_data_merged <- ctrl_data_merged[ , c("id", "datagroup", "avg_score_pre", "avg_score_post", "avg_score_post2")]

  # Bind treat/control data and change column names for reshaping ("long")
  full_data_binded <- rbind(ctrl_data_merged, treat_data_merged)
  names(full_data_binded) <- c("id", "group", "score0", "score1", "score2")

  # Rearrange the data to a long format
  df_long <- reshape(full_data_binded, direction="long", varying=3:5, sep="")

  # Label group variable (0=control, 1=treatment)
  df_long$id <- factor(df_long$id)
  df_long$group <- factor(df_long$group,levels=c(0,1),labels=c("Control", "Treatment"))
  df_long$time <- factor(df_long$time,levels=c(0,1,2),labels=c("Pre", "Post1", "Post2"))

  # Plot the data
  interaction.plot(df_long$time, df_long$group, df_long$score)

  # Run the two-way repeated measures ANOVA
  model <- aov(score~group*time +Error(id), data=df_long)
  result <- summary(model)

  # Run post-hoc analyses to check the time the difference is significant (apply bonferroni correction: apply 0.05/3)
  Pre <- df_long[df_long$time=="Pre",]
  Post1 <- df_long[df_long$time=="Post1",]
  Post2 <- df_long[df_long$time=="Post2",]

  t.test(score~group, data=Pre)
  t.test(score~group, data=Post1)
  t.test(score~group, data=Post2)

  # Print the results
  # print("=======================================================================")
  # print("Results of Testing Assumptions")
  # print("=======================================================================")
  # print("Normality of Residuals:")
  # print(shapirotest)
  # print("Refer to the histogram and the normal Q-Q plot in the 'Plots' panel to visually inspect the normality of residuals.")
  # print("-----------------------------------------------------------------------")
  # print("Homogeneity of Variances:")
  # print(levene_result)
  # print("-----------------------------------------------------------------------")
  # print("Descriptive Statistics: Average Scores")
  # print(descriptive)
  cat("", sep="\n")
  cat("", sep="\n")
  print("=======================================================================")
  print("Result of the main One-way Repeated Measures ANOVA")
  print("=======================================================================")
  print(result)
  print("=======================================================================")
  print("Post-hoc Tests: Difference by Time")
  print("=======================================================================")
  print("Group difference at the pre-test")
  print(t.test(score~group, data=Pre))
  print("Group difference at the post1-test")
  print(t.test(score~group, data=Post1))
  print("Group difference at the post2-test")
  print(t.test(score~group, data=Post2))
  # print("-----------------------------------------------------------------------")
  # if (shapirotest$p.value < 0.05) {
  #   print("Since the p-value of the Shapiro-Wilk test is less than 0.05, you need to use the Friedman test results presented below")
  # } else {
  #   print("It is safe to use the results above since the p-value of the Shapiro-Wilk test is greater than 0.05. The Friedman results below confirm/complement the results above.")
  # }
  # print("=======================================================================")
  # print("Friedman Rank Sum Test - Result")
  # print("=======================================================================")
  # print(friedman_result)
  # print("=======================================================================")
  # print("Friedman Rank Sum Test - Pairwise Comparisons")
  # print("=======================================================================")
  # print(friedman_pwc)
  #
  # # Export the main ANCOVA and post-hoc analysis results to report
  # sink("onewayrepeatedanova_outputs.txt")
  # print("=======================================================================")
  # print("Results of Testing Assumptions")
  # print("=======================================================================")
  # print("Normality of Residuals:")
  # print(shapirotest)
  # print("Refer to the histogram and the normal Q-Q plot in the 'Plots' panel to visually inspect the normality of residuals.")
  # print("-----------------------------------------------------------------------")
  # print("Homogeneity of Variances:")
  # print(levene_result)
  # print("-----------------------------------------------------------------------")
  # print("Descriptive Statistics: Average Scores")
  # print(descriptive)
  # print("=======================================================================")
  # print("Result of the main One-way Repeated Measures ANOVA")
  # print("=======================================================================")
  # print(result)
  # print("=======================================================================")
  # print("Pairwise Comparisons")
  # print("=======================================================================")
  # print(pwc)
  # print("-----------------------------------------------------------------------")
  # if (shapirotest$p.value < 0.05) {
  #   print("Since the p-value from the Shapiro-Wilk test result is less than 0.05, you need to use the Friedman test results presented below")
  # } else {
  #   print("It is safe to use the results above since the p-value from the Shapiro-Wilk test result is greater than 0.05. The Friedman results below confirm/complement the results above.")
  # }
  # print("=======================================================================")
  # print("Friedman Rank Sum Test - Result")
  # print("=======================================================================")
  # print(friedman_result)
  # print("=======================================================================")
  # print("Friedman Rank Sum Test - Pairwise Comparisons")
  # print("=======================================================================")
  # print(friedman_pwc)
  # sink()

  options(warn=0)
}
HelikarLabPersonal/DBERlibR documentation built on April 2, 2022, 12:03 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
HelikarLabPersonal/DBERlibR
DBERlibR

R/DBERlibR.R
In HelikarLabPersonal/DBERlibR: DBERlibR

Defines functions twowayrepeatedanova onewayancova independentsamples pairedsamples itemanalysis loadpackages

Documented in independentsamples itemanalysis loadpackages onewayancova pairedsamples twowayrepeatedanova

R Package Documentation

Browse R Packages

We want your feedback!

HelikarLabPersonal/DBERlibR DBERlibR

R/DBERlibR.R In HelikarLabPersonal/DBERlibR: DBERlibR

Defines functions twowayrepeatedanova onewayancova independentsamples pairedsamples itemanalysis loadpackages

Documented in independentsamples itemanalysis loadpackages onewayancova pairedsamples twowayrepeatedanova

R Package Documentation

Browse R Packages

We want your feedback!

HelikarLabPersonal/DBERlibR
DBERlibR

R/DBERlibR.R
In HelikarLabPersonal/DBERlibR: DBERlibR