R/prep.R

#' Creates One Finalized Table Ready for Statistical Analysis
#'
#'@description \code{prep()} aggregates a single dataset in a long format
#'   according to any number of grouping variables. This makes \code{prep()}
#'   suitable for aggregating data from various types of experimental designs
#'   such as between-subjects, within-subjects (i.e., repeated measures), and
#'   mixed designs (i.e., experimental designs that include both between- and
#'   within- subjects independent variables). \code{prep()} returns a data
#'   frame with a number of dependent measures for further analysis for each
#'   aggregated cell (i.e., experimental cell) according to the  provided
#'   grouping variables (i.e., independent variables). Dependent measures for
#'   each experimental cell include among others means before and after
#'   rejecting observations according to a flexible standard deviation
#'   criteria, number of rejected observations according to the flexible
#'   standard deviation criteria, proportions of rejected observations
#'   according to the flexible standard deviation criteria, number of
#'   observations before rejection, means after rejecting observations
#'   according to procedures described in Van Selst & Jolicoeur (1994;
#'   suitable when measuring reaction-times), standard deviations, medians,
#'   means according to any percentile (e.g., 0.05, 0.25, 0.75, 0.95) and
#'   harmonic means. The data frame \code{prep()} returns can also be exported
#'   as a txt or csv file to be used for statistical analysis in other
#'   statistical programs.
#'
#' @usage prep(
#'    dataset = NULL
#'    , file_name = NULL
#'    , file_path = NULL
#'    , id = NULL
#'    , within_vars = c()
#'    , between_vars = c()
#'    , dvc = NULL
#'    , dvd = NULL
#'    , keep_trials = NULL
#'    , drop_vars = c()
#'    , keep_trials_dvc = NULL
#'    , keep_trials_dvd = NULL
#'    , id_properties = c()
#'    , sd_criterion = c(1, 1.5, 2)
#'    , percentiles = c(0.05, 0.25, 0.75, 0.95)
#'    , outlier_removal = NULL
#'    , keep_trials_outlier = NULL
#'    , decimal_places = 4
#'    , notification = TRUE
#'    , dm = c()
#'    , save_results = TRUE
#'    , results_name = "results.txt"
#'    , results_path = NULL
#'    , save_summary = TRUE
#' )
#' @param dataset Name of the data frame in R that contains the long format
#'   table after merging the individual data files using
#'   \code{file_merge()}. Either \code{dataset} or \code{file_name} must be
#'   provided. Default is \code{NULL}.
#' @param file_name A string with the name of a txt or csv file (including the
#'   file extension, e.g. \code{"my_data.txt"}) with the merged table in case
#'   the user already merged the individual data files. Either \code{dataset} or
#'   \code{file_name} must be provided. Default is \code{NULL}.
#' @param file_path A string with the path of the folder in which
#'   \code{file_name} is located. If \code{file_name} was used, then
#'   \code{file_path} must be provided. Default is \code{NULL}.
#' @param id A string with the name of the column in \code{file_name} or in
#'   \code{dataset} that contains the variable specifying the case identifier
#'   (i.e., the variable upon which the measurement took place; e.g.,
#'   \code{"subject_number"}). This should be a unique value per case. Values
#'   in this column must be numeric. Argument must be provided. Default is
#'   \code{NULL}.
#' @param within_vars String vector with names of grouping variables in
#'   \code{file_name} or in \code{dataset} that contain independent variables
#'   manipulated (or observed) within-ids (i.e., within-subjects, repeated
#'   measures). Single or multiple values must be specified as a string (e.g.,
#'   \code{c("SOA", "condition")}) according to the hierarchical order you
#'   wish. Note that the order of the names in \code{within_vars()} is
#'   important because \code{prep()} aggregates the data for the dependent
#'   measures by first dividing them to the levels of the first grouping
#'   variable in \code{witin_vars()}, and then within each of those levels
#'   \code{prep()} divides the data according to the next variable in
#'   \code{within_vars()} and so forth. Values in these columns must be
#'   numeric. Either \code{within_vars} or \code{between_vars} (or both)
#'   arguments must be provided. Default is \code{c()}.
#' @param between_vars String vector with names of grouping variables in
#'   \code{file_name} or in \code{dataset} that contain independent variables
#'   manipulated (or observed) between-ids (i.e., between-subjects). Single
#'   or multiple values must be specified as a string (e.g., \code{c("order")}).
#'   Order of the names in \code{between_vars()} does not matter. Values in
#'   this column must be numeric. Either \code{between_vars} or
#'   \code{within_vars} (or both) arguments must be provided. Default is
#'   \code{c()}.
#' @param dvc A string with the name of the column in \code{file_name} or in
#'   \code{dataset} that contains the dependent variable (e.g., "rt" for
#'   reaction-time as a dependent variable). Values in this column must be in
#'   an interval or ratio scale. Either \code{dvc} or \code{dvd} (or both)
#'   arguments must be provided. Default is \code{NULL}.
#' @param dvd A string with the name of the column in \code{file_name} or in
#'   \code{dataset} that contains the dependent variable (e.g., \code{"ac"}
#'   for accuracy as a dependent variable). Values in this column must be
#'   numeric and discrete (e.g., 0 and 1). Either \code{dvc} or \code{dvd} (or
#'   both) arguments must be provided. Default is \code{NULL}.
#' @param keep_trials A string. Allows deleting unnecessary observations and
#'   keeping necessary observations in \code{file_name} or in \code{dataset}
#'   according to logical conditions specified as a string. For example, if the
#'   dataset contains practice trials for each subject, these trials should not
#'   be included in the aggregation. The user should remove these trials by
#'   specifying how they were coded in the raw data (i.e., data before
#'   aggregation). For example, if practice trials are the ones for which
#'   the "block" column in the raw data tables equals to zero, the
#'   \code{keep_trials} argument should be \code{"raw_data$block !== 0"}.
#'   \code{raw_data} is the internal object in \code{prep()} representing the
#'   merged table. All logical conditions in \code{keep_trials} should be put
#'   in the same string and be concatenated by \code{&} or \code{|}. Logical
#'   conditions for this argument can relate to different columns in the merged
#'   table. Note that all further arguments of \code{prep()} will relate to the
#'   remaining observations in the merged table. Default is \code{NULL}.
#' @param drop_vars String vector with names of columns to delete in \code{file_name}
#'   or in \code{dataset}. Single or multiple values must be specified as a
#'   string (e.g., \code{c("font_size")}). Order of the names in
#'   \code{drop_vars} does not matter. Note that all further arguments of
#'   \code{prep()} will relate to the remaining variables in the merged table.
#'   Default is \code{c()}.
#' @param keep_trials_dvc A string. Allows deleting unnecessary observations
#'   and keeping necessary observations in \code{file_name} or in \code{dataset}
#'   for calculations and aggregation of the dependent variable in \code{dvc}
#'   according to logical conditions specified as a string. Logical conditions
#'   should be specified as a string as in the \code{keep_trials} argument
#'   (e.g., \code{"raw_data$rt > 100 & raw_data$rt < 3000 & raw_dada$ac == 1"}).
#'   All dependent measures for \code{dvc} except for those specified in
#'   \code{outlier_removal} will be calculated on the remaining observations.
#'   Defalut is \code{NULL}.
#' @param keep_trials_dvd A string. Allows deleting unnecessary observations
#'   and keeping necessary observations in \code{file_name} or in \code{dataset}
#'   for calculations and aggregation of the dependent variable in \code{dvd}
#'   according to logical conditions specified as a string. Logical conditions
#'   should be specified as a string as in the \code{keep_trials} argument
#'   (e.g., \code{raw_data$rt > 100 & raw_data$rt < 3000}). All dependent
#'   measures for \code{dvd} (i.e., \code{"mdvd"} and \code{"merr"}) will be
#'   calculated on the remaining observations. Default is \code{NULL}.
#' @param id_properties String vector with names of columns in \code{dataset} or in
#'   \code{file_name} that describe the ids (e.g., subjects) in the data and
#'   were not manipulated within-or between-ids. For example, in case the user
#'   logged for each observation and for each id in an experiment also the age
#'   and the gender of the subject, this argument will be
#'   \code{c("age", "gender")}. Order of the names in \code{id_properties} does
#'   not matter. Single or multiple values must be specified as a string.
#'   Values in these columns must be numeric. Default is \code{c()}.
#' @param sd_criterion Numeric vector specifying a number of standard deviation
#'   criteria for which \code{prep()} will calculate the mean \code{dvc} for
#'   each cell in the finalized table after rejecting observations that did not
#'   meet the criterion (e.g., rejecting observations that were more than 2
#'   standard deviations above or below the mean of that cell). Values in this
#'   vector must be numeric. Default is \code{c(1, 1.5, 2)}.
#' @param percentiles Numeric vector containing wanted percentiles for \code{dvc}.
#'   Values in this vector must be decimal numbers between 0 to 1. Percentiles
#'   are calculated according to \code{type = 7} (see
#'   \code{\link[stats]{quantile}} for more information). Default is
#'   \code{c(0.05, 0.25, 0.75, 0.95)}.
#' @param outlier_removal Numeric. Specifies which outlier removal procedure
#'   with moving criterion to calculate for \code{dvc} according to procedures
#'   described by Van Selst & Jolicoeur (1994). If \code{1} then non-recursive
#'   procedure is calculated, if \code{2} then modified recursive procedure is
#'   calculated, if \code{3} then hybrid recursive procedure is calculated.
#'   Moving criterion is according to Table 4 in Van Selst & Jolicoeur (1994).
#'   If experimental cell has 4 trials or less it will result in \code{NA}.
#'   Default is \code{NULL}.
#' @param keep_trials_outlier A string. Allows deleting unnecessary
#'   observations and keeping necessary observations in \code{file_name} or in
#'   \code{dataset} for calculations and aggregation of the outlier removal
#'   procedures by Van Selst & Jolicoeur (1994). Logical conditions should be
#'   specified as a string as in the \code{keep_trials} argument (e.g.,
#'   \code{"raw_data$ac == 1"}). \code{outlier_removal} procedure will be
#'   calculated on the remaining observations. Defalut is \code{NULL}.
#' @param decimal_places Numeric. Specifies number of decimals to be written
#'   in \code{results_name} for each value of the dependent measures for
#'   \code{dvc}. Value must be numeric. Default is \code{4}.
#' @param notification Logical. If \code{TRUE}, prints messages about the
#'   progress of the function. Default is \code{TRUE}.
#' @param dm  String vector with names of dependent measures the function
#'   returns. If empty (i.e., \code{c()}) the function returns a data frame with
#'   all possible dependent measures in \code{prep()}. Values in this vector
#'   must be strings from the following list: "mdvc", "sdvc", "meddvc", "tdvc",
#'   "ntr", "ndvc", "ptr", "prt", "rminv", "mdvd", "merr". Default is
#'   \code{c()}. See Value section below for more details.
#' @param save_results Logical. If TRUE, the function creates a txt file
#'   containing the returned data frame. Default is \code{TRUE}.
#' @param results_name A string with the name of the file \code{prep}
#'   returns in case \code{save_results} is \code{TRUE}. Extension of the file
#'   can be txt or csv and should be included. Default is \code{"results.txt"}.
#' @param results_path A string with the path of the folder in which
#'   \code{results_name} will be saved. Default is the path provided in
#'   \code{file_path}. In case no path was provided in \code{file_path},
#'   \code{results_path} must be provided.
#' @param save_summary Logical. if \code{TRUE}, creates a summary file in the
#'   same format as \code{results_name}. Default is \code{TRUE}.
#' @references Grange, J.A. (2015). trimr: An implementation of common response
#'  time trimming methods. R Package Version 1.0.1.
#'  \url{https://CRAN.R-project.org/package=trimr}
#'
#' Van Selst, M., & Jolicoeur, P. (1994). A solution to the effect of sample
#' size on outlier elimination. \emph{The quarterly journal of experimental
#' psychology, 47}(3), 631-650.
#'
#'
#' @return A data frame with dependent measures for the dependent variables in
#'   \code{dvc} and \code{dvd} by \code{id} and grouping variables.
#'
#'   The first column in the finalized table is the \code{id} column.
#'   In case \code{id_properties} was used, the next columns will be the
#'   value of each \code{id_properties} for each \code{id}.
#'
#'   If \code{between_vars} was used then the next column\{\}s will be the value
#'   of each \code{beween_vars} for each \code{id}.
#'
#'   The next columns of the finalized table contain the dependent measures
#'   according to the design specified. If \code{within_vars} was used, then the
#'   data for each dependent measure was first divided according to the levels
#'   of the first grouping variable in \code{witin_vars()}, and then within each
#'   of those levels \code{prep()} divided the data according to the next
#'   variable in \code{within_vars()} and so forth.
#'   The dependent measures in the finalized table are:
#'
#'      \code{mdvc}: mean \code{dvc}.
#'
#'      \code{sdvc}: SD for \code{dvc}.
#'
#'      \code{meddvc}: median \code{dvc}.
#'
#'      \code{tdvc}: mean \code{dvc} after rejecting observations above
#'      standard deviation criteria specified in \code{sd_criterion}.
#'
#'      \code{ntr}: number of observations rejected for each standard deviation
#'      criterion specified in \code{sd_criterion}.
#'
#'      \code{ndvc}: number of observations before rejection.
#'
#'      \code{ptr}: proportion of observations rejected for each standard
#'      deviation criterion specified in \code{sd_criterion}.
#'
#'      \code{rminv}: harmonic mean of \code{dvc}.
#'
#'      \code{prt}: \code{dvc} according to each of the percentiles specified
#'      in \code{percentiles}.
#'
#'      \code{mdvd}: mean \code{dvd}.
#'
#'      \code{merr}: mean error.
#'
#'      \code{nrmc}: mean \code{dvc} according to non-recursive procedure with
#'      moving criterion.
#'
#'      \code{nnrmc}: number of observations rejected for \code{dvc} according
#'      to non-recursive procedure with moving criterion.
#'
#'      \code{pnrmc}: percent of observations rejected for \code{dvc} according
#'      to non-recursive procedure with moving criterion.
#'
#'      \code{tnrmc}: total number of observations upon which the non-recursive
#'      procedure with moving criterion was applied.
#'
#'      \code{mrmc}: mean \code{dvc} according to modified-recursive procedure
#'      with moving criterion.
#'
#'      \code{nmrmc}: number of observations rejected for \code{dvc} according
#'      to modified-recursive procedure with moving criterion.
#'
#'      \code{pmrmc}: percent of observations rejected for \code{dvc} according
#'      to modified-recursive procedure with moving criterion.
#'
#'      \code{tmrmc}: total number of observations upon which the
#'      modified-recursive procedure with moving criterion was applied.
#'
#'      \code{hrmc}: mean \code{dvc} according to hybrid-recursive procedure
#'      with moving criterion.
#'
#'      \code{nhrmc}: number of observations rejected for \code{dvc} according
#'      to hybrid-recursive procedure with moving criterion.
#'
#'      \code{thrmc}: total number of observations upon which the
#'      hybrid-recursive procedure with moving criterion was applied.
#' @export
#' @examples
#' data(stroopdata)
#' finalized_stroopdata <- prep(
#'            dataset = stroopdata
#'            , file_name = NULL
#'            , file_path = NULL
#'            , id = "subject"
#'            , within_vars = c("block", "target_type")
#'            , between_vars = c("order")
#'            , dvc = "rt"
#'            , dvd = "ac"
#'            , keep_trials = NULL
#'            , drop_vars = c()
#'            , keep_trials_dvc = "raw_data$rt > 100 & raw_data$rt < 3000 & raw_data$ac == 1"
#'            , keep_trials_dvd = "raw_data$rt > 100 & raw_data$rt < 3000"
#'            , id_properties = c()
#'            , sd_criterion = c(1, 1.5, 2)
#'            , percentiles = c(0.05, 0.25, 0.75, 0.95)
#'            , outlier_removal = 2
#'            , keep_trials_outlier = "raw_data$ac == 1"
#'            , decimal_places = 0
#'            , notification = TRUE
#'            , dm = c()
#'            , save_results = FALSE
#'            , results_name = "results.txt"
#'            , results_path = NULL
#'            , save_summary = FALSE
#'          )
#'
prep <- function(dataset = NULL, file_name = NULL, file_path = NULL,
                 id = NULL, within_vars =  c(), between_vars = c(), dvc = NULL,
                 dvd = NULL, keep_trials = NULL, drop_vars = c(),
                 keep_trials_dvc = NULL, keep_trials_dvd = NULL,
                 id_properties = c(), sd_criterion = c(1, 1.5, 2),
                 percentiles = c(0.05, 0.25, 0.75, 0.95),
                 outlier_removal = NULL, keep_trials_outlier = NULL,
                 decimal_places = 4, notification = TRUE, dm = c(),
                 save_results = TRUE, results_name = "results.txt",
                 results_path = NULL, save_summary = TRUE) {

  ## Error handling
  # Check if large dataset was provided
  if (is.null(dataset) & is.null(file_name)) {
    stop("Oops! Did not find dataset or file_name.\nEither dataset or file_name must be provided")
  }
  # Check if dataset or file_name argument was provided
  if (!is.null(dataset)) {
    # dataset argumnet was used
    if (!exists("dataset")) {
      # dataset does not exists
      stop("Oops! Object entered in dataset does not exists.\nPlease check argument dataset")
    }
  } else if(!is.null(file_name)) {
    # file_name argument was used
    # Check if file_path was provided
    if (is.null(file_path)) {
      # file_path was not provided
      stop("Oops! file_name was entered but file_path was not found.\nMust enter file_path")
    }
  }
  # Check results_name is correct in case save_results is TRUE
  if (save_results) {
    # Get results_name extension
    extension <- substr(results_name, nchar(results_name) - 3,
                        nchar(results_name))
    # Check if extension is correct
    if (!(extension %in% c(".txt", ".csv"))) {
      stop(paste("Oops!", results_name, "must end with txt or csv extension"))
    }
  }
  # Check if file_name argument is used
  if (!is.null(file_name)) {
    # Get file_name extension
    extension_fn <- substr(file_name, nchar(file_name) - 3, nchar(file_name))
    # Check if extension_fn is correct
    if (!(extension_fn %in% c(".txt", ".csv"))) {
      stop(paste("Oops!", file_name, "must end with txt or csv extenion and be in that format"))
    }
  }
  # Check if id was provided
  if (is.null(id)) {
    stop("Oops! id is missing. Please provide name of id column")
  }
  # Check if independent variables were provided
  if (length(within_vars) == 0 & length(between_vars) == 0) {
    stop("Oops! Did not find independent variables.\nYou must enter an independent varible to either within_vars, between_vars (or both)")
  }
  # Check if dependent variables were provided
  if (is.null(dvc) & is.null(dvd)) {
    stop("Oops! Did not find dvc or dvd. You must enter at least one dependent variable")
  }

  ## Number of decimal places for dvd
  decimal_dvd <- 3

  ## Read table
  if (!is.null(file_name)) {
    # Call read_data()
    raw_data <- read_data(file_name, file_path, notification)
    # For summary txt file (to be used later in the code)
    dataname <- file_name
    # Check if results_path is NULL
    if (is.null(results_path)) {
      # Set results_path to file_path
      results_path <- file_path
    }
  } else {
    # Take table from dataset
    raw_data <- dataset
    # For summary txt file (to be used laster in the code)
    dataname <- "dataset"
    if (save_results) {
      # Check if results_path is NULL
      if (is.null(results_path)) {
        # results_path was not provided
        stop("Oops! save_results is TRUE but results_path was not provided\nYou must provide results_path in order to save results in a file")
      }
    } else if (save_summary & is.null(results_path)) {
      stop("Oops! save_summary is TRUE but results_path was not provided\nYou must provide results_path in order to save summary file")
    }
  }

  # Create extension for summery file in case it was not created by now
  if (save_summary) {
    if (!save_results) {
      extension <- ".txt"
    }
    # Create name for summary file
    sum_file_name <- paste(substr(results_name, 0, nchar(results_name) - 4),
                           "_summary", extension, sep = "")
  }

  ## More error handling. This time after reading the the table
  # Check if name entered in id exists in raw_data
  if (!(id %in% names(raw_data))) {
    # id was not found in raw_data
    stop(paste("Oops!", id , "was not found in the table. Please check argument id"))
  } else if (!is.numeric(raw_data[[id]])) {
    # id was found but is not numeric
    stop(paste("Oops!", id, "values are not numeric.", id, "values must be numeric"))
  }
  # Check if name entered in within_vars exists in raw_data
  if (length(within_vars) > 0) {
    # Found within_vars
    for (w in within_vars) {
      if (!(w %in% names(raw_data))) {
        # w was not found in raw_data
        stop(paste("Oops!", w, "was not found in the table. Please check argument within_vars"))
      } else if (!is.numeric(raw_data[[w]])) {
        # w was found but is not numeric
        stop(paste("Oops!", w, "values are not numeric.", w, "values must be numeric"))
      }
    }
  }
  # Check if name entered in between_vars exists in raw_data
  if (length(between_vars) > 0) {
    # Found between_vars
    for (b in between_vars) {
      if (!(b %in% names(raw_data))) {
        # b was not found in raw_data
        stop(paste("Oops!", b, "was not found in the table. Please check argument between_vars"))
      } else if (!is.numeric(raw_data[[b]])) {
        # b was found but is not numeric
        stop(paste("Oops!", b, "values are not numeric.", b, "values must be numeric"))
      }
    }
  }
  # Check if name entered in dvc exists in raw_data
  if (!is.null(dvc)) {
    # dvc is not NULL
    if (!(dvc %in% names(raw_data))) {
      # dvc was not found in raw_data
      stop(paste("Oops!", dvc, "was not found in the table. Please check argumment dvc"))
    } else if (!is.numeric(raw_data[[dvc]])) {
      # dvc was found but is not numeric
      stop(paste("Oops!", dvc, "values are not numeric.", dvc, "values must be numeric"))
    }
  }
  # Check if name entered in dvd exists in raw_data
  if (!is.null(dvd)) {
    # dvd is not NULL
    if (!(dvd %in% names(raw_data))) {
      # dvd was not found in raw_data
      stop(paste("Oops!", dvd, "was not found in the table. Please check argument dvd"))
    } else if (!is.numeric(raw_data[[dvd]])) {
      # dvd was found but is not numeric
      stop(paste("Oops!", dvd, "values are not numeric.", dvd, "values must be numeric"))
    }
  }
  # Check for spelling mistakes in keep_trials
  if (!is.null(keep_trials)) {
    if (length(eval(parse(text = keep_trials))) == 0) {
      stop("Oops! Possible spelling error in keep_trials\nPlease check keep_trials arguemnt")
    }
  }
  # Check if name entered in drop_vars exists in raw_data
  if (length(drop_vars) > 0) {
    # Found drop_vars
    for (d in drop_vars) {
      if (!(d %in% names(raw_data))) {
        # d was not found in raw_data
        stop(paste("Oops!", d, "was not found in the table. Please check argument drop_vars"))
      }
    }
  }
  # Check for spelling mistakes in keep_trials_dvc
  if (!is.null(keep_trials_dvc)) {
    if (length(eval(parse(text = keep_trials_dvc))) == 0) {
      stop("Oops! Possible spelling error in keep_trials_dvc.\nPlease check argument keep_trials_dvc")
    }
  }
  #Check for spelling mistakes in keep_trials_dvd
  if (!is.null(keep_trials_dvd)) {
    if(length(eval(parse(text = keep_trials_dvd))) == 0) {
      stop("Oops! Possible spelling error in keep_trials_dvd.\nPlease check argument keep_trials_dvd")
    }
  }
  # Check if name entered in id_properties exists in raw_data
  if (length(id_properties) > 0) {
    for (p in id_properties) {
      if (!(p %in% names(raw_data))) {
        # p was not found in raw_data
        stop(paste("Oops!", p, "was not found in the table. Please check argument id_properties"))
      } else if (!is.numeric(raw_data[[p]])) {
        # p was found but is not numeric
        stop(paste("Oops!", p, "values are not numeric.", p, "values must be numeric"))
      }
    }
  }
  # Check if values in percentiles are between 0 and 1
  if (min(percentiles) <= 0 | max(percentiles) >= 1) {
    stop("Oops! Values in percentiles must be decimal numbers between 0 to 1.\nPlease check argument percentiles")
  }
  # Check for spelling mistakes in keep_trials_outlier
  if (!is.null(keep_trials_outlier)) {
    if (length(eval(parse(text = keep_trials_outlier))) == 0) {
      stop("Oops! Possible spelling error in keep_trials_outlier.\nPlease check argument keep_trials_outlier")
    }
  }

  # Checl if outlier_removal is used
  if (!is.null(outlier_removal)) {
    # In case outlier_removal is used, check that it is 1, 2, or 3
    if (!(outlier_removal %in% c(1, 2, 3))) {
      stop("Oops! In case you want outlier removal proceduer,\noutlier_removal argument must be 1, 2, or 3\nPlease check argument outlier_removal")

    }
    # For summary txt file (to be used later in the code)
    outlier_name <- c("non-recursive", "modified-recursive", "hybrid-recursive")
    outlier_name <- paste(outlier_name[outlier_removal], "procedure with moving criterion", sep = " ")
  }

  ## Print dimensions of the large dataset to console
  if (notification) {
    # Message
    message(paste("raw_data has", dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"))
    print(head(raw_data))
  }

  ## Save dimensions of the large dataset before doing anything
  dim_raw_data1 <- dim(raw_data)

  ## Delete unnecessary trials in case keep_trials is not NULL
  # All further calculations will be done on the remaining trials
  if (!is.null(keep_trials)) {
    if (notification) {
      # Message
      message("Keeping trials according to keep_trials and deleting unnecessary trials in raw_data")
      message("All further calculations will be done on the remaining trials")
    }
    # Save keep_trials as a string before parsing in order to later use when
    # writing Summary file
    keep_trials_sum <- keep_trials
    # Subset trials
    keep_trials <- eval(parse(text = keep_trials))
    raw_data <- raw_data[keep_trials, ]
    if (notification) {
      # Message
      message(paste("raw_data has", dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"))
      print(head(raw_data))
    }
  } else {
    keep_trials_sum <- class(keep_trials)
  }
  # End of !is.null(keep_trials)

  ## Delete unnecessary variables in case drop_vars is not NULL
  # All further calculations will be done on the remaining variables
  if (length(drop_vars) > 0) {
    if (notification) {
      # Message
      message("Dropping variables in raw_data according to drop_vars")
      message("All further calculations will be done on the remaining variables")
    }
    # Save drop_vars as a string before parsing in order to later use when
    # writing Summary file
    drop_col_sum <- drop_vars

    # Subset variables by keeping all columns except the ones in drop_vars
    raw_data <- raw_data[, !(colnames(raw_data) %in% drop_vars)]
    if (notification) {
      # Message
      message(paste("raw_data has", dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"))
      print(head(raw_data))
    }
  } else {
    drop_col_sum <- "NULL"
  }
  # End of if(length(drop_vars) > 0)

  ## Save dimensions of raw_data after removing unnecessary trials and
  # variables according to keep_trials and drop_vars
  dim_raw_data2 <- dim(raw_data)

  ## Creates an array in the size of number of ids in raw_data
  id_col <- tapply(raw_data[[id]], list(raw_data[[id]]), mean)

  ## Calculates independent between-subjects variables in case they exists
  if (length(between_vars) > 0) {
    # Found between-subjects variables
    if(notification) {
      # Message
      if (length(between_vars) == 1) {
        message(paste("Found", length(between_vars), "between-subjects independent variable:", between_vars))
      } else {
        message(paste("Found", length(between_vars), "between-subjects independent variables:", between_vars))
      }
    }
    # Creates a data frame for between-subject variables according to the
    # number of ids
    between_vars_df <- data.frame(id_col)
    if (notification) {
      # Message
      message("Calculating between-subjects independent variables")
    }
    # Calculates between-subjects variables
    # Reset i to 1
    i <- 1
    while (i <= length(between_vars)) {
      between_vars_df[i] <- tapply(raw_data[[between_vars[i]]],
                                   list(raw_data[[id]]), mean)
      i <- i + 1
    }
    if (notification) {
      # Message
      message("Giving names for between-subjects variables")
    }
    # Give names for between-subjects variables
    colnames(between_vars_df) <- between_vars
    if (notification) {
      # Message
      message("Finished between-subjects independent variables")
    }
  }

  ## Calculates id_properties in case they exists
  if (length(id_properties) > 0) {
    # Found id properties
    if (notification) {
      # Message
      message(paste("Found", length(id_properties), "id properties"))
    }
    # Creates a data frame for id_properties according to the number of subjects
    id_properties_df <- data.frame(id_col)
    if (notification) {
      # Message
      message("Calculating id_properties")
    }
    # Calculates id properties
    # Reset i to 1
    i <- 1
    while (i <= length(id_properties)) {
      id_properties_df[i] <- tapply(raw_data[[id_properties[i]]],
                                    list(raw_data[[id]]), mean)
      i <- i + 1
    }
    if (notification) {
      # Message
      message("Giving names for id_properties variables")
    }
    # Give names for id properties variables
    colnames(id_properties_df) <- id_properties
    if (notification) {
      # Message
      message("Finished id_properties")
    }
  }

  ## Creates within_vars column in case within-subject indpendent variables
  # exists
  if (length(within_vars) > 0) {
    # Creates within_col: a data frame for within_vars variables
    if (notification) {
      # Message
      message(paste("Found", length(within_vars), "within-subject independent variables"))
    }
    # within_col is in the same length as number of trials in raw_data
    within_col <- data.frame(1:length(raw_data[, 1]))
    # Reset l to 1
    l <- 1
    # Get all within_vars variables in raw_data into within_col
    while(l <= length(within_vars)) {
      within_col[l] <- raw_data[[within_vars[l]]]
      l <- l + 1
    }
    # Give names to within_col
    colnames(within_col) <- within_vars
    # Creates a column for within-subject independent variables
    within_condition <- do.call(interaction, c(within_col, sep = "_",
                                               lex.order = TRUE))
    # Append within_condition to raw_data
    raw_data$within_condition <- factor(within_condition)
    if (notification) {
      # Message
      message("within_condition column was added to raw_data")
      print(head(raw_data))
      message("Finished within_condition column")
    }
  }

  ## Creates a data frame for dvc data and delete unnecessary trials for dvc in
  # case dvc exists
  if (!is.null(dvc)) {
    # Found dvc data
    if (notification) {
      # Message
      message("Found dvc data")
      message("Creating data frame named raw_data_dvc for dvc data")
    }
    # Delete unnecessary trials in case keep_trials_dvc is not NULL
    # All further calculations on dvc except outlier removal procedures will
    # be done on the remaining trials
    if (!is.null(keep_trials_dvc)) {
      if (notification) {
        # Message
        message("Keeping trials according to keep_trials_dvc and deleting unnecessary trials in raw_data_dvc")
        message("All further calculations on dvc except outlier removal procedures will be done on the remaining trials")
      }
      # Save keep_trials_dvc as a string before parsing in order to later use
      # when writing Summary file
      keep_trials_dvc_sum <- keep_trials_dvc
      # Subsetting trials
      keep_trials_dvc <- eval(parse(text = keep_trials_dvc))
      raw_data_dvc <- raw_data[keep_trials_dvc, ]
    } else {
      # Save keep_trials_dvc as "NULL" in order to later use when writing
      # Summary file
      keep_trials_dvc_sum <- class(keep_trials_dvc)
      # Save raw_data as raw_data_dvc
      raw_data_dvc <- raw_data
    }
    if (notification) {
      # Message
      message(paste("raw_data_dvc has", dim(raw_data_dvc)[1], "observations and", dim(raw_data_dvc)[2], "variables"))
      print(head(raw_data_dvc))
    }
    # Get dimensions of raw_data_dvc for summary file
    dim_raw_data_dvc <- dim(raw_data_dvc)
  }

  ## Creates a data frame for dvd data and delete unnecessary trials for dvd
  # in case dvd exists
  if (!is.null(dvd)) {
    # Found dvd data
    if (notification) {
      # Message
      message("Found dvd data")
      message("Creating data frame named raw_data_dvd for dvd data")
    }
    # Delete unnecessary trials in case keep_trials_dvd is not NULL
    # All further calculations on dvd will be done on the remaining trials
    if (!is.null(keep_trials_dvd)) {
      # Delete unnecessary trials
      if (notification) {
        # Message
        message("Keeping trials according to keep_trials_dvd and deleting unnecessary trials in raw_data_dvd")
        message("All further calculations on dvd will be done on the remaining trials")
      }
      # Save keep_trials_dvd as a string before parsing in order to later use
      # when writing Summary file
      keep_trials_dvd_sum <- keep_trials_dvd
      # Subsetting trials
      keep_trials_dvd <- eval(parse(text = keep_trials_dvd))
      # Save raw_data after subsetting
      raw_data_dvd <- raw_data[keep_trials_dvd, ]
    } else {
      # Save keep_trials_dvd as "NULL" in order to later use when writing
      # Summary file
      keep_trials_dvd_sum <- class(keep_trials_dvd)
      # Save raw_data as raw_data_dvd
      raw_data_dvd <- raw_data
    }
    if (notification) {
      # Message
      message(paste("raw_data_dvd has", dim(raw_data_dvd)[1], "observations and", dim(raw_data_dvd)[2], "variables"))
      print(head(raw_data_dvd))
    }
    # Get dimensions of raw_data_dvd for summary file
    dim_raw_data_dvd <- dim(raw_data_dvd)
  }

  # Creates a data frame for dependent measures
  dm_df <- data.frame(1:length(id_col))

  # Reset counter for dependent measures to 1
  count <- 1

  ## Calculates dependent measures
  # Checks if the design is a between-subjects design, within-subjects design
  # or a mixed design
  if (length(between_vars) > 0 & length(within_vars) == 0) {
    # Between-subjects design: all dependent measures will be calculated by id
    if (notification) {
      message("Your design is a between-subjects design")
    }

    # dvc
    # Calculates dependent measures for dvc in case dvc exists
    if (!is.null(dvc)) {

      # mdvc: mean dvc
      # Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
      # or he wants "mdvc" among others (i.e., "mdvc" %in% dm equals to TRUE)
      if (length(dm) == 0 | "mdvc" %in% dm) {
        if (notification) {
          # Message
          message("Calculating mean dvc")
        }
        dm_df[count] <- tapply(raw_data_dvc[[dvc]], list(raw_data_dvc[[id]]),
                               mean)
        colnames(dm_df)[count] <- "mdvc"
        # Round mdvc according to decimal_places
        dm_df[count] <- round(dm_df[count], digits = decimal_places)
        count <- count + 1
      }

      # sdvc: standard deviation (SD) for dvc using denominator n
      # Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
      # or he wants "sdvc" among others (i.e., "sdvc" %in% dm equals to TRUE)
      if (length(dm) == 0 | "sdvc" %in% dm) {
        if (notification) {
          # Message
          message("Calculating SD for dvc using denominator n")
        }
        # SD using denominator n - 1
        dvc_sd_r <- tapply(raw_data_dvc[[dvc]], list(raw_data_dvc[[id]]), sd)
        # Length of each cell in the raw_data_dvc
        l_dvc <- tapply(raw_data_dvc[[dvc]], list(raw_data_dvc[[id]]), length)
        # Calculating sdvc using denominator n
        dm_df[count] <- dvc_sd_r * sqrt((l_dvc - 1) / (l_dvc))
        colnames(dm_df)[count] <- "sdvc"
        # Round sdvc according to decimal_places
        dm_df[count] <- round(dm_df[count], digits = decimal_places)
        count <- count + 1
      }

      # meddvc: median dvc
      # Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
      # or he wants "medrt" among others (i.e., "meddvc" %in% dm equals to TRUE)
      if (length(dm) == 0 | "meddvc" %in% dm) {
        if (notification) {
          # Message
          message("Calculating median dvc")
        }
        dm_df[count] <- tapply(raw_data_dvc[[dvc]], list(raw_data_dvc[[id]]),
                               median)
        colnames(dm_df)[count] <- "meddvc"
        # Round meddvc according to decimal_places
        dm_df[count] <- round(dm_df[count], digits = decimal_places)
        count <- count + 1
      }

      # Z scores (to be used later when removing values according to
      # sd_criterion)
      # deviation from the mean
      dev_from_mean  <- ave(raw_data_dvc[[dvc]], raw_data_dvc[[id]],
                            FUN = function(x) x - mean(x))
      raw_data_dvc$dev_from_mean <- dev_from_mean
      # sdvc for z scores (again using denominator n)
      sdvc_for_zscores <- ave(raw_data_dvc[[dvc]], raw_data_dvc[[id]],
                              FUN = function(x) sd(x) * sqrt((length(x) - 1 ) / (length(x))))
      raw_data_dvc$sdvc_for_zscores <- sdvc_for_zscores
      # z scores
      z_score <- raw_data_dvc$dev_from_mean / raw_data_dvc$sdvc_for_zscores
      raw_data_dvc$z_score <- z_score

      # tdvc: trimmed dvc according to SD in sd_criterion
      # Create an empty vector for names of sd_criterion coulmns
      sd_criterion_names <- c()
      # Reset l to 1
      l <- 1
      # Fill empty vector with names of sd_criterion columns
      while (l <= length(sd_criterion)) {
        sd_criterion_names[l] <- paste("t", sd_criterion[l], "dvc" ,sep = "")
        l <- l + 1
      }
      # Change names of id and dvc columns because later I use "dplyr" which
      # makes it easier
      # Change name of id column to "id"
      names(raw_data_dvc)[names(raw_data_dvc) == id] <- "id"
      # Change name of dvc column to "dvc"
      names(raw_data_dvc)[names(raw_data_dvc) == dvc] <- "dvc"
      # Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
      # or he wants "tdvc" among others (i.e., "tdvc" %in% dm equals to TRUE)
      if (length(dm) == 0 | "tdvc" %in% dm) {
        # Reset j to 1
        j <- 1
        # Calculating mean dvc after rejecting values above SD criterions
        # according to sd_criterion
        while (j <= length(sd_criterion)) {
          if (notification) {
            # Message
            message(paste("Calculating mean dvc after rejecting values above", sd_criterion[j], "SD",
                    sep = " "))
          }
          temp <- raw_data_dvc %>%
                  dplyr::group_by(id) %>%
                  dplyr::summarise(n = mean(dvc[abs(z_score) < sd_criterion[j]])) %>%
                  reshape2::dcast(id ~ sd_criterion_names[j], value.var = "n", fun = mean)
          dm_df[count] <- temp[-1]
          # Round according to decimal_places
          dm_df[count] <- round(dm_df[count], digits = decimal_places)
          count <- count + 1
          j <- j + 1
        }
      }

      # ntr: number of trimmed values for each SD in sd_criterion
      # Checks if the user does not want ntr in results (i.e., !("ntr" %in% dm)
      # equals to FALSE)
      if (!("ntr" %in% dm)) {
        # In case the user does not want ntr in the filnal table,
        # store ntr in temp_dm to be used later when calculating proportion
        temp_dm <- data.frame(1:length(id_col))
        count_temp <- 1
      }
      # Create an empty vector for names of ntr columns
      ntr_names <- c()
      # Rest l to 1
      l <- 1
      # Fill empty vector with names of ntr columns
      while (l <= length(sd_criterion)) {
        ntr_names[l] <- paste("n", sd_criterion[l], "tr" ,sep = "")
        l <- l + 1
      }
      # Reset j to 1 for next while loop
      j <- 1
      # Counting number of rejected values for each sd_criterion
      while (j <= length(sd_criterion)) {
        if (notification) {
        # Messgae
        message(paste("Counting number of rejected values for", sd_criterion[j], "SD", sep = " "))
        }
        temp <- raw_data_dvc %>%
                dplyr::group_by(id) %>%
                dplyr::summarise(n = sum(abs(z_score) > sd_criterion[j])) %>%
                reshape2::dcast(id ~ ntr_names[j], value.var = "n", fun = sum)
        if (length(dm) == 0 | "ntr" %in% dm) {
          # Store it in the dependent measures df
          dm_df[count] <- temp[-1]
          count <- count + 1
        } else {
          # Stroe it in a temporary df
          temp_dm[count_temp] <- temp[-1]
          count_temp <- count_temp + 1
        }
        j <- j + 1
      }
      if (exists("temp_dm")) {
        temp_dm <- temp_dm[-1]
      }
      # End of ntr

      # ndvc: number of values before rejecting values according to sd_criterion
      # Calculate ndvc anyway
      ndvc <- tapply(raw_data_dvc$dvc, list(raw_data_dvc$id), length)
      # Checks if to store ndvc in dm_df ( in case the user wants all dpendent
      # measures (i.e., length(dm) == 0) or he wants "ndvc" among others
      # (i.e., "ndvc" %in% dm equals to TRUE)
      if (length(dm) == 0 | "ndvc" %in% dm) {
        if (notification) {
          # Message
          message("Counting number of values before rejecting values according to sd_criterion")
        }
        dm_df[count] <- ndvc
        colnames(dm_df)[count] <- "ndvc"
        count <- count + 1
      }
      # End of ndvc

      # ptr: proportion of rejected values according to sd_criterion
      # Create an empty vector for names of ptr columns
      ptr_names <- c()
      # Rest l to 1
      l <- 1
      # Fill empty vector with names of ptr columns
      while (l <= length(sd_criterion)) {
        ptr_names[l] <- paste("p", sd_criterion[l], "tr" ,sep = "")
        l <- l + 1
      }
      # Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
      # or he wants "ptr" among others (i.e., "ptr" %in% dm equals to TRUE)
      if (length(dm) == 0 | "ptr" %in% dm) {
        # Reset j to 1 for next while loop
        j <- 1
        while (j <= length(sd_criterion)) {
          if (notification) {
            # Message
            message(paste("Calculating proportion of values rejected for", sd_criterion[j], "SD", sep = " "))
          }
          # Get ntr from temp_dm or from dm_df
          if (!(length(dm) == 0 | "ntr" %in% dm)) {
            temp <- temp_dm[[ntr_names[j]]] / ndvc
          } else {
            temp <- dm_df[[ntr_names[j]]] / ndvc
          }
          dm_df[count] <- temp
          colnames(dm_df)[count] <- ptr_names[j]
          # Round according to digits = 3
          dm_df[count] <- round(dm_df[count], digits = 3)
          count <- count + 1
          j <- j + 1
        }
      }
      # End of ptr

      # rminv: harmonic mean
      # Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
      # or he wants "rminv" among others (i.e., "rminv" %in% dm equals to TRUE)
      if (length(dm) == 0 | "rminv" %in% dm) {
        if (notification) {
          # Message
          message("Calculating harmonic mean for dvc")
        }
        dm_df[count] <- tapply(raw_data_dvc$dvc, list(raw_data_dvc$id),
                               psych::harmonic.mean)
        colnames(dm_df)[count] <- "rminv"
        # Round rminv according to decimal_places
        dm_df[count] <- round(dm_df[count], digits = decimal_places)
        count <- count + 1
      }
      # End of rminv

      # Percentiles: dvc according to requested percentiles
      # Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
      # or he wants "pdvc" among others (i.e., "pdvc" %in% dm equals to TRUE)
      if (length(dm) == 0 | "pdvc" %in% dm) {
        # Create an empty vector for names of pdvc columns
        percentails_names <- c()
        # Rest l to 1
        l <- 1
        # Fill vector
        while (l <= length(percentiles)) {
          percentails_names[l] <- paste("p", percentiles[l], "dvc" ,sep = "")
          l <- l + 1
        }
        # Reset j to 1 before next while loop
        j <- 1
        # Calculating percentiles
        while (j <= length(percentiles)) {
          if (notification) {
            # Message
            message(paste("Calculating the", percentiles[j], "percentail for dvc" ), sep = "")
          }
          dm_df[count] <- tapply(raw_data_dvc$dvc, list(raw_data_dvc$id),
                                 quantile, probs = percentiles[j])
          colnames(dm_df)[count] <- percentails_names[j]
          # Round according to decimal_places
          dm_df[count] <- round(dm_df[count], digits = decimal_places)
          count <- count + 1
          j <- j + 1
        }
      }
      # End of pdvc

      # Outlier removal procedures according to Van Selst & Jolicoeur (1994)
      # Checks if the user wants an outlier removal procedure
      if (!is.null(outlier_removal)) {
        # User wants an outlier removal procedure

        # Delete unnecessary trials in case keep_trials_outlier is not NULL
        # Outlier removal procedure will be calculated on the remaining trials
        if (!is.null(keep_trials_outlier)) {
          # Save keep_trials_outlier as a string before parsing in order to
          # later use when writing Summary file
          keep_trials_outlier_sum <- keep_trials_outlier
          # Delete unnecessary trials
          if (notification) {
            # Message
            message("Keeping trials according to keep_trials_outlier and deleting unnecessary trials in raw_data")
            message("Outlier removal procedure will be calculated on the remaining trials")
          }
          # Subsetting trials
          keep_trials_outlier <- eval(parse(text = keep_trials_outlier))
          # Save raw_data after subsetting
          raw_data <- raw_data[keep_trials_outlier, ]
        } else {
          # Save keep_trials_outlier as "NULL" in order to later use when
          # writing Summary file
          keep_trials_outlier_sum <- class(keep_trials_outlier)
          # Save raw_data
          raw_data <- raw_data
        }

        if (notification) {
          # Message
          message("Calculating mean dvc by id according to selected outlier removal procedure")
        }

        # Change name of id column to "id"
        names(raw_data)[names(raw_data) == id] <- "id"
        # Change name of dvc column to "dvc"
        names(raw_data)[names(raw_data) == dvc] <- "dvc"

        # Create final array that will contain trimmed means per id
        final_data <- matrix(0, nrow = length(id_col), ncol = 1)
        # Create final array that will contain number of trials trimmed per id
        numtrials_data <- matrix(0, nrow = length(id_col), ncol = 1)
        # Create final array that will contain percent of trials removed per id
        percent_data <- matrix(0, nrow = length(id_col), ncol = 1)
        # Create final array that will contain total number of trials per id
        totaltrials_data <- matrix(0, nrow = length(id_col), ncol = 1)

        # Give names to columns according to the procedure
        if (outlier_removal == 1) {
          # Non recursive moving criterion procedure
          # nrmc: means for dvc per id
          colnames(final_data) <- "nrmc"
          # nnrmc: number of trials trimmed per id
          colnames(numtrials_data) <- "nnrmc"
          # pnrmc: percent of trials removed per id
          colnames(percent_data) <- "pnrmc"
          # tnrmc: total number of trials per id
          colnames(totaltrials_data) <- "tnrmc"
        } else if (outlier_removal == 2) {
          # Modified recursive moving criterion procedure
          # mrmc: means for dvc per id
          colnames(final_data) <- "mrmc"
          # nmrmc: number of trials trimmed per id
          colnames(numtrials_data) <- "nmrmc"
          # pmrmc: percent of trials removed per id
          colnames(percent_data) <- "pmrmc"
          # tmrmc: total number of trials per id
          colnames(totaltrials_data) <- "tmrmc"
        } else if (outlier_removal == 3) {
          # Hybrid recursive moving criterion procedure
          # hrmc: means for dvc per id
          colnames(final_data) <- "hrmc"
          # nhrmc: number of trials trimmed per id
          colnames(numtrials_data) <- "nhrmc"
          # phrmc: percent of trials removed per id
          colnames(percent_data) <- "phrmc"
          # thrmc: total number of trials per id
          colnames(totaltrials_data) <- "thrmc"
        }

        # Do outlier removal procedure
        for (i in 1:length(id_col)) {
          # For each id (i.e., subject)
          # Isolate the current participant's data
          temp_data <- raw_data[raw_data$id == id_col[i], ]

          if (outlier_removal == 1) {
            # Non recursive procedure with moving criterion
            # Do current id trimming
            current_id <- non_recursive_mc(temp_data$dvc)
            final_data[i, 1] <- current_id[1]
            percent_data[i, 1] <- current_id[2]
            numtrials_data[i, 1] <- current_id[3]
            totaltrials_data[i, 1] <- current_id[4]
          } else if (outlier_removal == 2) {
            # Modified recursive procedure with moving criterion
            # Do current id trimming
            current_id <- modified_recursive_mc(temp_data$dvc)
            final_data[i, 1] <- current_id[1]
            percent_data[i, 1] <- current_id[2]
            numtrials_data[i, 1] <- current_id[3]
            totaltrials_data[i, 1] <- current_id[4]
          } else if (outlier_removal == 3) {
            # Hybrid recursive procedure with moving criterion
            # Do current id trimming
            current_id <- hybrid_recursive_mc(temp_data$dvc)
            final_data[i, 1] <- current_id[1]
            percent_data[i, 1] <- current_id[2]
            totaltrials_data[i, 1] <- current_id[3]  # Should be 3. Do not change
          }
        }
        # End of for (i in 1:length(id_col)) loop

        # Round final_data according to decimal_places
        final_data <- round(final_data, digits = decimal_places)
        # Round percent_data according to digits = 3
        percent_data <- round(percent_data, digits = 3)

        if (notification) {
          # Messgae
          if (outlier_removal == 1) {
            message("Finished calculating means for dvc by id according to non-recursive procedure with moving criterion ")
          } else if (outlier_removal == 2) {
            message("Finished calculating means for dvc by id according to modified-recursive procedure with moving criterion")
          } else if(outlier_removal == 3) {
            message("Finished calculating means for dvc by id according to hybrid procedure with moving criterion")
          }
        }
      }
      # End of outlier removal

      # Finished dvc
      if (notification) {
        # Message
        message("Finished calculating dependent measures for dvc")
      }
    }  # End of !is.null(dvc)

    # dvd
    # Check if dvd exists
    if(!is.null(dvd)) {
      # Change name of id column to "id"
      names(raw_data_dvd)[names(raw_data_dvd) == id] <- "id"

      # mdvd: mean dvd
      # Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
      # or he wants "mdvd" among others (i.e., "mdvd" %in% dm equals to TRUE)
      if (length(dm) == 0 | "mdvd" %in% dm) {
        if (notification) {
          # Message
          message("Calculating mean dvd")
        }
        dm_df[count] <- tapply(raw_data_dvd[[dvd]], list(raw_data_dvd$id), mean)
        colnames(dm_df)[count] <- "mdvd"
        # Round mdvd according to decimal_dvd
        dm_df[count] <- round(dm_df[count], digits = decimal_dvd)
        count <- count + 1
      }

      # merr: mean error
      # Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
      # or he wants "merr" among others (i.e., "merr" %in% dm equals to TRUE)
      if (length(dm) == 0 | "merr" %in% dm) {
        if (notification) {
         # Message
         message("Calculating mean error")
        }
       dm_df[count] <- 1 - dm_df[["mdvd"]]
        colnames(dm_df)[count] <- "merr"
        # Round merr according to decimal_dvd
        dm_df[count] <- round(dm_df[count], digits = decimal_dvd)
        if (notification) {
          # Message
          message("Finished calculating dependent measures for dvd")
        }
      }
    } # End of !is.null(dvd)

    ## Creates results file
    if (notification) {
      # Message
      message("Creating results")
    }

    # Check if to bind id_properties_df
    if (length(id_properties) > 0) {
      results <- cbind(id_col, id_properties_df, between_vars_df, dm_df)
    } else {
      results <- cbind(id_col, between_vars_df, dm_df)
    }

    # Check if to bind results of outlier removal procedure
    if (!is.null(outlier_removal)) {
      if (outlier_removal == 1 | outlier_removal == 2) {
        results <- cbind(results, final_data, percent_data, numtrials_data,
                         totaltrials_data)
      } else if (outlier_removal == 3) {
        results <- cbind(results, final_data, percent_data, totaltrials_data)
      }
    }

    # Change back name of id to original
    names(results)[names(results) == "id_col"] <- id
    if (notification) {
      # Message
      message("Printing head results to console")
      print(head(results))
    }

    ## Summary file
    if (save_summary) {
      # Write a summary txt file
      if (notification) {
        # Message
        message("Creating summary file")
      }
      # Dim of raw_data, raw_data_dvc and raw_data_dvd
      cat("====================================================================", "\n", file = paste(results_path, "/", sum_file_name, sep = ""))
      cat(paste("Summary:", dataname), date(), file = paste(results_path, "/", sum_file_name, sep = ""), sep = "\n", append = TRUE)
      cat("====================================================================", "\n", file = paste(results_path, "/", paste(results_path, "/", sum_file_name, sep = "")), append = TRUE)
      cat(paste(dataname, "has", dim_raw_data1[1], "observations and", dim_raw_data1[2], "variables"),
          "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      if (!is.null(keep_trials) | length(drop_vars) > 0) {
        cat(paste("* keep_trials:", keep_trials_sum, "drop_vars:", drop_col_sum), "\n", file = paste(results_path, "/", paste(results_path, "/", sum_file_name, sep = "")), append = TRUE)
        cat(paste("* After deleting unnecessary trials and variables according to keep_trials and drop_vars", dataname, "has", dim_raw_data2[1],
                  "observations and", dim_raw_data2[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)

      }
      if (!is.null(dvc)) {
        cat(paste("* keep_trials_dvc:", keep_trials_dvc_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        cat(paste("* After deleting unnecessary trials according to keep_trials_dvc", dvc, "data has", dim_raw_data_dvc[1], "observations and",
                  dim_raw_data_dvc[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        if(!is.null(outlier_removal)) {
          cat(paste("* keep_trials_outlier:", keep_trials_outlier_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
          cat(paste("* After deleting unnecessary trials and variables according to keep_trials_outlier", dvc, "data for outlier removal procedures has",
                    dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
          cat(paste("*", outlier_name, "was calculated on", dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"), "\n",
              file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        }
      } else {
        cat("* No dvc was found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      }
      if(!is.null(dvd)) {
        cat(paste("* keep_trials_dvd:", keep_trials_dvd_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        cat(paste("* After deleting unnecessary trials", dvd, "data has", dim_raw_data_dvd[1],
                        "observations and", dim_raw_data_dvd[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      } else {
        cat("* No dvd was found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      }
      # Dim of results_name
      cat(paste("*", results_name, "has", dim(results)[1], "ids and", dim(results)[2], "variables"), "\n",
          file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      # Write ids
      cat("* ids:", levels(factor(results[[id]])), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      # Write levels of between-subjects indpendent variables
      if (length(between_vars) > 0) {
        cat("* Between-subject independent variables:", "\n",file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        for (lev in 1:length(between_vars)) {
          cat(paste("  ", between_vars[[lev]], ": ", sep = ""), file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
          cat(levels(factor(raw_data[[between_vars[[lev]]]])), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        }
      } else {
        cat("* No between-subjects variables were found", "\n",file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      }
      if (length(id_properties) > 0) {
        # Write names of id_properties
        cat("* id properties:", id_properties, "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      } else {
        cat("* No id properties were found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      }
      if (length(within_vars) == 0) {
        # Write to Summary file
        cat("* No within-subjects variables were found", "\n",file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      }
      if (notification) {
        message("Summary file finished")
      }
    }

    # Save results
    if (save_results) {
      if (extension == ".txt") {
        # Save results as a txt file
        write.table(results, row.names = FALSE, file = paste(results_path, "/", results_name, sep = ""))
      } else {
        # Save results as a csv file
        write.csv(results, row.names = FALSE, file = paste(results_path, "/", results_name, sep = ""))
      }
      # Message
      message(paste(results_name, "has", dim(results)[1], "observations and", dim(results)[2], "variables"))
    }
    message("prep() returned a data frame to console")
    message("Hip Hip Hooray! prep() finished")
    message("Have a great day and may all your results be significant!")

    # Return
    return(results)

  # Checks if there are within-subject independent variables
  } else if (length(within_vars) > 0) {
    if (notification) {
      if (length(between_vars) > 0) {
        # Message
        message("Your design is a mixed design (i.e., includes both between-subjects and within-subjects independent variables")
      } else {
        # Message
        message("Your design is a within-subject design")
      }
    }

    # Creats a temporay df to hold all dependent measures
    temp_dm <- cbind(id_col)

    ## All dependent measures will be calculated by id by within_condition
    # dvc
    # Calculates dependent measures for dvc in case dvc exists
    if (!is.null(dvc)) {

      # Change name of id column to "id"
      names(raw_data_dvc)[names(raw_data_dvc) == id] <- "id"
      # Change name of dvc column to "dvc"
      names(raw_data_dvc)[names(raw_data_dvc) == dvc] <- "dvc"

      # mdvc: mean dvc
      if (length(dm) == 0 | "mdvc" %in% dm) {
        if (notification) {
          # Message
          message("Calculating mean dvc by id by within_condition")
        }
        mdvc <- reshape2::dcast(raw_data_dvc, id ~ within_condition, mean,
                                value.var = "dvc")
        mdvc <- mdvc[-1]
        colnames(mdvc)[1:length(mdvc)] <- paste("mdvc", 1:length(mdvc), sep = "")
        # Round mdvc according to decimal_places
        mdvc <- round(mdvc, digits = decimal_places)
        # Bind
        temp_dm <- cbind(temp_dm, mdvc)
      }

      # sdvc: SD according to denominator n - 1
      if (length(dm) == 0 | "sdvc" %in% dm) {
        if (notification) {
          # Message
          message("Calculating SD for dvc by id by within_condition using denominator n")
        }
        # SD using denominator n - 1
        dvc_sd_r <- reshape2::dcast(raw_data_dvc, id ~ within_condition, sd,
                                    value.var = "dvc")
        dvc_sd_r <- dvc_sd_r[-1]
        # Length of each cell in the raw_data_dvc
        l_dvc <- reshape2::dcast(raw_data_dvc, id ~ within_condition, length,
                                 value.var = "dvc")
        l_dvc <- l_dvc[-1]
        # Calculating SD using denominator n
        sdvc <- dvc_sd_r * sqrt((l_dvc - 1) / (l_dvc))
        colnames(sdvc) <- paste("sdvc", 1:dim(sdvc)[2], sep = "")
        # Round sdvc according to decimal_places
        sdvc <- round(sdvc, digits = decimal_places)
        # Bind
        temp_dm <- cbind(temp_dm, sdvc)
      }

      # meddvc: median dvc
      if (length(dm) == 0 | "meddvc" %in% dm) {
        if (notification) {
          # Message
          message("Calculating median dvc by id by within_condition")
        }
        meddvc <- reshape2::dcast(raw_data_dvc, id ~ within_condition, median,
                                  value.var = "dvc", fill = NaN)
        meddvc <- meddvc[-1]
        colnames(meddvc) <- paste("meddvc", 1:dim(meddvc)[2], sep = "")
        # Round meddvc according to decimal_places
        meddvc <- round(meddvc, digits = decimal_places)
        # Bind
        temp_dm <- cbind(temp_dm, meddvc)
      }

      # Deviation from the mean for z scores
      dev_from_mean <- ave(raw_data_dvc$dvc, raw_data_dvc$id,
                           raw_data_dvc$within_condition,
                           FUN = function(x) x - mean(x))
      # sdvc for z scores
      sdvc_for_zscores <- ave(raw_data_dvc$dvc, raw_data_dvc$id,
                              raw_data_dvc$within_condition,
                              FUN = function(x) sd(x) * sqrt((length(x) - 1) / length(x)))
      # Z scores
      z_score <- dev_from_mean / sdvc_for_zscores
      raw_data_dvc$z_score <- z_score

      # tdvc: trimmed dvc according SD in sd_criterion
      # Create names for sd_criterion coulmn
      sd_criterion_names <- c()
      # Reset l to 1
      l <- 1
      while (l <= length(sd_criterion)) {
        sd_criterion_names[l] <- paste("t", sd_criterion[l], "dvc" ,sep = "")
        l <- l + 1
      }
      if (length(dm) == 0 | "tdvc" %in% dm) {
        # Create a data frame for sd_criterion
        tdvc_df <- data.frame(1:length(id_col))
        # Reset j to 1
        j <- 1
        # Calculating mean dvc by id by within_condition after rejecting values
        # above SD specified in sd_criterions according to sd_criterion
        while (j <= length(sd_criterion)) {
          if (notification) {
            # Message
            message(paste("Calculating mean dvc by id by within_condition after rejecting values above", sd_criterion[j], "SD", sep = " "))
          }
          temp <- raw_data_dvc %>%
                  dplyr::group_by(id, within_condition) %>%
                  dplyr::summarise(n = mean(dvc[abs(z_score) < sd_criterion[j]])) %>%
                  reshape2::dcast(id ~ within_condition, value.var = "n")
          temp <- temp[-1]
          colnames(temp) <- paste(sd_criterion_names[j], 1:dim(temp)[2], sep = "")
          if (j == 1) {
            tdvc_df[2:(dim(temp)[2] + 1)] <- temp
          } else {
            tdvc_df[(dim(tdvc_df)[2] + 1):(dim(tdvc_df)[2] + dim(temp)[2])] <- temp
          }
          j <- j + 1
        }
        tdvc_df <- tdvc_df[-1]
        # Round tdvc_df according to decimal_places
        tdvc_df <- round(tdvc_df, digits = decimal_places)
        # Bind
        temp_dm <- cbind(temp_dm, tdvc_df)
      }

      # ntr: number of trimmed values for each SD in sd_criterion
      ntr_names <- c()
      # Reset l to 1
      l <- 1
      while (l <= length(sd_criterion)) {
        ntr_names[l] <- paste("n", sd_criterion[l], "tr" ,sep = "")
        l <- l + 1
      }
      # Create a data frame for ntr
      ntr_df <- data.frame(1:length(id_col))
      # Reset j to 1 for next while loop
      j <- 1
      # Counting number of rejected values for each sd_criterion
      while (j <= length(sd_criterion)) {
        if(length(dm) == 0 | "ntr" %in% dm) {
          if (notification) {
            # Messgae
            message(paste("Counting number of rejected values for", sd_criterion[j], "SD", sep = " "))
          }
        }
        temp <- raw_data_dvc %>%
                dplyr::group_by(id, within_condition) %>%
                dplyr::summarise(n = length(dvc[abs(z_score) > sd_criterion[j]])) %>%
                reshape2::dcast(id ~ within_condition, value.var = "n")
        temp <- temp[-1]
        colnames(temp) <- paste(ntr_names[j], 1:dim(temp)[2], sep = "")
        if (j == 1) {
          ntr_df[2:(dim(temp)[2] + 1)] <- temp
        } else {
          ntr_df[(dim(ntr_df)[2] + 1):(dim(ntr_df)[2] + dim(temp)[2])] <- temp
        }
        j <- j + 1
      }
      ntr_df <- ntr_df[-1]
      if (length(dm) == 0 | "ntr" %in% dm) {
        # Bind
        temp_dm <- cbind(temp_dm, ntr_df)
      }

      # ndvc: number of values before removing values according to SD in
      # sd_criterion
      if (notification) {
        # Message
        message("Counting number of values for dvc by id by within_condition before rejecting values according to sd_criterion")
      }
      ndvc <- reshape2::dcast(raw_data_dvc, id ~ within_condition, length,
                              value.var = "dvc")
      ndvc <- ndvc[-1]
      colnames(ndvc) <- paste("ndvc", 1:dim(ndvc)[2], sep = "")
      if (length(dm) == 0 | "ndvc" %in% dm) {
        # Bind
        temp_dm <- cbind(temp_dm, ndvc)
      }

      # ptr: proportion of rejected values according to SD in sd_criterion
      if (length(dm) == 0 | "ptr" %in% dm) {
        # Create names for ptr
        ptr_names <- c()
        # Reset l to 1
        l <- 1
        while (l <= length(sd_criterion)) {
          ptr_names[l] <- paste("p", sd_criterion[l], "tr" ,sep = "")
          l <- l + 1
        }
        # Create a data frame for ptr
        ptr_df <- data.frame(1:length(id_col))
        # Reset counter j for the next while loop
        j <- 1
        # Reset i to 1
        i <- 1
        while (j <= length(sd_criterion)) {
          if (notification) {
            # Message
            message(paste("Calculating proportion of rejected values for dvc by id by within_condition for", sd_criterion[j], "SD", sep = " "))
          }
          if (j == 1) {
            temp <- ntr_df[i:dim(ndvc)[2]]
          } else {
            temp <- ntr_df[i:(i + dim(ndvc)[2] - 1)]
          }
          ptr <- temp / ndvc
          colnames(ptr) <- paste(ptr_names[j], 1:dim(ndvc)[2], sep = "")
          if (j == 1) {
            ptr_df[2:(dim(ndvc)[2] + 1)] <- ptr
            ptr_df <- ptr_df[-1]
          } else {
            ptr_df[(dim(ptr_df)[2] + 1):(dim(ptr_df)[2] + dim(ptr)[2])] <- ptr
          }
          i <- i + dim(ndvc)[2]
          j <- j + 1
        }
        # Round ptr_df according to digits = 3
        ptr_df <- round(ptr_df, digits = 3)
        # Bind
        temp_dm <- cbind(temp_dm, ptr_df)
      }

      # rminv: harmonic mean
      if (length(dm) == 0 | "rminv" %in% dm) {
        if (notification) {
          # Message
          message("Calculating harmonic mean for dvc by id by condition")
        }
        rminv <- reshape2::dcast(raw_data_dvc, id ~ within_condition,
                                 psych::harmonic.mean, value.var = "dvc")
        rminv <- rminv[-1]
        colnames(rminv) <- paste("rminv", 1:dim(rminv)[2], sep = "")
        # Round rminv according to decimal_places
        rminv <- round(rminv, digits = decimal_places)
        # Bind
        temp_dm <- cbind(temp_dm, rminv)
      }

      # pdvc: Percentiles
      if (length(dm) == 0 | "pdvc" %in% dm) {
        # Create names for pdvc
        percentails_names <- c()
        # Rest counter l
        l <- 1
        while (l <= length(percentiles)) {
          percentails_names[l] <- paste("p", percentiles[l], "dvc" ,sep = "")
          l <- l + 1
        }
        # Create a data frame for percentiles
        percentails_df <- data.frame(1:length(id_col))
        # Reset j to 1 before next while loop
        j <- 1
        while (j <= length(percentiles)) {
          if (notification) {
            # Message
            message(paste("Calculating the", percentiles[j], "percentail for dvc by id by within_condition" ), sep = "")
          }
          temp <- reshape2::dcast(raw_data_dvc, id ~ within_condition,
                                  quantile, probs = percentiles[j],
                                  value.var = "dvc")
          temp <- temp[-1]
          colnames(temp) <- paste(percentails_names[j], 1:dim(temp)[2], sep = "")
          if (j == 1) {
            percentails_df[2:(dim(temp)[2] + 1)] <- temp
          } else {
            percentails_df[(dim(percentails_df)[2] + 1):(dim(percentails_df)[2] + dim(temp)[2])] <- temp
          }
          j <- j + 1
        }
        percentails_df <- percentails_df[-1]
        # Round percentails_df according to decimal_places
        percentails_df <- round(percentails_df, digits = decimal_places)
        # Bind
        temp_dm <- cbind(temp_dm, percentails_df)
      }

      ## Outlier removal procedures according to Van Selst & Jolicoeur (1994)
      # Check to see if the user wants an outlier removal procedure
      if (!is.null(outlier_removal)) {
        # User wants an outlier removal procedure

        # Save keep_trials_outlier as a string before parsing in order to later
        # use when writing Summary file
        keep_trials_outlier_sum <- keep_trials_outlier

        if (!is.null(keep_trials_outlier)) {
          # Delete unnecessary trials
          if (notification) {
            # Message
            message("Keeping trials according to keep_trials_outlier and deleting unnecessary trials in raw_data")
            message("Outlier removal procedure will be calculated on the remaining trials")
          }
          # Subsetting trials
          keep_trials_outlier <- eval(parse(text = keep_trials_outlier))
          # Save raw_data after subsetting
          raw_data <- raw_data[keep_trials_outlier, ]
        } else {
          # Save keep_trials_outlier as "NULL" in order to later use when
          # writing Summary file
          keep_trials_outlier_sum <- class(keep_trials_outlier)
          # Save raw_data as raw_data
          raw_data <- raw_data
        }

        if (notification) {
          # Message
          message("Calculating mean dvc by id by within_condition according to selected outlier removal procedure")
        }

        # Change name of id column to "id"
        names(raw_data)[names(raw_data) == id] <- "id"
        # Change name of dvc column to "dvc"
        names(raw_data)[names(raw_data) == dvc] <- "dvc"

        # Create final array that will contain trimmed means per id per within_condition
        final_data <- matrix(0, nrow = length(id_col), ncol = length(levels(raw_data$within_condition)))
        # Create final array that will contain number of trials trimmed per id per within_condition
        numtrials_data <- matrix(0, nrow = length(id_col), ncol = length(levels(raw_data$within_condition)))
        # Create final array that will contain percent of trials removed per id per within_condition
        percent_data <- matrix(0, nrow = length(id_col), ncol = length(levels(raw_data$within_condition)))
        # Create final array that will contain total number of trials per id per within_condition before removal
        totaltrials_data <- matrix(0, nrow = length(id_col), ncol = length(levels(raw_data$within_condition)))

        # Give name to columns according to the procedure
          if (outlier_removal == 1) {
            # Non recursive moving criterion procedure
            # nrmc: means for dvc per id per within_condition
            colnames(final_data) <- paste("nrmc", 1:length(levels(raw_data$within_condition)), sep = "")
            # nnrmc: number of trials trimmed per id per within_condition
            colnames(numtrials_data) <- paste("nnrmc", 1:length(levels(raw_data$within_condition)), sep = "")
            # pnrmc: percent of trials removed per id per within_condition
            colnames(percent_data) <- paste("pnrmc", 1:length(levels(raw_data$within_condition)), sep = "")
            # tnrmc: total number of trials per id per within_condition before removal
            colnames(totaltrials_data) <- paste("tnrmc", 1:length(levels(raw_data$within_condition)), sep = "")
          } else if (outlier_removal == 2) {
            # Modified recursive moving criterion procedure
            # mrmc: means for dvc per id per within_condition
            colnames(final_data) <- paste("mrmc", 1:length(levels(raw_data$within_condition)), sep = "")
            # nmrmc: number of trials trimmed per id per within_condition
            colnames(numtrials_data) <- paste("nmrmc", 1:length(levels(raw_data$within_condition)), sep = "")
            # pmrmc: percent of trials removed per id per within_condition
            colnames(percent_data) <- paste("pmrmc", 1:length(levels(raw_data$within_condition)), sep = "")
            # tmrmc: total number of trials per id per within_condition before removal
            colnames(totaltrials_data) <- paste("tmrmc", 1:length(levels(raw_data$within_condition)), sep = "")
          } else if (outlier_removal == 3) {
            # Hybrid recursive moving criterion procedure
            # hrmc: means for dvc per id per within_condition
            colnames(final_data) <- paste("hrmc", 1:length(levels(raw_data$within_condition)), sep = "")
            # nhrmc: number of trials trimmed per id per within_condition
            colnames(numtrials_data) <- paste("nhrmc", 1:length(levels(raw_data$within_condition)), sep = "")
            # phrmc: percent of trials removed per id per within_condition
            colnames(percent_data) <- paste("phrmc", 1:length(levels(raw_data$within_condition)), sep = "")
            # thrmc: total number of trials per id per within_condition before removal
            colnames(totaltrials_data) <- paste("thrmc", 1:length(levels(raw_data$within_condition)), sep = "")
          }

        for (i in 1:length(id_col)) {
          # For each id (i.e., subject)

          # Reset j to 1
          j <- 1

          # Do outlier removal procedure
          for (cond in levels(raw_data$within_condition)) {
            # For each within_condition

            # Isolate the current participant & condition's data
            temp_data <- raw_data[raw_data$id == id_col[i] & raw_data$within_condition == levels(raw_data$within_condition)[j], ]

            if (outlier_removal == 1) {
              # Non recursive procedure with moving criterion
              # Do current id trimming
              current_id <- non_recursive_mc(temp_data$dvc)
              final_data[i, j] <- current_id[1]
              percent_data[i, j] <- current_id[2]
              numtrials_data[i, j] <- current_id[3]
              totaltrials_data[i, j] <- current_id[4]
            } else if (outlier_removal == 2) {
              # Modified recursive procedure with moving criterion
              # Do current id trimming
              current_id <- modified_recursive_mc(temp_data$dvc)
              final_data[i, j] <- current_id[1]
              percent_data[i, j] <- current_id[2]
              numtrials_data[i, j] <- current_id[3]
              totaltrials_data[i, j] <- current_id[4]
            } else if (outlier_removal == 3) {
              # Hybrid recursive procedure with moving criterion
              # Do current id trimming
              current_id <- hybrid_recursive_mc(temp_data$dvc)
              final_data[i, j] <- current_id[1]
              percent_data[i, j] <- current_id[2]
              totaltrials_data[i, j] <- current_id[3]  # Should be 3. Do not change
            }
            j <- j + 1
          }
          # End of condition for loop
        }
        # End of id for loop

        # Round final data according to decimal_places
        final_data <- round(final_data, digits = decimal_places)
        # Round percent_data according to digits = 3
        percent_data <- round(percent_data, digits = 3)

        if (notification) {
          # Messgae
          if (outlier_removal == 1) {
            message("Finished calculating means for dvc by id by within_condition according to non-recursive procedure with moving criterion ")
          } else if (outlier_removal == 2) {
            message("Finished calculating means for dvc by id by within_condition according to modified-recursive procedure with moving criterion")
          } else if(outlier_removal == 3) {
            message("Finished calculating means for dvc by id by within_condition according to hybrid procedure with moving criterion")
          }
        }
      }
      # End of outlier removal procedures

      # Finished dvc
      if (notification) {
          # Message
          message("Finished calculating dependent measures for dvc")
      }
    }  # End of !is.null(dvc)

    # dvd
    # Check if dvd exists
    if (!is.null(dvd)) {

      # Change name of id column to "id"
      names(raw_data_dvd)[names(raw_data_dvd) == id] <- "id"
      # Change name of dvd column to "ac"
      names(raw_data_dvd)[names(raw_data_dvd) == dvd] <- "dvd"

      # mdvd: mean dvd
      if (length(dm) == 0 | "mdvd" %in% dm) {
        if (notification) {
          # Message
          message("Calculating mean dvd by id by within_condition")
        }
        mdvd <- reshape2::dcast(raw_data_dvd, id ~ within_condition, mean, value.var = "dvd")
        mdvd <- mdvd[-1]
        colnames(mdvd) <- paste("mdvd", 1:dim(mdvd)[2], sep = "")
        # Round mdvd according to decimal_dvd
        mdvd <- round(mdvd, digits = decimal_dvd)
        # Bind
        temp_dm <- cbind(temp_dm, mdvd)
      }

      # merr: mean error
      if (length(dm) == 0 | "merr" %in% dm) {
        if (notification) {
          # Message
          message("Calculating mean error by id by within_condition")
        }
        merr <- 1 - mdvd
        colnames(merr) <- paste("merr", 1:dim(merr)[2], sep = "")
        # Round merr according to decimal_dvd
        merr <- round(merr, digits = decimal_dvd)
        # Bind
        temp_dm <- cbind(temp_dm, merr)
      }
      if (notification) {
        # Message
        message("Finished calculating dvd data")
      }
    } # End of !is.null(dvd)

    # Remove the first column of the temporary df
    temp_dm <- temp_dm[-1]

    ## Creates results file
    if (notification) {
      # Message
      message("Creating results")
    }
    if (length(id_properties) > 0) {
      # Bind also id_properties_df
      if (length(between_vars) > 0) {
        # Bind also between_bars_df
        results <- cbind(id_col, id_properties_df, between_vars_df, temp_dm)
      } else {
        results <- cbind(id_col, id_properties_df, temp_dm)
      }
    } else {
      if (length(between_vars) > 0) {
        # Bind also between_vars_df
        results <- cbind(id_col, between_vars_df, temp_dm)
      } else {
        results <- cbind(id_col, temp_dm)
      }
    }

    # In case of an outlier procedure, bind also results for that
    if (!is.null(outlier_removal)) {
      if (outlier_removal == 1 | outlier_removal == 2) {
        results <- cbind(results, final_data, percent_data, numtrials_data, totaltrials_data)
      } else if (outlier_removal == 3) {
        results <- cbind(results, final_data, percent_data, totaltrials_data)
      }
    }

    names(results)[names(results) == "id_col"] <- id
    if (notification) {
      # Message
      message("Printing head results")
      print(head(results))
    }

    ## Summary
    if (save_summary == TRUE) {
      # Write a summary txt file
      if (notification) {
        # Message
        message("Creating summary file")
      }
      # Dim of raw_data, raw_data_dvc and raw_data_dvd
      cat("====================================================================", "\n", file = paste(results_path, "/", sum_file_name, sep = ""))
      cat(paste("Summary:", dataname), date(), file = paste(results_path, "/", sum_file_name, sep = ""), sep = "\n", append = TRUE)
      cat("====================================================================", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      cat(paste(file_name, "has", dim_raw_data1[1], "observations and", dim_raw_data1[2], "variables"),
          "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      if (!is.null(keep_trials) | length(drop_vars) >0) {
        cat(paste("* keep_trials:", keep_trials_sum, "drop_vars:", drop_col_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        cat(paste("* After deleting unnecessary trials and variables", file_name, "has", dim_raw_data2[1],
                  "observations and", dim_raw_data2[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      }
      if (!is.null(dvc)) {
        cat(paste("* keep_trials_dvc:", keep_trials_dvc_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        cat(paste("* After deleting unnecessary trials and variables", dvc, "data has", dim_raw_data_dvc[1],
                        "observations and", dim_raw_data_dvc[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        if (!is.null(outlier_removal)) {
          cat(paste("* keep_trials_outlier:", keep_trials_outlier_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
          cat(paste("* After deleting unnecessary trials and variables according to keep_trials_outlier", dvc, "data for outlier removal procedures has",
                    dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
          cat(paste("*", outlier_name, "was calculated on", dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"), "\n",
              file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        }
      } else {
        cat("* No dvc was found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      }
      if(!is.null(dvd)) {
        cat(paste("* keep_trials_dvd:", keep_trials_dvd_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        cat(paste("* After deleting unnecessary trials and variables", dvd, "data has", dim_raw_data_dvd[1],
                  "observations and", dim_raw_data_dvd[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      } else {
        cat("* No dvd was found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      }
      # Dim of results_name
      cat(paste("*", results_name, "has", dim(results)[1], "ids and", dim(results)[2], "variables"), "\n",
          file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      # Write ids
      cat( "* ids:", levels(factor(results[[id]])), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      # Write levels of between-subjects indpendent variables
      if (length(between_vars) > 0) {
        cat("* Between-subject independent variables:", "\n",file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        for (lev in 1:length(between_vars)) {
          cat(paste("  ", between_vars[[lev]], ": ", sep = ""), file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
          cat(levels(factor(raw_data[[between_vars[[lev]]]])), file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
          cat("\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        }
      } else {
        cat("* No between-subjects variables were found", "\n",file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      }
      if (length(id_properties) > 0) {
        # Write names of id_properties
        cat("* id properties:", id_properties, "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      } else {
        cat("*", "No id properties were found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      }
      if (notification) {
        message("Summary file finished")
      }
      # Write levels of within-subject indpendent variables
      cat("* Within-subject independent variables:", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      for (lev in 1:length(within_vars)) {
        cat(paste("  ", within_vars[[lev]], ": ", sep = ""), file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        cat(levels(factor(raw_data[[within_vars[[lev]]]])), file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
        cat("\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
      }
    }  # End of if(save_summary == TRUE)

    # Save results
    if (save_results) {
      if (extension == ".txt") {
        # Write results in txt format
        write.table(results, row.names = FALSE, file = paste(results_path, "/", results_name, sep = ""))
      } else {
        # Write results in a csv format
        write.csv(results, row.names = FALSE, file = paste(results_path, "/", results_name, sep = ""))
      }
      # Message
      message(paste(results_name, "has", dim(results)[1], "observations and", dim(results)[2], "variables"))
    }

    message("prep() returned a data frame to console")
    message("Hip Hip Hooray! prep() finished")
    message("Have a great day and may all your results be significant!")
    # Return
    return(results)
  }  # End of if (length(between_vars) > 0 & length(within_vars) == 0)
}  # End of prep()

Try the prepdat package in your browser

Any scripts or data that you put into this service are public.

prepdat documentation built on May 1, 2019, 11:24 p.m.