Nothing
#' Creates One Finalized Table Ready for Statistical Analysis
#'
#'@description \code{prep()} aggregates a single dataset in a long format
#' according to any number of grouping variables. This makes \code{prep()}
#' suitable for aggregating data from various types of experimental designs
#' such as between-subjects, within-subjects (i.e., repeated measures), and
#' mixed designs (i.e., experimental designs that include both between- and
#' within- subjects independent variables). \code{prep()} returns a data
#' frame with a number of dependent measures for further analysis for each
#' aggregated cell (i.e., experimental cell) according to the provided
#' grouping variables (i.e., independent variables). Dependent measures for
#' each experimental cell include among others means before and after
#' rejecting observations according to a flexible standard deviation
#' criteria, number of rejected observations according to the flexible
#' standard deviation criteria, proportions of rejected observations
#' according to the flexible standard deviation criteria, number of
#' observations before rejection, means after rejecting observations
#' according to procedures described in Van Selst & Jolicoeur (1994;
#' suitable when measuring reaction-times), standard deviations, medians,
#' means according to any percentile (e.g., 0.05, 0.25, 0.75, 0.95) and
#' harmonic means. The data frame \code{prep()} returns can also be exported
#' as a txt or csv file to be used for statistical analysis in other
#' statistical programs.
#'
#' @usage prep(
#' dataset = NULL
#' , file_name = NULL
#' , file_path = NULL
#' , id = NULL
#' , within_vars = c()
#' , between_vars = c()
#' , dvc = NULL
#' , dvd = NULL
#' , keep_trials = NULL
#' , drop_vars = c()
#' , keep_trials_dvc = NULL
#' , keep_trials_dvd = NULL
#' , id_properties = c()
#' , sd_criterion = c(1, 1.5, 2)
#' , percentiles = c(0.05, 0.25, 0.75, 0.95)
#' , outlier_removal = NULL
#' , keep_trials_outlier = NULL
#' , decimal_places = 4
#' , notification = TRUE
#' , dm = c()
#' , save_results = TRUE
#' , results_name = "results.txt"
#' , results_path = NULL
#' , save_summary = TRUE
#' )
#' @param dataset Name of the data frame in R that contains the long format
#' table after merging the individual data files using
#' \code{file_merge()}. Either \code{dataset} or \code{file_name} must be
#' provided. Default is \code{NULL}.
#' @param file_name A string with the name of a txt or csv file (including the
#' file extension, e.g. \code{"my_data.txt"}) with the merged table in case
#' the user already merged the individual data files. Either \code{dataset} or
#' \code{file_name} must be provided. Default is \code{NULL}.
#' @param file_path A string with the path of the folder in which
#' \code{file_name} is located. If \code{file_name} was used, then
#' \code{file_path} must be provided. Default is \code{NULL}.
#' @param id A string with the name of the column in \code{file_name} or in
#' \code{dataset} that contains the variable specifying the case identifier
#' (i.e., the variable upon which the measurement took place; e.g.,
#' \code{"subject_number"}). This should be a unique value per case. Values
#' in this column must be numeric. Argument must be provided. Default is
#' \code{NULL}.
#' @param within_vars String vector with names of grouping variables in
#' \code{file_name} or in \code{dataset} that contain independent variables
#' manipulated (or observed) within-ids (i.e., within-subjects, repeated
#' measures). Single or multiple values must be specified as a string (e.g.,
#' \code{c("SOA", "condition")}) according to the hierarchical order you
#' wish. Note that the order of the names in \code{within_vars()} is
#' important because \code{prep()} aggregates the data for the dependent
#' measures by first dividing them to the levels of the first grouping
#' variable in \code{witin_vars()}, and then within each of those levels
#' \code{prep()} divides the data according to the next variable in
#' \code{within_vars()} and so forth. Values in these columns must be
#' numeric. Either \code{within_vars} or \code{between_vars} (or both)
#' arguments must be provided. Default is \code{c()}.
#' @param between_vars String vector with names of grouping variables in
#' \code{file_name} or in \code{dataset} that contain independent variables
#' manipulated (or observed) between-ids (i.e., between-subjects). Single
#' or multiple values must be specified as a string (e.g., \code{c("order")}).
#' Order of the names in \code{between_vars()} does not matter. Values in
#' this column must be numeric. Either \code{between_vars} or
#' \code{within_vars} (or both) arguments must be provided. Default is
#' \code{c()}.
#' @param dvc A string with the name of the column in \code{file_name} or in
#' \code{dataset} that contains the dependent variable (e.g., "rt" for
#' reaction-time as a dependent variable). Values in this column must be in
#' an interval or ratio scale. Either \code{dvc} or \code{dvd} (or both)
#' arguments must be provided. Default is \code{NULL}.
#' @param dvd A string with the name of the column in \code{file_name} or in
#' \code{dataset} that contains the dependent variable (e.g., \code{"ac"}
#' for accuracy as a dependent variable). Values in this column must be
#' numeric and discrete (e.g., 0 and 1). Either \code{dvc} or \code{dvd} (or
#' both) arguments must be provided. Default is \code{NULL}.
#' @param keep_trials A string. Allows deleting unnecessary observations and
#' keeping necessary observations in \code{file_name} or in \code{dataset}
#' according to logical conditions specified as a string. For example, if the
#' dataset contains practice trials for each subject, these trials should not
#' be included in the aggregation. The user should remove these trials by
#' specifying how they were coded in the raw data (i.e., data before
#' aggregation). For example, if practice trials are the ones for which
#' the "block" column in the raw data tables equals to zero, the
#' \code{keep_trials} argument should be \code{"raw_data$block !== 0"}.
#' \code{raw_data} is the internal object in \code{prep()} representing the
#' merged table. All logical conditions in \code{keep_trials} should be put
#' in the same string and be concatenated by \code{&} or \code{|}. Logical
#' conditions for this argument can relate to different columns in the merged
#' table. Note that all further arguments of \code{prep()} will relate to the
#' remaining observations in the merged table. Default is \code{NULL}.
#' @param drop_vars String vector with names of columns to delete in \code{file_name}
#' or in \code{dataset}. Single or multiple values must be specified as a
#' string (e.g., \code{c("font_size")}). Order of the names in
#' \code{drop_vars} does not matter. Note that all further arguments of
#' \code{prep()} will relate to the remaining variables in the merged table.
#' Default is \code{c()}.
#' @param keep_trials_dvc A string. Allows deleting unnecessary observations
#' and keeping necessary observations in \code{file_name} or in \code{dataset}
#' for calculations and aggregation of the dependent variable in \code{dvc}
#' according to logical conditions specified as a string. Logical conditions
#' should be specified as a string as in the \code{keep_trials} argument
#' (e.g., \code{"raw_data$rt > 100 & raw_data$rt < 3000 & raw_dada$ac == 1"}).
#' All dependent measures for \code{dvc} except for those specified in
#' \code{outlier_removal} will be calculated on the remaining observations.
#' Defalut is \code{NULL}.
#' @param keep_trials_dvd A string. Allows deleting unnecessary observations
#' and keeping necessary observations in \code{file_name} or in \code{dataset}
#' for calculations and aggregation of the dependent variable in \code{dvd}
#' according to logical conditions specified as a string. Logical conditions
#' should be specified as a string as in the \code{keep_trials} argument
#' (e.g., \code{raw_data$rt > 100 & raw_data$rt < 3000}). All dependent
#' measures for \code{dvd} (i.e., \code{"mdvd"} and \code{"merr"}) will be
#' calculated on the remaining observations. Default is \code{NULL}.
#' @param id_properties String vector with names of columns in \code{dataset} or in
#' \code{file_name} that describe the ids (e.g., subjects) in the data and
#' were not manipulated within-or between-ids. For example, in case the user
#' logged for each observation and for each id in an experiment also the age
#' and the gender of the subject, this argument will be
#' \code{c("age", "gender")}. Order of the names in \code{id_properties} does
#' not matter. Single or multiple values must be specified as a string.
#' Values in these columns must be numeric. Default is \code{c()}.
#' @param sd_criterion Numeric vector specifying a number of standard deviation
#' criteria for which \code{prep()} will calculate the mean \code{dvc} for
#' each cell in the finalized table after rejecting observations that did not
#' meet the criterion (e.g., rejecting observations that were more than 2
#' standard deviations above or below the mean of that cell). Values in this
#' vector must be numeric. Default is \code{c(1, 1.5, 2)}.
#' @param percentiles Numeric vector containing wanted percentiles for \code{dvc}.
#' Values in this vector must be decimal numbers between 0 to 1. Percentiles
#' are calculated according to \code{type = 7} (see
#' \code{\link[stats]{quantile}} for more information). Default is
#' \code{c(0.05, 0.25, 0.75, 0.95)}.
#' @param outlier_removal Numeric. Specifies which outlier removal procedure
#' with moving criterion to calculate for \code{dvc} according to procedures
#' described by Van Selst & Jolicoeur (1994). If \code{1} then non-recursive
#' procedure is calculated, if \code{2} then modified recursive procedure is
#' calculated, if \code{3} then hybrid recursive procedure is calculated.
#' Moving criterion is according to Table 4 in Van Selst & Jolicoeur (1994).
#' If experimental cell has 4 trials or less it will result in \code{NA}.
#' Default is \code{NULL}.
#' @param keep_trials_outlier A string. Allows deleting unnecessary
#' observations and keeping necessary observations in \code{file_name} or in
#' \code{dataset} for calculations and aggregation of the outlier removal
#' procedures by Van Selst & Jolicoeur (1994). Logical conditions should be
#' specified as a string as in the \code{keep_trials} argument (e.g.,
#' \code{"raw_data$ac == 1"}). \code{outlier_removal} procedure will be
#' calculated on the remaining observations. Defalut is \code{NULL}.
#' @param decimal_places Numeric. Specifies number of decimals to be written
#' in \code{results_name} for each value of the dependent measures for
#' \code{dvc}. Value must be numeric. Default is \code{4}.
#' @param notification Logical. If \code{TRUE}, prints messages about the
#' progress of the function. Default is \code{TRUE}.
#' @param dm String vector with names of dependent measures the function
#' returns. If empty (i.e., \code{c()}) the function returns a data frame with
#' all possible dependent measures in \code{prep()}. Values in this vector
#' must be strings from the following list: "mdvc", "sdvc", "meddvc", "tdvc",
#' "ntr", "ndvc", "ptr", "prt", "rminv", "mdvd", "merr". Default is
#' \code{c()}. See Value section below for more details.
#' @param save_results Logical. If TRUE, the function creates a txt file
#' containing the returned data frame. Default is \code{TRUE}.
#' @param results_name A string with the name of the file \code{prep}
#' returns in case \code{save_results} is \code{TRUE}. Extension of the file
#' can be txt or csv and should be included. Default is \code{"results.txt"}.
#' @param results_path A string with the path of the folder in which
#' \code{results_name} will be saved. Default is the path provided in
#' \code{file_path}. In case no path was provided in \code{file_path},
#' \code{results_path} must be provided.
#' @param save_summary Logical. if \code{TRUE}, creates a summary file in the
#' same format as \code{results_name}. Default is \code{TRUE}.
#' @references Grange, J.A. (2015). trimr: An implementation of common response
#' time trimming methods. R Package Version 1.0.1.
#' \url{https://CRAN.R-project.org/package=trimr}
#'
#' Van Selst, M., & Jolicoeur, P. (1994). A solution to the effect of sample
#' size on outlier elimination. \emph{The quarterly journal of experimental
#' psychology, 47}(3), 631-650.
#'
#'
#' @return A data frame with dependent measures for the dependent variables in
#' \code{dvc} and \code{dvd} by \code{id} and grouping variables.
#'
#' The first column in the finalized table is the \code{id} column.
#' In case \code{id_properties} was used, the next columns will be the
#' value of each \code{id_properties} for each \code{id}.
#'
#' If \code{between_vars} was used then the next column\{\}s will be the value
#' of each \code{beween_vars} for each \code{id}.
#'
#' The next columns of the finalized table contain the dependent measures
#' according to the design specified. If \code{within_vars} was used, then the
#' data for each dependent measure was first divided according to the levels
#' of the first grouping variable in \code{witin_vars()}, and then within each
#' of those levels \code{prep()} divided the data according to the next
#' variable in \code{within_vars()} and so forth.
#' The dependent measures in the finalized table are:
#'
#' \code{mdvc}: mean \code{dvc}.
#'
#' \code{sdvc}: SD for \code{dvc}.
#'
#' \code{meddvc}: median \code{dvc}.
#'
#' \code{tdvc}: mean \code{dvc} after rejecting observations above
#' standard deviation criteria specified in \code{sd_criterion}.
#'
#' \code{ntr}: number of observations rejected for each standard deviation
#' criterion specified in \code{sd_criterion}.
#'
#' \code{ndvc}: number of observations before rejection.
#'
#' \code{ptr}: proportion of observations rejected for each standard
#' deviation criterion specified in \code{sd_criterion}.
#'
#' \code{rminv}: harmonic mean of \code{dvc}.
#'
#' \code{prt}: \code{dvc} according to each of the percentiles specified
#' in \code{percentiles}.
#'
#' \code{mdvd}: mean \code{dvd}.
#'
#' \code{merr}: mean error.
#'
#' \code{nrmc}: mean \code{dvc} according to non-recursive procedure with
#' moving criterion.
#'
#' \code{nnrmc}: number of observations rejected for \code{dvc} according
#' to non-recursive procedure with moving criterion.
#'
#' \code{pnrmc}: percent of observations rejected for \code{dvc} according
#' to non-recursive procedure with moving criterion.
#'
#' \code{tnrmc}: total number of observations upon which the non-recursive
#' procedure with moving criterion was applied.
#'
#' \code{mrmc}: mean \code{dvc} according to modified-recursive procedure
#' with moving criterion.
#'
#' \code{nmrmc}: number of observations rejected for \code{dvc} according
#' to modified-recursive procedure with moving criterion.
#'
#' \code{pmrmc}: percent of observations rejected for \code{dvc} according
#' to modified-recursive procedure with moving criterion.
#'
#' \code{tmrmc}: total number of observations upon which the
#' modified-recursive procedure with moving criterion was applied.
#'
#' \code{hrmc}: mean \code{dvc} according to hybrid-recursive procedure
#' with moving criterion.
#'
#' \code{nhrmc}: number of observations rejected for \code{dvc} according
#' to hybrid-recursive procedure with moving criterion.
#'
#' \code{thrmc}: total number of observations upon which the
#' hybrid-recursive procedure with moving criterion was applied.
#' @export
#' @examples
#' data(stroopdata)
#' finalized_stroopdata <- prep(
#' dataset = stroopdata
#' , file_name = NULL
#' , file_path = NULL
#' , id = "subject"
#' , within_vars = c("block", "target_type")
#' , between_vars = c("order")
#' , dvc = "rt"
#' , dvd = "ac"
#' , keep_trials = NULL
#' , drop_vars = c()
#' , keep_trials_dvc = "raw_data$rt > 100 & raw_data$rt < 3000 & raw_data$ac == 1"
#' , keep_trials_dvd = "raw_data$rt > 100 & raw_data$rt < 3000"
#' , id_properties = c()
#' , sd_criterion = c(1, 1.5, 2)
#' , percentiles = c(0.05, 0.25, 0.75, 0.95)
#' , outlier_removal = 2
#' , keep_trials_outlier = "raw_data$ac == 1"
#' , decimal_places = 0
#' , notification = TRUE
#' , dm = c()
#' , save_results = FALSE
#' , results_name = "results.txt"
#' , results_path = NULL
#' , save_summary = FALSE
#' )
#'
prep <- function(dataset = NULL, file_name = NULL, file_path = NULL,
id = NULL, within_vars = c(), between_vars = c(), dvc = NULL,
dvd = NULL, keep_trials = NULL, drop_vars = c(),
keep_trials_dvc = NULL, keep_trials_dvd = NULL,
id_properties = c(), sd_criterion = c(1, 1.5, 2),
percentiles = c(0.05, 0.25, 0.75, 0.95),
outlier_removal = NULL, keep_trials_outlier = NULL,
decimal_places = 4, notification = TRUE, dm = c(),
save_results = TRUE, results_name = "results.txt",
results_path = NULL, save_summary = TRUE) {
## Error handling
# Check if large dataset was provided
if (is.null(dataset) & is.null(file_name)) {
stop("Oops! Did not find dataset or file_name.\nEither dataset or file_name must be provided")
}
# Check if dataset or file_name argument was provided
if (!is.null(dataset)) {
# dataset argumnet was used
if (!exists("dataset")) {
# dataset does not exists
stop("Oops! Object entered in dataset does not exists.\nPlease check argument dataset")
}
} else if(!is.null(file_name)) {
# file_name argument was used
# Check if file_path was provided
if (is.null(file_path)) {
# file_path was not provided
stop("Oops! file_name was entered but file_path was not found.\nMust enter file_path")
}
}
# Check results_name is correct in case save_results is TRUE
if (save_results) {
# Get results_name extension
extension <- substr(results_name, nchar(results_name) - 3,
nchar(results_name))
# Check if extension is correct
if (!(extension %in% c(".txt", ".csv"))) {
stop(paste("Oops!", results_name, "must end with txt or csv extension"))
}
}
# Check if file_name argument is used
if (!is.null(file_name)) {
# Get file_name extension
extension_fn <- substr(file_name, nchar(file_name) - 3, nchar(file_name))
# Check if extension_fn is correct
if (!(extension_fn %in% c(".txt", ".csv"))) {
stop(paste("Oops!", file_name, "must end with txt or csv extenion and be in that format"))
}
}
# Check if id was provided
if (is.null(id)) {
stop("Oops! id is missing. Please provide name of id column")
}
# Check if independent variables were provided
if (length(within_vars) == 0 & length(between_vars) == 0) {
stop("Oops! Did not find independent variables.\nYou must enter an independent varible to either within_vars, between_vars (or both)")
}
# Check if dependent variables were provided
if (is.null(dvc) & is.null(dvd)) {
stop("Oops! Did not find dvc or dvd. You must enter at least one dependent variable")
}
## Number of decimal places for dvd
decimal_dvd <- 3
## Read table
if (!is.null(file_name)) {
# Call read_data()
raw_data <- read_data(file_name, file_path, notification)
# For summary txt file (to be used later in the code)
dataname <- file_name
# Check if results_path is NULL
if (is.null(results_path)) {
# Set results_path to file_path
results_path <- file_path
}
} else {
# Take table from dataset
raw_data <- dataset
# For summary txt file (to be used laster in the code)
dataname <- "dataset"
if (save_results) {
# Check if results_path is NULL
if (is.null(results_path)) {
# results_path was not provided
stop("Oops! save_results is TRUE but results_path was not provided\nYou must provide results_path in order to save results in a file")
}
} else if (save_summary & is.null(results_path)) {
stop("Oops! save_summary is TRUE but results_path was not provided\nYou must provide results_path in order to save summary file")
}
}
# Create extension for summery file in case it was not created by now
if (save_summary) {
if (!save_results) {
extension <- ".txt"
}
# Create name for summary file
sum_file_name <- paste(substr(results_name, 0, nchar(results_name) - 4),
"_summary", extension, sep = "")
}
## More error handling. This time after reading the the table
# Check if name entered in id exists in raw_data
if (!(id %in% names(raw_data))) {
# id was not found in raw_data
stop(paste("Oops!", id , "was not found in the table. Please check argument id"))
} else if (!is.numeric(raw_data[[id]])) {
# id was found but is not numeric
stop(paste("Oops!", id, "values are not numeric.", id, "values must be numeric"))
}
# Check if name entered in within_vars exists in raw_data
if (length(within_vars) > 0) {
# Found within_vars
for (w in within_vars) {
if (!(w %in% names(raw_data))) {
# w was not found in raw_data
stop(paste("Oops!", w, "was not found in the table. Please check argument within_vars"))
} else if (!is.numeric(raw_data[[w]])) {
# w was found but is not numeric
stop(paste("Oops!", w, "values are not numeric.", w, "values must be numeric"))
}
}
}
# Check if name entered in between_vars exists in raw_data
if (length(between_vars) > 0) {
# Found between_vars
for (b in between_vars) {
if (!(b %in% names(raw_data))) {
# b was not found in raw_data
stop(paste("Oops!", b, "was not found in the table. Please check argument between_vars"))
} else if (!is.numeric(raw_data[[b]])) {
# b was found but is not numeric
stop(paste("Oops!", b, "values are not numeric.", b, "values must be numeric"))
}
}
}
# Check if name entered in dvc exists in raw_data
if (!is.null(dvc)) {
# dvc is not NULL
if (!(dvc %in% names(raw_data))) {
# dvc was not found in raw_data
stop(paste("Oops!", dvc, "was not found in the table. Please check argumment dvc"))
} else if (!is.numeric(raw_data[[dvc]])) {
# dvc was found but is not numeric
stop(paste("Oops!", dvc, "values are not numeric.", dvc, "values must be numeric"))
}
}
# Check if name entered in dvd exists in raw_data
if (!is.null(dvd)) {
# dvd is not NULL
if (!(dvd %in% names(raw_data))) {
# dvd was not found in raw_data
stop(paste("Oops!", dvd, "was not found in the table. Please check argument dvd"))
} else if (!is.numeric(raw_data[[dvd]])) {
# dvd was found but is not numeric
stop(paste("Oops!", dvd, "values are not numeric.", dvd, "values must be numeric"))
}
}
# Check for spelling mistakes in keep_trials
if (!is.null(keep_trials)) {
if (length(eval(parse(text = keep_trials))) == 0) {
stop("Oops! Possible spelling error in keep_trials\nPlease check keep_trials arguemnt")
}
}
# Check if name entered in drop_vars exists in raw_data
if (length(drop_vars) > 0) {
# Found drop_vars
for (d in drop_vars) {
if (!(d %in% names(raw_data))) {
# d was not found in raw_data
stop(paste("Oops!", d, "was not found in the table. Please check argument drop_vars"))
}
}
}
# Check for spelling mistakes in keep_trials_dvc
if (!is.null(keep_trials_dvc)) {
if (length(eval(parse(text = keep_trials_dvc))) == 0) {
stop("Oops! Possible spelling error in keep_trials_dvc.\nPlease check argument keep_trials_dvc")
}
}
#Check for spelling mistakes in keep_trials_dvd
if (!is.null(keep_trials_dvd)) {
if(length(eval(parse(text = keep_trials_dvd))) == 0) {
stop("Oops! Possible spelling error in keep_trials_dvd.\nPlease check argument keep_trials_dvd")
}
}
# Check if name entered in id_properties exists in raw_data
if (length(id_properties) > 0) {
for (p in id_properties) {
if (!(p %in% names(raw_data))) {
# p was not found in raw_data
stop(paste("Oops!", p, "was not found in the table. Please check argument id_properties"))
} else if (!is.numeric(raw_data[[p]])) {
# p was found but is not numeric
stop(paste("Oops!", p, "values are not numeric.", p, "values must be numeric"))
}
}
}
# Check if values in percentiles are between 0 and 1
if (min(percentiles) <= 0 | max(percentiles) >= 1) {
stop("Oops! Values in percentiles must be decimal numbers between 0 to 1.\nPlease check argument percentiles")
}
# Check for spelling mistakes in keep_trials_outlier
if (!is.null(keep_trials_outlier)) {
if (length(eval(parse(text = keep_trials_outlier))) == 0) {
stop("Oops! Possible spelling error in keep_trials_outlier.\nPlease check argument keep_trials_outlier")
}
}
# Checl if outlier_removal is used
if (!is.null(outlier_removal)) {
# In case outlier_removal is used, check that it is 1, 2, or 3
if (!(outlier_removal %in% c(1, 2, 3))) {
stop("Oops! In case you want outlier removal proceduer,\noutlier_removal argument must be 1, 2, or 3\nPlease check argument outlier_removal")
}
# For summary txt file (to be used later in the code)
outlier_name <- c("non-recursive", "modified-recursive", "hybrid-recursive")
outlier_name <- paste(outlier_name[outlier_removal], "procedure with moving criterion", sep = " ")
}
## Print dimensions of the large dataset to console
if (notification) {
# Message
message(paste("raw_data has", dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"))
print(head(raw_data))
}
## Save dimensions of the large dataset before doing anything
dim_raw_data1 <- dim(raw_data)
## Delete unnecessary trials in case keep_trials is not NULL
# All further calculations will be done on the remaining trials
if (!is.null(keep_trials)) {
if (notification) {
# Message
message("Keeping trials according to keep_trials and deleting unnecessary trials in raw_data")
message("All further calculations will be done on the remaining trials")
}
# Save keep_trials as a string before parsing in order to later use when
# writing Summary file
keep_trials_sum <- keep_trials
# Subset trials
keep_trials <- eval(parse(text = keep_trials))
raw_data <- raw_data[keep_trials, ]
if (notification) {
# Message
message(paste("raw_data has", dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"))
print(head(raw_data))
}
} else {
keep_trials_sum <- class(keep_trials)
}
# End of !is.null(keep_trials)
## Delete unnecessary variables in case drop_vars is not NULL
# All further calculations will be done on the remaining variables
if (length(drop_vars) > 0) {
if (notification) {
# Message
message("Dropping variables in raw_data according to drop_vars")
message("All further calculations will be done on the remaining variables")
}
# Save drop_vars as a string before parsing in order to later use when
# writing Summary file
drop_col_sum <- drop_vars
# Subset variables by keeping all columns except the ones in drop_vars
raw_data <- raw_data[, !(colnames(raw_data) %in% drop_vars)]
if (notification) {
# Message
message(paste("raw_data has", dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"))
print(head(raw_data))
}
} else {
drop_col_sum <- "NULL"
}
# End of if(length(drop_vars) > 0)
## Save dimensions of raw_data after removing unnecessary trials and
# variables according to keep_trials and drop_vars
dim_raw_data2 <- dim(raw_data)
## Creates an array in the size of number of ids in raw_data
id_col <- tapply(raw_data[[id]], list(raw_data[[id]]), mean)
## Calculates independent between-subjects variables in case they exists
if (length(between_vars) > 0) {
# Found between-subjects variables
if(notification) {
# Message
if (length(between_vars) == 1) {
message(paste("Found", length(between_vars), "between-subjects independent variable:", between_vars))
} else {
message(paste("Found", length(between_vars), "between-subjects independent variables:", between_vars))
}
}
# Creates a data frame for between-subject variables according to the
# number of ids
between_vars_df <- data.frame(id_col)
if (notification) {
# Message
message("Calculating between-subjects independent variables")
}
# Calculates between-subjects variables
# Reset i to 1
i <- 1
while (i <= length(between_vars)) {
between_vars_df[i] <- tapply(raw_data[[between_vars[i]]],
list(raw_data[[id]]), mean)
i <- i + 1
}
if (notification) {
# Message
message("Giving names for between-subjects variables")
}
# Give names for between-subjects variables
colnames(between_vars_df) <- between_vars
if (notification) {
# Message
message("Finished between-subjects independent variables")
}
}
## Calculates id_properties in case they exists
if (length(id_properties) > 0) {
# Found id properties
if (notification) {
# Message
message(paste("Found", length(id_properties), "id properties"))
}
# Creates a data frame for id_properties according to the number of subjects
id_properties_df <- data.frame(id_col)
if (notification) {
# Message
message("Calculating id_properties")
}
# Calculates id properties
# Reset i to 1
i <- 1
while (i <= length(id_properties)) {
id_properties_df[i] <- tapply(raw_data[[id_properties[i]]],
list(raw_data[[id]]), mean)
i <- i + 1
}
if (notification) {
# Message
message("Giving names for id_properties variables")
}
# Give names for id properties variables
colnames(id_properties_df) <- id_properties
if (notification) {
# Message
message("Finished id_properties")
}
}
## Creates within_vars column in case within-subject indpendent variables
# exists
if (length(within_vars) > 0) {
# Creates within_col: a data frame for within_vars variables
if (notification) {
# Message
message(paste("Found", length(within_vars), "within-subject independent variables"))
}
# within_col is in the same length as number of trials in raw_data
within_col <- data.frame(1:length(raw_data[, 1]))
# Reset l to 1
l <- 1
# Get all within_vars variables in raw_data into within_col
while(l <= length(within_vars)) {
within_col[l] <- raw_data[[within_vars[l]]]
l <- l + 1
}
# Give names to within_col
colnames(within_col) <- within_vars
# Creates a column for within-subject independent variables
within_condition <- do.call(interaction, c(within_col, sep = "_",
lex.order = TRUE))
# Append within_condition to raw_data
raw_data$within_condition <- factor(within_condition)
if (notification) {
# Message
message("within_condition column was added to raw_data")
print(head(raw_data))
message("Finished within_condition column")
}
}
## Creates a data frame for dvc data and delete unnecessary trials for dvc in
# case dvc exists
if (!is.null(dvc)) {
# Found dvc data
if (notification) {
# Message
message("Found dvc data")
message("Creating data frame named raw_data_dvc for dvc data")
}
# Delete unnecessary trials in case keep_trials_dvc is not NULL
# All further calculations on dvc except outlier removal procedures will
# be done on the remaining trials
if (!is.null(keep_trials_dvc)) {
if (notification) {
# Message
message("Keeping trials according to keep_trials_dvc and deleting unnecessary trials in raw_data_dvc")
message("All further calculations on dvc except outlier removal procedures will be done on the remaining trials")
}
# Save keep_trials_dvc as a string before parsing in order to later use
# when writing Summary file
keep_trials_dvc_sum <- keep_trials_dvc
# Subsetting trials
keep_trials_dvc <- eval(parse(text = keep_trials_dvc))
raw_data_dvc <- raw_data[keep_trials_dvc, ]
} else {
# Save keep_trials_dvc as "NULL" in order to later use when writing
# Summary file
keep_trials_dvc_sum <- class(keep_trials_dvc)
# Save raw_data as raw_data_dvc
raw_data_dvc <- raw_data
}
if (notification) {
# Message
message(paste("raw_data_dvc has", dim(raw_data_dvc)[1], "observations and", dim(raw_data_dvc)[2], "variables"))
print(head(raw_data_dvc))
}
# Get dimensions of raw_data_dvc for summary file
dim_raw_data_dvc <- dim(raw_data_dvc)
}
## Creates a data frame for dvd data and delete unnecessary trials for dvd
# in case dvd exists
if (!is.null(dvd)) {
# Found dvd data
if (notification) {
# Message
message("Found dvd data")
message("Creating data frame named raw_data_dvd for dvd data")
}
# Delete unnecessary trials in case keep_trials_dvd is not NULL
# All further calculations on dvd will be done on the remaining trials
if (!is.null(keep_trials_dvd)) {
# Delete unnecessary trials
if (notification) {
# Message
message("Keeping trials according to keep_trials_dvd and deleting unnecessary trials in raw_data_dvd")
message("All further calculations on dvd will be done on the remaining trials")
}
# Save keep_trials_dvd as a string before parsing in order to later use
# when writing Summary file
keep_trials_dvd_sum <- keep_trials_dvd
# Subsetting trials
keep_trials_dvd <- eval(parse(text = keep_trials_dvd))
# Save raw_data after subsetting
raw_data_dvd <- raw_data[keep_trials_dvd, ]
} else {
# Save keep_trials_dvd as "NULL" in order to later use when writing
# Summary file
keep_trials_dvd_sum <- class(keep_trials_dvd)
# Save raw_data as raw_data_dvd
raw_data_dvd <- raw_data
}
if (notification) {
# Message
message(paste("raw_data_dvd has", dim(raw_data_dvd)[1], "observations and", dim(raw_data_dvd)[2], "variables"))
print(head(raw_data_dvd))
}
# Get dimensions of raw_data_dvd for summary file
dim_raw_data_dvd <- dim(raw_data_dvd)
}
# Creates a data frame for dependent measures
dm_df <- data.frame(1:length(id_col))
# Reset counter for dependent measures to 1
count <- 1
## Calculates dependent measures
# Checks if the design is a between-subjects design, within-subjects design
# or a mixed design
if (length(between_vars) > 0 & length(within_vars) == 0) {
# Between-subjects design: all dependent measures will be calculated by id
if (notification) {
message("Your design is a between-subjects design")
}
# dvc
# Calculates dependent measures for dvc in case dvc exists
if (!is.null(dvc)) {
# mdvc: mean dvc
# Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
# or he wants "mdvc" among others (i.e., "mdvc" %in% dm equals to TRUE)
if (length(dm) == 0 | "mdvc" %in% dm) {
if (notification) {
# Message
message("Calculating mean dvc")
}
dm_df[count] <- tapply(raw_data_dvc[[dvc]], list(raw_data_dvc[[id]]),
mean)
colnames(dm_df)[count] <- "mdvc"
# Round mdvc according to decimal_places
dm_df[count] <- round(dm_df[count], digits = decimal_places)
count <- count + 1
}
# sdvc: standard deviation (SD) for dvc using denominator n
# Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
# or he wants "sdvc" among others (i.e., "sdvc" %in% dm equals to TRUE)
if (length(dm) == 0 | "sdvc" %in% dm) {
if (notification) {
# Message
message("Calculating SD for dvc using denominator n")
}
# SD using denominator n - 1
dvc_sd_r <- tapply(raw_data_dvc[[dvc]], list(raw_data_dvc[[id]]), sd)
# Length of each cell in the raw_data_dvc
l_dvc <- tapply(raw_data_dvc[[dvc]], list(raw_data_dvc[[id]]), length)
# Calculating sdvc using denominator n
dm_df[count] <- dvc_sd_r * sqrt((l_dvc - 1) / (l_dvc))
colnames(dm_df)[count] <- "sdvc"
# Round sdvc according to decimal_places
dm_df[count] <- round(dm_df[count], digits = decimal_places)
count <- count + 1
}
# meddvc: median dvc
# Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
# or he wants "medrt" among others (i.e., "meddvc" %in% dm equals to TRUE)
if (length(dm) == 0 | "meddvc" %in% dm) {
if (notification) {
# Message
message("Calculating median dvc")
}
dm_df[count] <- tapply(raw_data_dvc[[dvc]], list(raw_data_dvc[[id]]),
median)
colnames(dm_df)[count] <- "meddvc"
# Round meddvc according to decimal_places
dm_df[count] <- round(dm_df[count], digits = decimal_places)
count <- count + 1
}
# Z scores (to be used later when removing values according to
# sd_criterion)
# deviation from the mean
dev_from_mean <- ave(raw_data_dvc[[dvc]], raw_data_dvc[[id]],
FUN = function(x) x - mean(x))
raw_data_dvc$dev_from_mean <- dev_from_mean
# sdvc for z scores (again using denominator n)
sdvc_for_zscores <- ave(raw_data_dvc[[dvc]], raw_data_dvc[[id]],
FUN = function(x) sd(x) * sqrt((length(x) - 1 ) / (length(x))))
raw_data_dvc$sdvc_for_zscores <- sdvc_for_zscores
# z scores
z_score <- raw_data_dvc$dev_from_mean / raw_data_dvc$sdvc_for_zscores
raw_data_dvc$z_score <- z_score
# tdvc: trimmed dvc according to SD in sd_criterion
# Create an empty vector for names of sd_criterion coulmns
sd_criterion_names <- c()
# Reset l to 1
l <- 1
# Fill empty vector with names of sd_criterion columns
while (l <= length(sd_criterion)) {
sd_criterion_names[l] <- paste("t", sd_criterion[l], "dvc" ,sep = "")
l <- l + 1
}
# Change names of id and dvc columns because later I use "dplyr" which
# makes it easier
# Change name of id column to "id"
names(raw_data_dvc)[names(raw_data_dvc) == id] <- "id"
# Change name of dvc column to "dvc"
names(raw_data_dvc)[names(raw_data_dvc) == dvc] <- "dvc"
# Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
# or he wants "tdvc" among others (i.e., "tdvc" %in% dm equals to TRUE)
if (length(dm) == 0 | "tdvc" %in% dm) {
# Reset j to 1
j <- 1
# Calculating mean dvc after rejecting values above SD criterions
# according to sd_criterion
while (j <= length(sd_criterion)) {
if (notification) {
# Message
message(paste("Calculating mean dvc after rejecting values above", sd_criterion[j], "SD",
sep = " "))
}
temp <- raw_data_dvc %>%
dplyr::group_by(id) %>%
dplyr::summarise(n = mean(dvc[abs(z_score) < sd_criterion[j]])) %>%
reshape2::dcast(id ~ sd_criterion_names[j], value.var = "n", fun = mean)
dm_df[count] <- temp[-1]
# Round according to decimal_places
dm_df[count] <- round(dm_df[count], digits = decimal_places)
count <- count + 1
j <- j + 1
}
}
# ntr: number of trimmed values for each SD in sd_criterion
# Checks if the user does not want ntr in results (i.e., !("ntr" %in% dm)
# equals to FALSE)
if (!("ntr" %in% dm)) {
# In case the user does not want ntr in the filnal table,
# store ntr in temp_dm to be used later when calculating proportion
temp_dm <- data.frame(1:length(id_col))
count_temp <- 1
}
# Create an empty vector for names of ntr columns
ntr_names <- c()
# Rest l to 1
l <- 1
# Fill empty vector with names of ntr columns
while (l <= length(sd_criterion)) {
ntr_names[l] <- paste("n", sd_criterion[l], "tr" ,sep = "")
l <- l + 1
}
# Reset j to 1 for next while loop
j <- 1
# Counting number of rejected values for each sd_criterion
while (j <= length(sd_criterion)) {
if (notification) {
# Messgae
message(paste("Counting number of rejected values for", sd_criterion[j], "SD", sep = " "))
}
temp <- raw_data_dvc %>%
dplyr::group_by(id) %>%
dplyr::summarise(n = sum(abs(z_score) > sd_criterion[j])) %>%
reshape2::dcast(id ~ ntr_names[j], value.var = "n", fun = sum)
if (length(dm) == 0 | "ntr" %in% dm) {
# Store it in the dependent measures df
dm_df[count] <- temp[-1]
count <- count + 1
} else {
# Stroe it in a temporary df
temp_dm[count_temp] <- temp[-1]
count_temp <- count_temp + 1
}
j <- j + 1
}
if (exists("temp_dm")) {
temp_dm <- temp_dm[-1]
}
# End of ntr
# ndvc: number of values before rejecting values according to sd_criterion
# Calculate ndvc anyway
ndvc <- tapply(raw_data_dvc$dvc, list(raw_data_dvc$id), length)
# Checks if to store ndvc in dm_df ( in case the user wants all dpendent
# measures (i.e., length(dm) == 0) or he wants "ndvc" among others
# (i.e., "ndvc" %in% dm equals to TRUE)
if (length(dm) == 0 | "ndvc" %in% dm) {
if (notification) {
# Message
message("Counting number of values before rejecting values according to sd_criterion")
}
dm_df[count] <- ndvc
colnames(dm_df)[count] <- "ndvc"
count <- count + 1
}
# End of ndvc
# ptr: proportion of rejected values according to sd_criterion
# Create an empty vector for names of ptr columns
ptr_names <- c()
# Rest l to 1
l <- 1
# Fill empty vector with names of ptr columns
while (l <= length(sd_criterion)) {
ptr_names[l] <- paste("p", sd_criterion[l], "tr" ,sep = "")
l <- l + 1
}
# Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
# or he wants "ptr" among others (i.e., "ptr" %in% dm equals to TRUE)
if (length(dm) == 0 | "ptr" %in% dm) {
# Reset j to 1 for next while loop
j <- 1
while (j <= length(sd_criterion)) {
if (notification) {
# Message
message(paste("Calculating proportion of values rejected for", sd_criterion[j], "SD", sep = " "))
}
# Get ntr from temp_dm or from dm_df
if (!(length(dm) == 0 | "ntr" %in% dm)) {
temp <- temp_dm[[ntr_names[j]]] / ndvc
} else {
temp <- dm_df[[ntr_names[j]]] / ndvc
}
dm_df[count] <- temp
colnames(dm_df)[count] <- ptr_names[j]
# Round according to digits = 3
dm_df[count] <- round(dm_df[count], digits = 3)
count <- count + 1
j <- j + 1
}
}
# End of ptr
# rminv: harmonic mean
# Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
# or he wants "rminv" among others (i.e., "rminv" %in% dm equals to TRUE)
if (length(dm) == 0 | "rminv" %in% dm) {
if (notification) {
# Message
message("Calculating harmonic mean for dvc")
}
dm_df[count] <- tapply(raw_data_dvc$dvc, list(raw_data_dvc$id),
psych::harmonic.mean)
colnames(dm_df)[count] <- "rminv"
# Round rminv according to decimal_places
dm_df[count] <- round(dm_df[count], digits = decimal_places)
count <- count + 1
}
# End of rminv
# Percentiles: dvc according to requested percentiles
# Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
# or he wants "pdvc" among others (i.e., "pdvc" %in% dm equals to TRUE)
if (length(dm) == 0 | "pdvc" %in% dm) {
# Create an empty vector for names of pdvc columns
percentails_names <- c()
# Rest l to 1
l <- 1
# Fill vector
while (l <= length(percentiles)) {
percentails_names[l] <- paste("p", percentiles[l], "dvc" ,sep = "")
l <- l + 1
}
# Reset j to 1 before next while loop
j <- 1
# Calculating percentiles
while (j <= length(percentiles)) {
if (notification) {
# Message
message(paste("Calculating the", percentiles[j], "percentail for dvc" ), sep = "")
}
dm_df[count] <- tapply(raw_data_dvc$dvc, list(raw_data_dvc$id),
quantile, probs = percentiles[j])
colnames(dm_df)[count] <- percentails_names[j]
# Round according to decimal_places
dm_df[count] <- round(dm_df[count], digits = decimal_places)
count <- count + 1
j <- j + 1
}
}
# End of pdvc
# Outlier removal procedures according to Van Selst & Jolicoeur (1994)
# Checks if the user wants an outlier removal procedure
if (!is.null(outlier_removal)) {
# User wants an outlier removal procedure
# Delete unnecessary trials in case keep_trials_outlier is not NULL
# Outlier removal procedure will be calculated on the remaining trials
if (!is.null(keep_trials_outlier)) {
# Save keep_trials_outlier as a string before parsing in order to
# later use when writing Summary file
keep_trials_outlier_sum <- keep_trials_outlier
# Delete unnecessary trials
if (notification) {
# Message
message("Keeping trials according to keep_trials_outlier and deleting unnecessary trials in raw_data")
message("Outlier removal procedure will be calculated on the remaining trials")
}
# Subsetting trials
keep_trials_outlier <- eval(parse(text = keep_trials_outlier))
# Save raw_data after subsetting
raw_data <- raw_data[keep_trials_outlier, ]
} else {
# Save keep_trials_outlier as "NULL" in order to later use when
# writing Summary file
keep_trials_outlier_sum <- class(keep_trials_outlier)
# Save raw_data
raw_data <- raw_data
}
if (notification) {
# Message
message("Calculating mean dvc by id according to selected outlier removal procedure")
}
# Change name of id column to "id"
names(raw_data)[names(raw_data) == id] <- "id"
# Change name of dvc column to "dvc"
names(raw_data)[names(raw_data) == dvc] <- "dvc"
# Create final array that will contain trimmed means per id
final_data <- matrix(0, nrow = length(id_col), ncol = 1)
# Create final array that will contain number of trials trimmed per id
numtrials_data <- matrix(0, nrow = length(id_col), ncol = 1)
# Create final array that will contain percent of trials removed per id
percent_data <- matrix(0, nrow = length(id_col), ncol = 1)
# Create final array that will contain total number of trials per id
totaltrials_data <- matrix(0, nrow = length(id_col), ncol = 1)
# Give names to columns according to the procedure
if (outlier_removal == 1) {
# Non recursive moving criterion procedure
# nrmc: means for dvc per id
colnames(final_data) <- "nrmc"
# nnrmc: number of trials trimmed per id
colnames(numtrials_data) <- "nnrmc"
# pnrmc: percent of trials removed per id
colnames(percent_data) <- "pnrmc"
# tnrmc: total number of trials per id
colnames(totaltrials_data) <- "tnrmc"
} else if (outlier_removal == 2) {
# Modified recursive moving criterion procedure
# mrmc: means for dvc per id
colnames(final_data) <- "mrmc"
# nmrmc: number of trials trimmed per id
colnames(numtrials_data) <- "nmrmc"
# pmrmc: percent of trials removed per id
colnames(percent_data) <- "pmrmc"
# tmrmc: total number of trials per id
colnames(totaltrials_data) <- "tmrmc"
} else if (outlier_removal == 3) {
# Hybrid recursive moving criterion procedure
# hrmc: means for dvc per id
colnames(final_data) <- "hrmc"
# nhrmc: number of trials trimmed per id
colnames(numtrials_data) <- "nhrmc"
# phrmc: percent of trials removed per id
colnames(percent_data) <- "phrmc"
# thrmc: total number of trials per id
colnames(totaltrials_data) <- "thrmc"
}
# Do outlier removal procedure
for (i in 1:length(id_col)) {
# For each id (i.e., subject)
# Isolate the current participant's data
temp_data <- raw_data[raw_data$id == id_col[i], ]
if (outlier_removal == 1) {
# Non recursive procedure with moving criterion
# Do current id trimming
current_id <- non_recursive_mc(temp_data$dvc)
final_data[i, 1] <- current_id[1]
percent_data[i, 1] <- current_id[2]
numtrials_data[i, 1] <- current_id[3]
totaltrials_data[i, 1] <- current_id[4]
} else if (outlier_removal == 2) {
# Modified recursive procedure with moving criterion
# Do current id trimming
current_id <- modified_recursive_mc(temp_data$dvc)
final_data[i, 1] <- current_id[1]
percent_data[i, 1] <- current_id[2]
numtrials_data[i, 1] <- current_id[3]
totaltrials_data[i, 1] <- current_id[4]
} else if (outlier_removal == 3) {
# Hybrid recursive procedure with moving criterion
# Do current id trimming
current_id <- hybrid_recursive_mc(temp_data$dvc)
final_data[i, 1] <- current_id[1]
percent_data[i, 1] <- current_id[2]
totaltrials_data[i, 1] <- current_id[3] # Should be 3. Do not change
}
}
# End of for (i in 1:length(id_col)) loop
# Round final_data according to decimal_places
final_data <- round(final_data, digits = decimal_places)
# Round percent_data according to digits = 3
percent_data <- round(percent_data, digits = 3)
if (notification) {
# Messgae
if (outlier_removal == 1) {
message("Finished calculating means for dvc by id according to non-recursive procedure with moving criterion ")
} else if (outlier_removal == 2) {
message("Finished calculating means for dvc by id according to modified-recursive procedure with moving criterion")
} else if(outlier_removal == 3) {
message("Finished calculating means for dvc by id according to hybrid procedure with moving criterion")
}
}
}
# End of outlier removal
# Finished dvc
if (notification) {
# Message
message("Finished calculating dependent measures for dvc")
}
} # End of !is.null(dvc)
# dvd
# Check if dvd exists
if(!is.null(dvd)) {
# Change name of id column to "id"
names(raw_data_dvd)[names(raw_data_dvd) == id] <- "id"
# mdvd: mean dvd
# Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
# or he wants "mdvd" among others (i.e., "mdvd" %in% dm equals to TRUE)
if (length(dm) == 0 | "mdvd" %in% dm) {
if (notification) {
# Message
message("Calculating mean dvd")
}
dm_df[count] <- tapply(raw_data_dvd[[dvd]], list(raw_data_dvd$id), mean)
colnames(dm_df)[count] <- "mdvd"
# Round mdvd according to decimal_dvd
dm_df[count] <- round(dm_df[count], digits = decimal_dvd)
count <- count + 1
}
# merr: mean error
# Checks if the user wants all dpendent measures (i.e., length(dm) == 0)
# or he wants "merr" among others (i.e., "merr" %in% dm equals to TRUE)
if (length(dm) == 0 | "merr" %in% dm) {
if (notification) {
# Message
message("Calculating mean error")
}
dm_df[count] <- 1 - dm_df[["mdvd"]]
colnames(dm_df)[count] <- "merr"
# Round merr according to decimal_dvd
dm_df[count] <- round(dm_df[count], digits = decimal_dvd)
if (notification) {
# Message
message("Finished calculating dependent measures for dvd")
}
}
} # End of !is.null(dvd)
## Creates results file
if (notification) {
# Message
message("Creating results")
}
# Check if to bind id_properties_df
if (length(id_properties) > 0) {
results <- cbind(id_col, id_properties_df, between_vars_df, dm_df)
} else {
results <- cbind(id_col, between_vars_df, dm_df)
}
# Check if to bind results of outlier removal procedure
if (!is.null(outlier_removal)) {
if (outlier_removal == 1 | outlier_removal == 2) {
results <- cbind(results, final_data, percent_data, numtrials_data,
totaltrials_data)
} else if (outlier_removal == 3) {
results <- cbind(results, final_data, percent_data, totaltrials_data)
}
}
# Change back name of id to original
names(results)[names(results) == "id_col"] <- id
if (notification) {
# Message
message("Printing head results to console")
print(head(results))
}
## Summary file
if (save_summary) {
# Write a summary txt file
if (notification) {
# Message
message("Creating summary file")
}
# Dim of raw_data, raw_data_dvc and raw_data_dvd
cat("====================================================================", "\n", file = paste(results_path, "/", sum_file_name, sep = ""))
cat(paste("Summary:", dataname), date(), file = paste(results_path, "/", sum_file_name, sep = ""), sep = "\n", append = TRUE)
cat("====================================================================", "\n", file = paste(results_path, "/", paste(results_path, "/", sum_file_name, sep = "")), append = TRUE)
cat(paste(dataname, "has", dim_raw_data1[1], "observations and", dim_raw_data1[2], "variables"),
"\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
if (!is.null(keep_trials) | length(drop_vars) > 0) {
cat(paste("* keep_trials:", keep_trials_sum, "drop_vars:", drop_col_sum), "\n", file = paste(results_path, "/", paste(results_path, "/", sum_file_name, sep = "")), append = TRUE)
cat(paste("* After deleting unnecessary trials and variables according to keep_trials and drop_vars", dataname, "has", dim_raw_data2[1],
"observations and", dim_raw_data2[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
if (!is.null(dvc)) {
cat(paste("* keep_trials_dvc:", keep_trials_dvc_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(paste("* After deleting unnecessary trials according to keep_trials_dvc", dvc, "data has", dim_raw_data_dvc[1], "observations and",
dim_raw_data_dvc[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
if(!is.null(outlier_removal)) {
cat(paste("* keep_trials_outlier:", keep_trials_outlier_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(paste("* After deleting unnecessary trials and variables according to keep_trials_outlier", dvc, "data for outlier removal procedures has",
dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(paste("*", outlier_name, "was calculated on", dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"), "\n",
file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
} else {
cat("* No dvc was found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
if(!is.null(dvd)) {
cat(paste("* keep_trials_dvd:", keep_trials_dvd_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(paste("* After deleting unnecessary trials", dvd, "data has", dim_raw_data_dvd[1],
"observations and", dim_raw_data_dvd[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
} else {
cat("* No dvd was found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
# Dim of results_name
cat(paste("*", results_name, "has", dim(results)[1], "ids and", dim(results)[2], "variables"), "\n",
file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
# Write ids
cat("* ids:", levels(factor(results[[id]])), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
# Write levels of between-subjects indpendent variables
if (length(between_vars) > 0) {
cat("* Between-subject independent variables:", "\n",file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
for (lev in 1:length(between_vars)) {
cat(paste(" ", between_vars[[lev]], ": ", sep = ""), file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(levels(factor(raw_data[[between_vars[[lev]]]])), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
} else {
cat("* No between-subjects variables were found", "\n",file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
if (length(id_properties) > 0) {
# Write names of id_properties
cat("* id properties:", id_properties, "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
} else {
cat("* No id properties were found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
if (length(within_vars) == 0) {
# Write to Summary file
cat("* No within-subjects variables were found", "\n",file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
if (notification) {
message("Summary file finished")
}
}
# Save results
if (save_results) {
if (extension == ".txt") {
# Save results as a txt file
write.table(results, row.names = FALSE, file = paste(results_path, "/", results_name, sep = ""))
} else {
# Save results as a csv file
write.csv(results, row.names = FALSE, file = paste(results_path, "/", results_name, sep = ""))
}
# Message
message(paste(results_name, "has", dim(results)[1], "observations and", dim(results)[2], "variables"))
}
message("prep() returned a data frame to console")
message("Hip Hip Hooray! prep() finished")
message("Have a great day and may all your results be significant!")
# Return
return(results)
# Checks if there are within-subject independent variables
} else if (length(within_vars) > 0) {
if (notification) {
if (length(between_vars) > 0) {
# Message
message("Your design is a mixed design (i.e., includes both between-subjects and within-subjects independent variables")
} else {
# Message
message("Your design is a within-subject design")
}
}
# Creats a temporay df to hold all dependent measures
temp_dm <- cbind(id_col)
## All dependent measures will be calculated by id by within_condition
# dvc
# Calculates dependent measures for dvc in case dvc exists
if (!is.null(dvc)) {
# Change name of id column to "id"
names(raw_data_dvc)[names(raw_data_dvc) == id] <- "id"
# Change name of dvc column to "dvc"
names(raw_data_dvc)[names(raw_data_dvc) == dvc] <- "dvc"
# mdvc: mean dvc
if (length(dm) == 0 | "mdvc" %in% dm) {
if (notification) {
# Message
message("Calculating mean dvc by id by within_condition")
}
mdvc <- reshape2::dcast(raw_data_dvc, id ~ within_condition, mean,
value.var = "dvc")
mdvc <- mdvc[-1]
colnames(mdvc)[1:length(mdvc)] <- paste("mdvc", 1:length(mdvc), sep = "")
# Round mdvc according to decimal_places
mdvc <- round(mdvc, digits = decimal_places)
# Bind
temp_dm <- cbind(temp_dm, mdvc)
}
# sdvc: SD according to denominator n - 1
if (length(dm) == 0 | "sdvc" %in% dm) {
if (notification) {
# Message
message("Calculating SD for dvc by id by within_condition using denominator n")
}
# SD using denominator n - 1
dvc_sd_r <- reshape2::dcast(raw_data_dvc, id ~ within_condition, sd,
value.var = "dvc")
dvc_sd_r <- dvc_sd_r[-1]
# Length of each cell in the raw_data_dvc
l_dvc <- reshape2::dcast(raw_data_dvc, id ~ within_condition, length,
value.var = "dvc")
l_dvc <- l_dvc[-1]
# Calculating SD using denominator n
sdvc <- dvc_sd_r * sqrt((l_dvc - 1) / (l_dvc))
colnames(sdvc) <- paste("sdvc", 1:dim(sdvc)[2], sep = "")
# Round sdvc according to decimal_places
sdvc <- round(sdvc, digits = decimal_places)
# Bind
temp_dm <- cbind(temp_dm, sdvc)
}
# meddvc: median dvc
if (length(dm) == 0 | "meddvc" %in% dm) {
if (notification) {
# Message
message("Calculating median dvc by id by within_condition")
}
meddvc <- reshape2::dcast(raw_data_dvc, id ~ within_condition, median,
value.var = "dvc", fill = NaN)
meddvc <- meddvc[-1]
colnames(meddvc) <- paste("meddvc", 1:dim(meddvc)[2], sep = "")
# Round meddvc according to decimal_places
meddvc <- round(meddvc, digits = decimal_places)
# Bind
temp_dm <- cbind(temp_dm, meddvc)
}
# Deviation from the mean for z scores
dev_from_mean <- ave(raw_data_dvc$dvc, raw_data_dvc$id,
raw_data_dvc$within_condition,
FUN = function(x) x - mean(x))
# sdvc for z scores
sdvc_for_zscores <- ave(raw_data_dvc$dvc, raw_data_dvc$id,
raw_data_dvc$within_condition,
FUN = function(x) sd(x) * sqrt((length(x) - 1) / length(x)))
# Z scores
z_score <- dev_from_mean / sdvc_for_zscores
raw_data_dvc$z_score <- z_score
# tdvc: trimmed dvc according SD in sd_criterion
# Create names for sd_criterion coulmn
sd_criterion_names <- c()
# Reset l to 1
l <- 1
while (l <= length(sd_criterion)) {
sd_criterion_names[l] <- paste("t", sd_criterion[l], "dvc" ,sep = "")
l <- l + 1
}
if (length(dm) == 0 | "tdvc" %in% dm) {
# Create a data frame for sd_criterion
tdvc_df <- data.frame(1:length(id_col))
# Reset j to 1
j <- 1
# Calculating mean dvc by id by within_condition after rejecting values
# above SD specified in sd_criterions according to sd_criterion
while (j <= length(sd_criterion)) {
if (notification) {
# Message
message(paste("Calculating mean dvc by id by within_condition after rejecting values above", sd_criterion[j], "SD", sep = " "))
}
temp <- raw_data_dvc %>%
dplyr::group_by(id, within_condition) %>%
dplyr::summarise(n = mean(dvc[abs(z_score) < sd_criterion[j]])) %>%
reshape2::dcast(id ~ within_condition, value.var = "n")
temp <- temp[-1]
colnames(temp) <- paste(sd_criterion_names[j], 1:dim(temp)[2], sep = "")
if (j == 1) {
tdvc_df[2:(dim(temp)[2] + 1)] <- temp
} else {
tdvc_df[(dim(tdvc_df)[2] + 1):(dim(tdvc_df)[2] + dim(temp)[2])] <- temp
}
j <- j + 1
}
tdvc_df <- tdvc_df[-1]
# Round tdvc_df according to decimal_places
tdvc_df <- round(tdvc_df, digits = decimal_places)
# Bind
temp_dm <- cbind(temp_dm, tdvc_df)
}
# ntr: number of trimmed values for each SD in sd_criterion
ntr_names <- c()
# Reset l to 1
l <- 1
while (l <= length(sd_criterion)) {
ntr_names[l] <- paste("n", sd_criterion[l], "tr" ,sep = "")
l <- l + 1
}
# Create a data frame for ntr
ntr_df <- data.frame(1:length(id_col))
# Reset j to 1 for next while loop
j <- 1
# Counting number of rejected values for each sd_criterion
while (j <= length(sd_criterion)) {
if(length(dm) == 0 | "ntr" %in% dm) {
if (notification) {
# Messgae
message(paste("Counting number of rejected values for", sd_criterion[j], "SD", sep = " "))
}
}
temp <- raw_data_dvc %>%
dplyr::group_by(id, within_condition) %>%
dplyr::summarise(n = length(dvc[abs(z_score) > sd_criterion[j]])) %>%
reshape2::dcast(id ~ within_condition, value.var = "n")
temp <- temp[-1]
colnames(temp) <- paste(ntr_names[j], 1:dim(temp)[2], sep = "")
if (j == 1) {
ntr_df[2:(dim(temp)[2] + 1)] <- temp
} else {
ntr_df[(dim(ntr_df)[2] + 1):(dim(ntr_df)[2] + dim(temp)[2])] <- temp
}
j <- j + 1
}
ntr_df <- ntr_df[-1]
if (length(dm) == 0 | "ntr" %in% dm) {
# Bind
temp_dm <- cbind(temp_dm, ntr_df)
}
# ndvc: number of values before removing values according to SD in
# sd_criterion
if (notification) {
# Message
message("Counting number of values for dvc by id by within_condition before rejecting values according to sd_criterion")
}
ndvc <- reshape2::dcast(raw_data_dvc, id ~ within_condition, length,
value.var = "dvc")
ndvc <- ndvc[-1]
colnames(ndvc) <- paste("ndvc", 1:dim(ndvc)[2], sep = "")
if (length(dm) == 0 | "ndvc" %in% dm) {
# Bind
temp_dm <- cbind(temp_dm, ndvc)
}
# ptr: proportion of rejected values according to SD in sd_criterion
if (length(dm) == 0 | "ptr" %in% dm) {
# Create names for ptr
ptr_names <- c()
# Reset l to 1
l <- 1
while (l <= length(sd_criterion)) {
ptr_names[l] <- paste("p", sd_criterion[l], "tr" ,sep = "")
l <- l + 1
}
# Create a data frame for ptr
ptr_df <- data.frame(1:length(id_col))
# Reset counter j for the next while loop
j <- 1
# Reset i to 1
i <- 1
while (j <= length(sd_criterion)) {
if (notification) {
# Message
message(paste("Calculating proportion of rejected values for dvc by id by within_condition for", sd_criterion[j], "SD", sep = " "))
}
if (j == 1) {
temp <- ntr_df[i:dim(ndvc)[2]]
} else {
temp <- ntr_df[i:(i + dim(ndvc)[2] - 1)]
}
ptr <- temp / ndvc
colnames(ptr) <- paste(ptr_names[j], 1:dim(ndvc)[2], sep = "")
if (j == 1) {
ptr_df[2:(dim(ndvc)[2] + 1)] <- ptr
ptr_df <- ptr_df[-1]
} else {
ptr_df[(dim(ptr_df)[2] + 1):(dim(ptr_df)[2] + dim(ptr)[2])] <- ptr
}
i <- i + dim(ndvc)[2]
j <- j + 1
}
# Round ptr_df according to digits = 3
ptr_df <- round(ptr_df, digits = 3)
# Bind
temp_dm <- cbind(temp_dm, ptr_df)
}
# rminv: harmonic mean
if (length(dm) == 0 | "rminv" %in% dm) {
if (notification) {
# Message
message("Calculating harmonic mean for dvc by id by condition")
}
rminv <- reshape2::dcast(raw_data_dvc, id ~ within_condition,
psych::harmonic.mean, value.var = "dvc")
rminv <- rminv[-1]
colnames(rminv) <- paste("rminv", 1:dim(rminv)[2], sep = "")
# Round rminv according to decimal_places
rminv <- round(rminv, digits = decimal_places)
# Bind
temp_dm <- cbind(temp_dm, rminv)
}
# pdvc: Percentiles
if (length(dm) == 0 | "pdvc" %in% dm) {
# Create names for pdvc
percentails_names <- c()
# Rest counter l
l <- 1
while (l <= length(percentiles)) {
percentails_names[l] <- paste("p", percentiles[l], "dvc" ,sep = "")
l <- l + 1
}
# Create a data frame for percentiles
percentails_df <- data.frame(1:length(id_col))
# Reset j to 1 before next while loop
j <- 1
while (j <= length(percentiles)) {
if (notification) {
# Message
message(paste("Calculating the", percentiles[j], "percentail for dvc by id by within_condition" ), sep = "")
}
temp <- reshape2::dcast(raw_data_dvc, id ~ within_condition,
quantile, probs = percentiles[j],
value.var = "dvc")
temp <- temp[-1]
colnames(temp) <- paste(percentails_names[j], 1:dim(temp)[2], sep = "")
if (j == 1) {
percentails_df[2:(dim(temp)[2] + 1)] <- temp
} else {
percentails_df[(dim(percentails_df)[2] + 1):(dim(percentails_df)[2] + dim(temp)[2])] <- temp
}
j <- j + 1
}
percentails_df <- percentails_df[-1]
# Round percentails_df according to decimal_places
percentails_df <- round(percentails_df, digits = decimal_places)
# Bind
temp_dm <- cbind(temp_dm, percentails_df)
}
## Outlier removal procedures according to Van Selst & Jolicoeur (1994)
# Check to see if the user wants an outlier removal procedure
if (!is.null(outlier_removal)) {
# User wants an outlier removal procedure
# Save keep_trials_outlier as a string before parsing in order to later
# use when writing Summary file
keep_trials_outlier_sum <- keep_trials_outlier
if (!is.null(keep_trials_outlier)) {
# Delete unnecessary trials
if (notification) {
# Message
message("Keeping trials according to keep_trials_outlier and deleting unnecessary trials in raw_data")
message("Outlier removal procedure will be calculated on the remaining trials")
}
# Subsetting trials
keep_trials_outlier <- eval(parse(text = keep_trials_outlier))
# Save raw_data after subsetting
raw_data <- raw_data[keep_trials_outlier, ]
} else {
# Save keep_trials_outlier as "NULL" in order to later use when
# writing Summary file
keep_trials_outlier_sum <- class(keep_trials_outlier)
# Save raw_data as raw_data
raw_data <- raw_data
}
if (notification) {
# Message
message("Calculating mean dvc by id by within_condition according to selected outlier removal procedure")
}
# Change name of id column to "id"
names(raw_data)[names(raw_data) == id] <- "id"
# Change name of dvc column to "dvc"
names(raw_data)[names(raw_data) == dvc] <- "dvc"
# Create final array that will contain trimmed means per id per within_condition
final_data <- matrix(0, nrow = length(id_col), ncol = length(levels(raw_data$within_condition)))
# Create final array that will contain number of trials trimmed per id per within_condition
numtrials_data <- matrix(0, nrow = length(id_col), ncol = length(levels(raw_data$within_condition)))
# Create final array that will contain percent of trials removed per id per within_condition
percent_data <- matrix(0, nrow = length(id_col), ncol = length(levels(raw_data$within_condition)))
# Create final array that will contain total number of trials per id per within_condition before removal
totaltrials_data <- matrix(0, nrow = length(id_col), ncol = length(levels(raw_data$within_condition)))
# Give name to columns according to the procedure
if (outlier_removal == 1) {
# Non recursive moving criterion procedure
# nrmc: means for dvc per id per within_condition
colnames(final_data) <- paste("nrmc", 1:length(levels(raw_data$within_condition)), sep = "")
# nnrmc: number of trials trimmed per id per within_condition
colnames(numtrials_data) <- paste("nnrmc", 1:length(levels(raw_data$within_condition)), sep = "")
# pnrmc: percent of trials removed per id per within_condition
colnames(percent_data) <- paste("pnrmc", 1:length(levels(raw_data$within_condition)), sep = "")
# tnrmc: total number of trials per id per within_condition before removal
colnames(totaltrials_data) <- paste("tnrmc", 1:length(levels(raw_data$within_condition)), sep = "")
} else if (outlier_removal == 2) {
# Modified recursive moving criterion procedure
# mrmc: means for dvc per id per within_condition
colnames(final_data) <- paste("mrmc", 1:length(levels(raw_data$within_condition)), sep = "")
# nmrmc: number of trials trimmed per id per within_condition
colnames(numtrials_data) <- paste("nmrmc", 1:length(levels(raw_data$within_condition)), sep = "")
# pmrmc: percent of trials removed per id per within_condition
colnames(percent_data) <- paste("pmrmc", 1:length(levels(raw_data$within_condition)), sep = "")
# tmrmc: total number of trials per id per within_condition before removal
colnames(totaltrials_data) <- paste("tmrmc", 1:length(levels(raw_data$within_condition)), sep = "")
} else if (outlier_removal == 3) {
# Hybrid recursive moving criterion procedure
# hrmc: means for dvc per id per within_condition
colnames(final_data) <- paste("hrmc", 1:length(levels(raw_data$within_condition)), sep = "")
# nhrmc: number of trials trimmed per id per within_condition
colnames(numtrials_data) <- paste("nhrmc", 1:length(levels(raw_data$within_condition)), sep = "")
# phrmc: percent of trials removed per id per within_condition
colnames(percent_data) <- paste("phrmc", 1:length(levels(raw_data$within_condition)), sep = "")
# thrmc: total number of trials per id per within_condition before removal
colnames(totaltrials_data) <- paste("thrmc", 1:length(levels(raw_data$within_condition)), sep = "")
}
for (i in 1:length(id_col)) {
# For each id (i.e., subject)
# Reset j to 1
j <- 1
# Do outlier removal procedure
for (cond in levels(raw_data$within_condition)) {
# For each within_condition
# Isolate the current participant & condition's data
temp_data <- raw_data[raw_data$id == id_col[i] & raw_data$within_condition == levels(raw_data$within_condition)[j], ]
if (outlier_removal == 1) {
# Non recursive procedure with moving criterion
# Do current id trimming
current_id <- non_recursive_mc(temp_data$dvc)
final_data[i, j] <- current_id[1]
percent_data[i, j] <- current_id[2]
numtrials_data[i, j] <- current_id[3]
totaltrials_data[i, j] <- current_id[4]
} else if (outlier_removal == 2) {
# Modified recursive procedure with moving criterion
# Do current id trimming
current_id <- modified_recursive_mc(temp_data$dvc)
final_data[i, j] <- current_id[1]
percent_data[i, j] <- current_id[2]
numtrials_data[i, j] <- current_id[3]
totaltrials_data[i, j] <- current_id[4]
} else if (outlier_removal == 3) {
# Hybrid recursive procedure with moving criterion
# Do current id trimming
current_id <- hybrid_recursive_mc(temp_data$dvc)
final_data[i, j] <- current_id[1]
percent_data[i, j] <- current_id[2]
totaltrials_data[i, j] <- current_id[3] # Should be 3. Do not change
}
j <- j + 1
}
# End of condition for loop
}
# End of id for loop
# Round final data according to decimal_places
final_data <- round(final_data, digits = decimal_places)
# Round percent_data according to digits = 3
percent_data <- round(percent_data, digits = 3)
if (notification) {
# Messgae
if (outlier_removal == 1) {
message("Finished calculating means for dvc by id by within_condition according to non-recursive procedure with moving criterion ")
} else if (outlier_removal == 2) {
message("Finished calculating means for dvc by id by within_condition according to modified-recursive procedure with moving criterion")
} else if(outlier_removal == 3) {
message("Finished calculating means for dvc by id by within_condition according to hybrid procedure with moving criterion")
}
}
}
# End of outlier removal procedures
# Finished dvc
if (notification) {
# Message
message("Finished calculating dependent measures for dvc")
}
} # End of !is.null(dvc)
# dvd
# Check if dvd exists
if (!is.null(dvd)) {
# Change name of id column to "id"
names(raw_data_dvd)[names(raw_data_dvd) == id] <- "id"
# Change name of dvd column to "ac"
names(raw_data_dvd)[names(raw_data_dvd) == dvd] <- "dvd"
# mdvd: mean dvd
if (length(dm) == 0 | "mdvd" %in% dm) {
if (notification) {
# Message
message("Calculating mean dvd by id by within_condition")
}
mdvd <- reshape2::dcast(raw_data_dvd, id ~ within_condition, mean, value.var = "dvd")
mdvd <- mdvd[-1]
colnames(mdvd) <- paste("mdvd", 1:dim(mdvd)[2], sep = "")
# Round mdvd according to decimal_dvd
mdvd <- round(mdvd, digits = decimal_dvd)
# Bind
temp_dm <- cbind(temp_dm, mdvd)
}
# merr: mean error
if (length(dm) == 0 | "merr" %in% dm) {
if (notification) {
# Message
message("Calculating mean error by id by within_condition")
}
merr <- 1 - mdvd
colnames(merr) <- paste("merr", 1:dim(merr)[2], sep = "")
# Round merr according to decimal_dvd
merr <- round(merr, digits = decimal_dvd)
# Bind
temp_dm <- cbind(temp_dm, merr)
}
if (notification) {
# Message
message("Finished calculating dvd data")
}
} # End of !is.null(dvd)
# Remove the first column of the temporary df
temp_dm <- temp_dm[-1]
## Creates results file
if (notification) {
# Message
message("Creating results")
}
if (length(id_properties) > 0) {
# Bind also id_properties_df
if (length(between_vars) > 0) {
# Bind also between_bars_df
results <- cbind(id_col, id_properties_df, between_vars_df, temp_dm)
} else {
results <- cbind(id_col, id_properties_df, temp_dm)
}
} else {
if (length(between_vars) > 0) {
# Bind also between_vars_df
results <- cbind(id_col, between_vars_df, temp_dm)
} else {
results <- cbind(id_col, temp_dm)
}
}
# In case of an outlier procedure, bind also results for that
if (!is.null(outlier_removal)) {
if (outlier_removal == 1 | outlier_removal == 2) {
results <- cbind(results, final_data, percent_data, numtrials_data, totaltrials_data)
} else if (outlier_removal == 3) {
results <- cbind(results, final_data, percent_data, totaltrials_data)
}
}
names(results)[names(results) == "id_col"] <- id
if (notification) {
# Message
message("Printing head results")
print(head(results))
}
## Summary
if (save_summary == TRUE) {
# Write a summary txt file
if (notification) {
# Message
message("Creating summary file")
}
# Dim of raw_data, raw_data_dvc and raw_data_dvd
cat("====================================================================", "\n", file = paste(results_path, "/", sum_file_name, sep = ""))
cat(paste("Summary:", dataname), date(), file = paste(results_path, "/", sum_file_name, sep = ""), sep = "\n", append = TRUE)
cat("====================================================================", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(paste(file_name, "has", dim_raw_data1[1], "observations and", dim_raw_data1[2], "variables"),
"\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
if (!is.null(keep_trials) | length(drop_vars) >0) {
cat(paste("* keep_trials:", keep_trials_sum, "drop_vars:", drop_col_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(paste("* After deleting unnecessary trials and variables", file_name, "has", dim_raw_data2[1],
"observations and", dim_raw_data2[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
if (!is.null(dvc)) {
cat(paste("* keep_trials_dvc:", keep_trials_dvc_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(paste("* After deleting unnecessary trials and variables", dvc, "data has", dim_raw_data_dvc[1],
"observations and", dim_raw_data_dvc[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
if (!is.null(outlier_removal)) {
cat(paste("* keep_trials_outlier:", keep_trials_outlier_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(paste("* After deleting unnecessary trials and variables according to keep_trials_outlier", dvc, "data for outlier removal procedures has",
dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(paste("*", outlier_name, "was calculated on", dim(raw_data)[1], "observations and", dim(raw_data)[2], "variables"), "\n",
file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
} else {
cat("* No dvc was found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
if(!is.null(dvd)) {
cat(paste("* keep_trials_dvd:", keep_trials_dvd_sum), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(paste("* After deleting unnecessary trials and variables", dvd, "data has", dim_raw_data_dvd[1],
"observations and", dim_raw_data_dvd[2], "variables"), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
} else {
cat("* No dvd was found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
# Dim of results_name
cat(paste("*", results_name, "has", dim(results)[1], "ids and", dim(results)[2], "variables"), "\n",
file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
# Write ids
cat( "* ids:", levels(factor(results[[id]])), "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
# Write levels of between-subjects indpendent variables
if (length(between_vars) > 0) {
cat("* Between-subject independent variables:", "\n",file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
for (lev in 1:length(between_vars)) {
cat(paste(" ", between_vars[[lev]], ": ", sep = ""), file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(levels(factor(raw_data[[between_vars[[lev]]]])), file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat("\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
} else {
cat("* No between-subjects variables were found", "\n",file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
if (length(id_properties) > 0) {
# Write names of id_properties
cat("* id properties:", id_properties, "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
} else {
cat("*", "No id properties were found", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
if (notification) {
message("Summary file finished")
}
# Write levels of within-subject indpendent variables
cat("* Within-subject independent variables:", "\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
for (lev in 1:length(within_vars)) {
cat(paste(" ", within_vars[[lev]], ": ", sep = ""), file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat(levels(factor(raw_data[[within_vars[[lev]]]])), file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
cat("\n", file = paste(results_path, "/", sum_file_name, sep = ""), append = TRUE)
}
} # End of if(save_summary == TRUE)
# Save results
if (save_results) {
if (extension == ".txt") {
# Write results in txt format
write.table(results, row.names = FALSE, file = paste(results_path, "/", results_name, sep = ""))
} else {
# Write results in a csv format
write.csv(results, row.names = FALSE, file = paste(results_path, "/", results_name, sep = ""))
}
# Message
message(paste(results_name, "has", dim(results)[1], "observations and", dim(results)[2], "variables"))
}
message("prep() returned a data frame to console")
message("Hip Hip Hooray! prep() finished")
message("Have a great day and may all your results be significant!")
# Return
return(results)
} # End of if (length(between_vars) > 0 & length(within_vars) == 0)
} # End of prep()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.