Nothing
#' Check experiment data.
#'
#' Checks that the experiment description is well-formed and complete.
#'
#' Information about a full experiment can be assembled into a spreadsheet
#' (Excel, CSV and tab-delimited text formats are supported) and used to process
#' large numbers of files in one batch. This function checks the spreadsheet to
#' make sure that it is properly formed and that all the data files referred to
#' are present.
#'
#' The function can (and ideally should) be run with the same parameters as will
#' be used to call \code{\link{read_experiment}}, although many of the
#' parameters are not required for the check.
#'
#' The content of the spreadsheet, the presence and the content of any
#' supporting files are also checked. Checks do not cover validity of the raw
#' data, so it is still possible to have invalid data even if
#' \code{\link{check_experiment}} returns \code{TRUE} (although this suggests an
#' underlying problem with the raw data). Warning and error messages are
#' intended to be useful and help any format issues be quickly resolved.
#'
#' @param filename A spreadsheet file containing a description of the experiment
#' or a trackxf file containing an exported experiment archive.
#' @param format An experiment description for reading raw data can be provided
#' as an Excel spreadsheet ("excel") or as a comma-delimited ("csv") or
#' tab-delimited ("tab", "tsv", "txt" or "text") text file. The value
#' "trackxf" indicates that the file is an archived experiment in the trackxf
#' format (as generated by \code{\link{export_data}}). Default (\code{NA}) is
#' to guess the format from the file extension.
#' @param interpolate Ignored. For compatibility with
#' \code{\link{read_experiment}}.
#' @param project.dir A directory path specifying where the files needed for
#' processing the experiment are stored. Ignored if \code{format = "trackxf"}.
#' @param data.dir A directory path specifying where the raw data are stored.
#' This is a folder root and all paths specified in the spreadsheet. Ignored
#' if \code{format = "trackxf"}.
#' @param author.note Ignored. For compatibility with
#' \code{\link{read_experiment}}.
#' @param threads Ignored. For compatibility with \code{\link{read_experiment}}.
#' @param verbose Ignored. For compatibility with \code{\link{read_experiment}}.
#'
#' @return Invisibly returns \code{TRUE} for a successful check or \code{FALSE}
#' otherwise.
#'
#' @seealso \code{\link{read_experiment}}, \code{\link{export_data}}.
#'
#' @examples
#' require(Rtrack)
#' experiment.description <- system.file("extdata", "Minimal_experiment.xlsx",
#' package = "Rtrack")
#' check_experiment(experiment.description)
#' @importFrom readxl read_excel
#' @importFrom utils read.csv read.table
#' @importFrom stats na.omit
#' @importFrom crayon bold red green
#' @importFrom rjson fromJSON
#' @importFrom RCurl url.exists
#'
#' @export
check_experiment = function(filename, format = NA, interpolate = FALSE, project.dir = NA, data.dir = project.dir, author.note = "", threads = NULL, verbose = FALSE){
if(is.na(project.dir)) project.dir = dirname(filename)
if(is.na(data.dir)) data.dir = dirname(filename)
if(unlist(strsplit(project.dir, ""))[nchar(project.dir)] != "/") project.dir = paste0(project.dir, "/")
if(unlist(strsplit(data.dir, ""))[nchar(data.dir)] != "/") data.dir = paste0(data.dir, "/")
# Check if this is actually a raw experiment or if it is an archive of some sort (not supported).
# Support for checking validity of trackxf archives may be added in the future - but is not currently implemented.
# It would be necessary to restore the entire archive, so there doesn't seem to be any gain over 'read_experiment'.
# Also, archives should have been generated by a sane algorithm from a functioning experiment, so checking is no really useful.
format = tolower(format)
if(is.na(format)){
if(RCurl::url.exists(filename)){
format = "http"
}else if(tools::file_ext(filename) %in% c("trackxf")){
format = "trackxf"
}else if(tools::file_ext(filename) %in% c("json")){
format = "json"
}else if(tools::file_ext(filename) %in% c("xls", "xlsx")){
format = "excel"
}else if(tools::file_ext(filename) %in% c("csv")){
if(ncol(read.csv(filename)) > 4){
format = "csv"
}else{
format = "csv2"
}
}else if(tools::file_ext(filename) %in% c("tab", "tsv", "txt")){
format = "tab"
}else{
message("The file format cannot be established automatically. Please specify the 'format' parameter.")
check.pass = FALSE
}
}
experiment.data = NULL
experiment.info = NULL
check.pass = TRUE
if(format == "json"){
if(!file.exists(filename)){
message(paste0("The experiment archive file ('", filename, "') does not exist!"))
check.pass = FALSE
}else{
error.message = tryCatch({
experiment.data = rjson::fromJSON(file = filename, simplify = FALSE)
TRUE
}, error = function(e){
FALSE
})
message(paste0("The file '", filename, "' is not properly formed."))
check.pass = FALSE
}
experiment.info = experiment.data[[1]]
experiment.data = experiment.data[[2]]
# Run check for required features
required.fields = c("id", "target", "day", "trial", "arena_name", "raw.t", "raw.x", "raw.y", "t", "x", "y", "arena")
for(field in required.fields){
field.data = sapply(experiment.data, "[[", field)
field.check = !(field.data == "" | sapply(field.data, is.null))
if(!all(field.check)){
if(length(which(!field.check)) > 1){
message(paste0("The experiment is missing the field '", field, "' for elements: ", paste(which(!field.check), collapse = "', '"), "."))
check.pass = FALSE
}else if(length(which(!field.check)) == 1){
message(paste0("The experiment is missing the field '", field, "' for element ", which(!field.check), "."))
check.pass = FALSE
}
}
}
required.arena.fields = c("type", "time.units", "arena.bounds") # goal and old.goal are not actually required
arenas = lapply(experiment.data, "[[", "arena")
for(field in required.arena.fields){
field.data = sapply(arenas, "[[", field)
field.check = !(field.data == "" | sapply(field.data, is.null))
if(!all(field.check)){
if(length(which(!field.check)) > 1){
message(paste0("The experiment is missing the field 'arena$", field, "' for elements: ", paste(which(!field.check), collapse = "', '"), "."))
check.pass = FALSE
}else if(length(which(!field.check)) == 1){
message(paste0("The experiment is missing the field 'arena$", field, "' for element ", which(!field.check), "."))
check.pass = FALSE
}
}
}
} # end json check
if(format != "json"){
# if(!(dir.exists(project.dir) & dir.exists(data.dir))){
# message(paste0("The project and/or data directories do not exist."))
# check.pass = FALSE
# }
if(format == "xls" | format == "xlsx" | format == "excel"){
if(!file.exists(filename)){
message(paste0("The experiment description file ('", filename, "') does not exist!"))
check.pass = FALSE
}else{
experiment.data = suppressMessages(as.data.frame(readxl::read_excel(filename, col_types = 'text'), stringsAsFactors = F))
rownames(experiment.data) = experiment.data$TrackID
}
}else if(format == "csv"){
if(!file.exists(filename)){
message(paste0("The experiment description file ('", filename, "') does not exist!"))
check.pass = FALSE
}else{
experiment.data = utils::read.csv(filename, stringsAsFactors = F, check.names = FALSE)
rownames(experiment.data) = experiment.data$TrackID
}
}else if(format == "csv2"){
if(!file.exists(filename)){
message(paste0("The experiment description file ('", filename, "') does not exist!"))
check.pass = FALSE
}else{
experiment.data = utils::read.csv2(filename, stringsAsFactors = F, check.names = FALSE)
rownames(experiment.data) = experiment.data$TrackID
}
}else if(format == "tab"){
if(!file.exists(filename)){
message(paste0("The experiment description file ('", filename, "') does not exist!"))
check.pass = FALSE
}else{
experiment.data = utils::read.delim(filename, stringsAsFactors = F, check.names = FALSE)
rownames(experiment.data) = experiment.data$TrackID
}
}else if(format == "trackxf"){
message("Checking trackxf archives is not currently supported.")
check.pass = FALSE
}else if(format == "http"){
message("Checking URLs is not supported. The experiment files must be downloaded (and the archive unpacked if necessary) first.")
check.pass = FALSE
}else{
if(!file.exists(filename)){
message(paste0("The experiment description file ('", filename, "') does not exist!"))
check.pass = FALSE
}else{ # An unknown file extension will be assumed to be a tab-delimited file. If that fails, crash out.
success = tryCatch({
utils::read.delim(filename, stringsAsFactors = F)
TRUE
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
if(!success){
message(paste0("The file '", filename, "' is not properly formed."))
}else{
experiment.data = utils::read.delim(filename, stringsAsFactors = F, check.names = FALSE)
rownames(experiment.data) = experiment.data$TrackID
}
check.pass = FALSE
}
} # end file type check
if(check.pass & !is.null(experiment.data)){
# Run check for required features
required.fields = c("_TrackID", "_TargetID", "_Day", "_Trial", "_Arena", "_TrackFile", "_TrackFileFormat")
field.check = required.fields %in% colnames(experiment.data)
if(!all(field.check)){
if(length(which(!field.check)) > 1){
message(paste0("The experiment description is missing the required columns: ", paste(required.fields[!field.check], collapse = "', '"), "."))
check.pass = FALSE
}else if(length(which(!field.check)) == 1){
message(paste0("The experiment description is missing the required column ", paste(required.fields[!field.check], collapse = "', '"), "."))
check.pass = FALSE
}
} # end field.check
optional.fields = c("_TrackIndex") # Valid but not always required
# Check remaining columns for valid names (compatible with JSON schema)
user.fields = colnames(experiment.data)[!(colnames(experiment.data) %in% c(required.fields, optional.fields))]
user.field.check = grepl("^[a-zA-Z][a-zA-Z0-9_]*$", user.fields) & !duplicated(user.fields)
if(!all(user.field.check)){
if(length(which(!user.field.check)) > 1){
user.field.suggestions = gsub("^([0-9])", "X\\1", gsub("([^a-zA-Z0-9_])", "_", make.unique(user.fields)))[!user.field.check]
message(paste0("The following column names are not valid: ", paste0("'", user.fields[!user.field.check], "'", collapse = ", "), ". Consider replacing them with: ", paste0("'", user.field.suggestions, "'", collapse = ", "), "."))
check.pass = FALSE
}else if(length(which(!user.field.check)) == 1){
user.field.suggestions = gsub("^([0-9])", "X\\1", gsub("([^a-zA-Z0-9_])", "_", make.unique(user.fields)))[!user.field.check]
message(paste0("The following column name '", user.fields[!user.field.check], "' is not valid. Consider replacing it with '", user.field.suggestions, "'."))
check.pass = FALSE
}
}
if(!file.exists(project.dir)){
message(paste0("The project directory '", project.dir, "'does not exist."))
check.pass = FALSE
}else{
arenas.present = sapply(paste0(project.dir, stats::na.omit(unique(experiment.data[, "_Arena"]))), simplify = T, USE.NAMES = T, function(arenafile) file.exists(arenafile) )
if(length(which(!arenas.present)) > 1){
message(paste0("The following arena description files cannot be found: ", paste(paste0("'", project.dir, names(arenas.present)[!arenas.present], "'"), collapse = ", "), "."))
check.pass = FALSE
}else if(length(which(!arenas.present)) == 1){
message(paste0("The arena description file '", names(arenas.present)[!arenas.present], "' does not exist."))
check.pass = FALSE
}else{
# Check the arena files
for(arena.name in stats::na.omit(unique(experiment.data[, "_Arena"]))){
description = tryCatch(
as.data.frame(t(utils::read.delim(paste0(project.dir, arena.name), header = F, sep = "=", strip.white = T, comment.char = "#", stringsAsFactors = F)), stringsAsFactors = F),
error = function(e){
message(paste0("There was a problem reading the arena description file '", filename, "'."))
check.pass = FALSE
}
)
# Workaround because multiple 'hole' definitions lead to duplicated names.
if(nrow(description) == 2) description = setNames(description[2, , drop = FALSE], as.character(description[1, ]))
# Convert time.units info into seconds.
if(description$time.units == "us" | description$time.units == "micros"){
description$time.units = 1e-6
}else if(description$time.units == "ms"){
description$time.units = 0.001
}else if(description$time.units == "s"){
description$time.units = 1
}else if(description$time.units == "min"){
description$time.units = 60
}else if(description$time.units == "h"){
description$time.units = 3600
}else if(description$time.units == "d"){
description$time.units = 86400
}else if(description$time.units == "y"){
description$time.units = 31536000
}else{
x = eval(parse(text = description$time.units))
if(is.numeric(x)){
description$time.units = x
}else{
message("The 'time.units' parameter is not valid. Please check the documentation for creating the arena file.")
}
}
#
zones.check = NULL
if(description$type == "mwm"){
time.units.check = tryCatch({
!is.na(as.numeric(description$time.units))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
zones = intersect(c("arena.bounds", "goal", "old.goal"), colnames(description))
required.zones.present = TRUE
if(!"arena.bounds" %in% zones){
message(paste0("The 'arena.bounds' definition for arena '", arena.name, "' is missing."))
required.zones.present = FALSE
}
if(!"goal" %in% zones){
message(paste0("The 'goal' definition for arena '", arena.name, "' is missing."))
required.zones.present = FALSE
}
#
zones.check = sapply(zones, function(zone){
bounds = unlist(strsplit(description[[zone]], "\\s+"))
if(bounds[1] == "circle"){
bounds.check = tryCatch({
all(length(bounds) == 4, !is.na(as.numeric(bounds[2])), !is.na(as.numeric(bounds[3])), !is.na(as.numeric(bounds[4])))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
}else{
message(paste0("The '", zone, "' definition for arena '", arena.name, "' is not valid."))
bounds.check = FALSE
}
bounds.check
})
}else if(description$type == "oft"){
time.units.check = tryCatch({
!is.na(as.numeric(description$time.units))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
zones = intersect(c("arena.bounds"), colnames(description))
required.zones.present = TRUE
if(!"arena.bounds" %in% zones){
message(paste0("The 'arena.bounds' definition for arena '", arena.name, "' is missing."))
required.zones.present = FALSE
}
#
zones.check = sapply(zones, function(zone){
bounds = unlist(strsplit(description[[zone]], "\\s+"))
if(bounds[1] == "square"){
bounds.check = tryCatch({
all(length(bounds) %in% c(4, 5, 9), all(suppressWarnings(!is.na(as.numeric(bounds[-1])))))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
}else{
message(paste0("The '", zone, "' definition for arena '", arena.name, "' is not valid."))
bounds.check = FALSE
}
bounds.check
})
}else if(description$type == "nor"){
time.units.check = tryCatch({
!is.na(as.numeric(description$time.units))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
zones = intersect(c("arena.bounds", "object.1", "object.2"), colnames(description))
required.zones.present = TRUE
if(!"arena.bounds" %in% zones){
message(paste0("The 'arena.bounds' definition for arena '", arena.name, "' is missing."))
required.zones.present = FALSE
}
if(!"object.1" %in% zones){
message(paste0("The definition of 'object.1' for arena '", arena.name, "' is missing."))
required.zones.present = FALSE
}
if(!"object.2" %in% zones){
message(paste0("The definition of 'object.2' for arena '", arena.name, "' is missing."))
required.zones.present = FALSE
}
#
zones.check = sapply(zones, function(zone){
bounds = unlist(strsplit(description[[zone]], "\\s+"))
if(bounds[1] == "circle"){
bounds.check = tryCatch({
all(length(bounds) == 4, !is.na(as.numeric(bounds[2])), !is.na(as.numeric(bounds[3])), !is.na(as.numeric(bounds[4])))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
}else if(bounds[1] == "square"){
bounds.check = tryCatch({
all(length(bounds) %in% c(4, 5, 9), all(suppressWarnings(!is.na(as.numeric(bounds[-1])))))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
}else{
message(paste0("The '", zone, "' definition for arena '", arena.name, "' is not valid."))
check.pass = FALSE
}
bounds.check
})
}else if(description$type == "barnes"){
time.units.check = tryCatch({
!is.na(as.numeric(description$time.units))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
zones = intersect(c("arena.bounds", "hole", "holes", "goal", "old.goal"), colnames(description))
required.zones.present = TRUE
if(!"arena.bounds" %in% zones){
message(paste0("The 'arena.bounds' definition for arena '", arena.name, "' is missing."))
required.zones.present = FALSE
}
if(!("holes" %in% zones | "hole" %in% zones)){
message(paste0("The 'hole' definitions for arena '", arena.name, "' are missing."))
required.zones.present = FALSE
}
if(!"goal" %in% zones){
message(paste0("The 'goal' definition for arena '", arena.name, "' is missing."))
required.zones.present = FALSE
}
#
zones.check = sapply(zones, function(zone){
bounds = unlist(strsplit(description[[zone]], "\\s+"))
if(bounds[1] == "circle"){
bounds.check = tryCatch({
all(length(bounds) == 4, !is.na(as.numeric(bounds[2])), !is.na(as.numeric(bounds[3])), !is.na(as.numeric(bounds[4])))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
}else if(bounds[1] == "square"){
bounds.check = tryCatch({
all(length(bounds) %in% c(4, 5, 9), all(suppressWarnings(!is.na(as.numeric(bounds[-1])))))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
}else{
message(paste0("The '", zone, "' definition for arena '", arena.name, "' is not valid."))
check.pass = FALSE
}
bounds.check
})
}else if(description$type == "apa"){
time.units.check = tryCatch({
!is.na(as.numeric(description$time.units))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
zones = intersect(c("arena.bounds", "aversive.zone"), colnames(description))
required.zones.present = TRUE
if(!"arena.bounds" %in% zones){
message(paste0("The 'arena.bounds' definition for arena '", arena.name, "' is missing."))
required.zones.present = FALSE
}
if(!"aversive.zone" %in% zones){
message(paste0("The 'aversive.zone' definition for arena '", arena.name, "' is missing."))
required.zones.present = FALSE
}
#
zones.check = sapply(zones, function(zone){
bounds = unlist(strsplit(description[[zone]], "\\s+"))
if(bounds[1] == "circle"){
bounds.check = tryCatch({
all(length(bounds) == 4, !is.na(as.numeric(bounds[2])), !is.na(as.numeric(bounds[3])), !is.na(as.numeric(bounds[4])))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
}else if(bounds[1] == "sector"){
bounds.check = tryCatch({
all(length(bounds) == 3, !is.na(as.numeric(bounds[2])), !is.na(as.numeric(bounds[3])))
}, error = function(e){
FALSE
}, warning = function(e){
FALSE
})
}else{
message(paste0("The '", zone, "' definition for arena '", arena.name, "' is not valid."))
check.pass = FALSE
}
bounds.check
})
}else{
message(paste0("In '", arena.name, "', the arena type '", description$type, "' is not supported."))
check.pass = FALSE
}
#
if(!all(zones.check)){
if(length(which(!zones.check)) > 1){
message(paste0("In '", arena.name, "', the zone descriptions for the following zones are not valid: ", paste0("'", names(zones.check)[!zones.check], "'", collapse = ", "), "."))
check.pass = FALSE
}else if(length(which(!zones.check)) == 1){
message(paste0("In '", arena.name, "', the zone description for '", names(zones.check)[!zones.check], "' is not valid."))
check.pass = FALSE
}
}
#
}
}
} # end project.dir check
if(!file.exists(data.dir)){
message(paste0("The data directory '", data.dir, "'does not exist."))
check.pass = FALSE
}else{
track.format.info = unique(experiment.data[, "_TrackFileFormat"])
supported.formats = suppressMessages(identify_track_format())
track.formats = sapply(track.format.info, function(info) strsplit(info, "_")[[1]][1])
paths.formatted = sapply(track.formats, simplify = T, USE.NAMES = T, function(trackformat) trackformat %in% supported.formats )
if(length(which(!paths.formatted)) > 1){
message(paste0("The track formats ", paste(paste0("'", names(paths.formatted)[!paths.formatted], "'"), collapse = ", "), " are not supported."))
check.pass = FALSE
}else if(length(which(!paths.formatted)) == 1){
message(paste0("The track format '", names(paths.formatted)[!paths.formatted], "' is not supported."))
check.pass = FALSE
}
paths.present = sapply(paste0(data.dir, stats::na.omit(unique(experiment.data[, "_TrackFile"]))), simplify = T, USE.NAMES = T, function(trackfile) file.exists(trackfile) )
if(length(which(!paths.present)) > 1){
message(paste0("The following track files cannot be found: ", paste(paste0("'", data.dir, names(paths.present)[!paths.present], "'"), collapse = ", "), "."))
check.pass = FALSE
}else if(length(which(!paths.present)) == 1){
message(paste0("The track file ", paste(paste0("'", data.dir, names(paths.present)[!paths.present], "'"), collapse = ", "), " cannot be found."))
check.pass = FALSE
}
} # end data.dir check
}else if(check.pass){
message(paste0("There was a problem reading the experiment file.")) # This should never happen. The file is valid but the reading into a data frame returned NULL.
check.pass = FALSE
} # end experiment.data check
} # end non-json check
if(check.pass){
message(paste0(crayon::green("\u2714"), crayon::green(" This experiment appears to be valid and complete.")))
}else{
if(format != "trackxf"){ # This is not an error - just not supported.
message(paste0(crayon::red("\u2716"), crayon::red(" This experiment has some problems. Please check the documentation at https://rupertoverall.net/Rtrack.")))
}
}
invisible(check.pass)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.