#' Read and parse an ascii (.asc) file.
#'
#' @param path_ascii path to a .asc file
#' @import data.table
#' @export
read_ascii = function(path_ascii) {
dat = readLines(path_ascii)
## id_lines_star = grep("^[*]{2}", dat)
## id_lines_input = grep("^[INPUT]", dat)
id_lines_start = grep("^START", dat)
id_lines_end = grep("^END", dat)
## remove last few lines (becuase of invalid file end)
## n = length(dat)
## dat = dat[-((n-100):n)]
## for now, I'll assume that every file has exactly 7 occurences of INPUT
## stopifnot(length(id_lines_input) %in% 6:8)
## not true generally...
## for our experiments, if there is just one occurrence of START and END,
## thats delineating the whole experiment.
## otherwise its delineating the trials
stopifnot(length(id_lines_start) > 0 && length(id_lines_end) > 0)
if (length(id_lines_start) == 1 & length(id_lines_end) == 1) {
## seperate header and data
header = dat[1:id_lines_start[1]]
dat = dat[(id_lines_start[1]+2):(id_lines_end-2)]
} else if (length(id_lines_start) > 1 & length(id_lines_end) > 1) {
if (length(id_lines_start) != length(id_lines_end))
stop(sprintf("Unequal number of START and END messages in .asc file.\nSTART: %d, END: %d",
length(id_lines_start), length(id_lines_end)))
stop("Not implemented yet.")
} else {
stop(sprintf("Unexpected number of START and END messages in .asc file.\nSTART: %d, END: %d",
length(id_lines_start), length(id_lines_end)))
}
## old:
## seperate header and data
## header = dat[1:id_lines_input[length(id_lines_input) - 1]]
## dat = dat[(id_lines_input[length(id_lines_input) - 1]+1):(id_lines_input[length(id_lines_input)]-1)]
## parse events
events = dat[grep("^[0-9]+", dat, invert = TRUE)]
## reduce data to regular rows
dat = dat[grep("^[0-9]+", dat)]
## read in raw data
rawdat = data.table::fread(paste(dat, collapse = "\n"),
select = 1:4,
colClasses = list(integer = 1, numeric = 2:4),
na.strings = ".",
stringsAsFactors = FALSE,
col.names = c("time", "x", "y", "ps"),
nrows = length(dat))
## for now, assume that all rows have one millisecond
## ... or at least: are the same
if (! length(unique(diff(rawdat$time))) == 1)
warning("Jump in raw time vector.")
##=========================
## sort out events
##=========================
msg = data.table::fread(paste(events[grep("^MSG", events)], collapse = "\n"),
colClasses = list(integer = 1, character = 2),
stringsAsFactors = FALSE,
col.names = c("time", "text"))
msg[, time := as.integer(gsub("^([0-9]+).*", "\\1", text))]
msg[, text := gsub("^([0-9]+ )(.*)", "\\2", text)]
fix = data.table::fread(paste(events[grep("^EFIX", events)], collapse = "\n"),
select = 1:6,
colClasses = list(integer = 1:3, numeric = 4:6),
stringsAsFactors = FALSE,
col.names = c("t1", "t2", "dur", "x", "y", "ps"))
fix[, t1 := as.numeric(gsub("^.* ([0-9]+)$", "\\1", t1))]
sacc = data.table::fread(paste(events[grep("^ESACC", events)], collapse = "\n"),
colClasses = list(integer = 1:3, numeric = 4:7),
stringsAsFactors = FALSE,
col.names = c("t1", "t2", "dur", "x1", "y1", "x2", "y2", "ampl", "velo"))
sacc[, t1 := as.numeric(gsub("^.* ([0-9]+)$", "\\1", t1))]
blink = data.table::fread(paste(events[grep("^EBLINK", events)], collapse = "\n"),
colClasses = list(integer = 1:3),
stringsAsFactors = FALSE,
col.names = c("t1", "t2", "dur"))
blink[, t1 := as.numeric(gsub("^.* ([0-9]+)$", "\\1", t1))]
##=========================
## warnings and checks
##=========================
## TODO: write more sanity checks
if (sacc[, any(dur > 10000)]) {
warning(sprintf("Removed saccade starting at %d and ending at time %d, hence with a length > 10000. \n", sacc[dur > 10000, t1], sacc[dur > 10000, t2]))
## remove saccades with duration > 10000
sacc = sacc[dur < 10000]
}
if (blink[, any(dur > 10000)]) {
warning(sprintf("There are blinks with durations > 10.000."))
}
##=========================
## return
##=========================
obj = list(raw = rawdat, msg = msg, fix = fix, sacc = sacc, blink = blink, header = header)
class(obj) = c(class(obj), "edar_data")
obj
}
#' Read and parse an ascii (.asc) file.
#'
#' @param obj [edar_data] object as returned from read_ascii function
#' @param trial_seperators [list] with entries 'trial_start' and 'trial_end'. Either these entries contain
#' a character vector of length 1 with the message delineating the trials, or a character with one message
#' per trial. For example, the whole time series of the experiment is cut between the first entries of
#' `trial_start` and `trial_end` to obtain the first trial.
#' `trial_end` can also be [numeric] and indicating the length trials.
#' @import data.table
#' @export
slice_into_trials = function(obj, trial_seperators, number_trials = NULL) {
stopifnot(inherits(obj, "edar_data"))
if (is.null(number_trials)) {
## try to infer number of trials
if (! is.null(trial_seperators$grep) && trial_seperators$grep == TRUE) {
number_trials <- length(obj$msg[grep(trial_seperators$trial_start, text), text])
} else {
number_trials <- length(obj$msg[, text[text %in% trial_seperators$trial_start]])
}
}
if (! all(trial_seperators$trial_start %in% obj$msg[, text])) {
found = trial_seperators$trial_start %in% obj$msg[, text]
warning(sprintf("Could only find the following messages provided in 'trial_seperators$trial_start': \n%s\nMessages not found: \n%s\n",
paste(trial_seperators$trial_start[found], collapse = ", "),
paste(trial_seperators$trial_start[!found], collapse = ", ")))
}
## starting times of all trials
if (!is.null(trial_seperators$grep) && trial_seperators$grep == TRUE) {
tstart_vec <- obj$msg[grep(trial_seperators$trial_start, text), time]
} else {
tstart_vec <- obj$msg[, time[text %in% trial_seperators$trial_start]]
}
if (length(tstart_vec) == 0)
stop(c("'trial_start' messages not found. Please make sure to input messages",
" identical to those in the raw .asc file."))
## trial_end is a vector of numbers, use these as the length
## if they are character, regard them as messages
if (is.numeric(trial_seperators$trial_end[1])) {
if (! (length(trial_seperators$trial_end) == 1 ||
length(trial_seperators$trial_end) == number_trials))
stop("trial_seperators$trial_end has to have length 1 or equal to number of trials.")
tend_vec <- tstart_vec + trial_seperators$trial_end
} else {
if (!is.null(trial_seperators$grep) && trial_seperators$grep == TRUE) {
tend_vec <- obj$msg[grep(trial_seperators$trial_end, text), time]
} else {
tend_vec <- obj$msg[, time[text %in% trial_seperators$trial_end]]
}
}
system.time(
for(i in 1:number_trials) {
tstart <- tstart_vec[i]
tend <- tend_vec[i]
## at last trial, let everyting til the end be in that trial
if (is.na(tend))
tend <- tstart + obj$raw[ , max(time, na.rm = TRUE)]
## add trial index and time_in_trial: relative to start of current trial
obj$raw[ time >= tstart & time < tend,
`:=`(trial = i,
time_in_trial = time - tstart)]
obj$msg[ time >= tstart & time <= tend,
`:=`(trial = i,
time_in_trial = time - tstart)]
obj$fix[ t1 >= tstart & t2 <= tend,
`:=`(trial = i,
time_in_trial = t1 - tstart)]
obj$sacc[ t1 >= tstart & t2 <= tend,
`:=`(trial = i,
time_in_trial = t1 - tstart)]
obj$blink[ t1 >= tstart & t2 <= tend,
`:=`(trial = i,
time_in_trial = t1 - tstart)]
})
## format(object.size(obj), units = "auto")
obj$raw_notrial = obj$raw[is.na(trial)]
obj$raw = obj$raw[!is.na(trial)]
## ## set class
## class(obj) = c(class(obj), "edar_data")
obj
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.