R/load-ethoscope.R

Defines functions load_ethoscope

Documented in load_ethoscope

#' Load data from ethoscope result files
#'
#' This function is used to import behavioural data generated
#' by the [ethoscope platform](https://www.notion.so/giorgiogilestro/Ethoscope-User-Manual-a9739373ae9f4840aa45b277f2f0e3a7).
#' That is it loads multiple `.db` files into a single `R` [behavr::behavr] table.
#'
#' @param metadata [data.table::data.table] used to load data (see detail)
#' @param min_time,max_time load only data between `min_time` and `max_time` (in seconds).
#' This time is *relative to the start of the experiment*.
#' @param reference_hour hour, in the day, to use as ZT0 reference.
#' When unspecified, time will be relative to the start of the experiment.
#' @param verbose whether to print progress (a logical)
#' @param columns optional vector of columns to be selected from the db file.
#' Time (t) is always implicitly selected.
#' When `NULL` and if `FUN` is set, columns can be retrieved automatically (from the attributes of `FUN`).
#' @param cache the name of a local directory to cache results for faster subsequent data loading.
#' @param ncores number of cores to use for optional parallel processing (experimental).
#' @param FUN function (optional) to transform the data from each individual
#' immediately after is has been loaded.
#' @param ... extra arguments to be passed to `FUN`
#' @param map_arg a list to map `FUN` arguments to metavariables values. See details
#' @return A [behavr] table.
#' In addition to the metadata, it contains the data, with the columns:
#' * `id` -- autogenerated unique identifier, one per animal
#' * `t` -- time (s)
#' * Several variables recorded by ethoscopes (position, angle, width/height and others), or computed by `FUN`.
#' Distance units (e.g. xy position, height/width) are expressed as a fraction of the width of the ROI they originate from.
#' @details
#' the linked metadata should be generated using [link_ethoscope_metadata].
#' `map_arg` is a list of the form `list(fun_arg = "metavariable")`.
#' When provided, `FUN` will set specific arguments (`fun_arg`) to the value of a (quoted) metavariable.
#'
#' @examples
#' dir <- paste0(scopr_example_dir(), "/ethoscope_results/")
#' data(region_id_metadata)
#' metadata <- link_ethoscope_metadata(region_id_metadata, dir)
#' print(metadata)
#'
#' # Default data loading
#' dt <- load_ethoscope(metadata)
#' dt
#'
#' # We use reference hour to set zt0 to 09:00 GMT
#' dt <- load_ethoscope(metadata, reference_hour=9)
#' dt
#'
#' # Only load x and y positions
#' dt <- load_ethoscope(metadata, columns=c("x", "y"), reference_hour=9)
#' dt
#' # apply function whilst loading the data
#' dt <- load_ethoscope(metadata, reference_hour=9, FUN=head)
#' dt
#'
#' @seealso
#' * [behavr::behavr] -- to understand the output format
#' * [experiment_info] -- to show information about a file/experiment
#' * [list_result_files] -- to list available files
#' @references
#' * [behavr tutorial](https://rethomics.github.io/behavr.html) -- how to work with the obtained [behavr] table
#' @export
load_ethoscope <- function(   metadata,
                              min_time = 0,
                              max_time = Inf,
                              reference_hour = NULL,
                              verbose = TRUE,
                              columns = NULL,
                              cache = NULL,
                              ncores = 1,
                              FUN = NULL,
                              map_arg = NULL,
                              ...){

  file_info = NULL
  # takes a part of a metadata and et the corresponding data
  load_fun <- function(q){
    # Each row of metadata refers to a unique ROI. to each ROI we apply the function `parse_single_roi`
    # and get each ROI in a dt.
    # So, l_dt is a list of data tables, one per ROI. If no data is availeble, the list element is `NULL`.

    data.table::setkeyv(q, data.table::key(metadata))
    l_rows <- lapply(1:nrow(q),function(i){q[i,]})

    parse_wrapper <- function(row){
      arg_list = list(row,
                      min_time = min_time,
                      max_time = max_time,
                      reference_hour = reference_hour,
                      verbose = verbose,
                      columns=columns,
                      cache=cache,
                      FUN, ...)
      arg_val = lapply(map_arg, function(x)row[,eval(parse(text=x))])
      arg_list = c(arg_list, arg_val)
      do.call(parse_single_roi, arg_list)

    }

    l_dt <- lapply(l_rows, parse_wrapper)
    behavr::bind_behavr_list(l_dt)
  }
  #


  experiment_id <- metadata[, sapply(file_info, function(x) x$path)]
  q_l <- split(metadata, experiment_id)

  if(ncores == 1){
    l_dt <- lapply(q_l, load_fun)
  }
  else{
    if (!requireNamespace("parallel", quietly = TRUE)) {
      stop("`parallel` package needed for ncores > 1.
           Please install it.",
           call. = FALSE)
    }
    l_dt <- parallel::mclapply(q_l, load_fun, mc.cores=ncores)
  }

  dt <- behavr::bind_behavr_list(l_dt)
  rm(l_dt)
  # we can force R to garbage collect, making memory avalable:
  gc()
  dt
}

Try the scopr package in your browser

Any scripts or data that you put into this service are public.

scopr documentation built on Aug. 15, 2022, 5:05 p.m.