damr: Interface to Drosophila Activity Monitor System Result Files

Documented in read_dam_file

#' Reads data from a single DAM2 single beam or a DAM5 multibeam file
#'
#' This function retrieves activity data in a DAMS text file.
#' It allows selection of a date range and channels (i.e. regions).
#'
#' @param path location of the file to read (character)
#' @param region_id vector of unique regions to read
#' @param start_datetime,stop_datetime  the start and the end of an the experiment (see details)
#' @param tz the timezone (see [OlsonNames] for a list)
#' @param date_format the format of the dates in the DAM file (see details)
#' @details
#'  `start_datetime` and `stop_datetime` are formatted as "YYYY-MM-DD HH:MM:SS".
#' `start_datetime` is used as the reference time (ZT0).
#' Therefore, if you are interested in circadian analysis and `D -> L` transitions are at 10:00:00,
#' you probably want to set `start_datetime = "YYYY-MM-DD 10:00:00"`.
#'
#' According to the acquisition system, the date format can be inconsistently formatted between DAM Systems.
#' Specify the format using [strptime] syntax. For instance:
#' * `"%d %b %y"`  -- the default, to parse `"15 Nov 2019"`
#' * `"%d-%m-%y"`  -- to parse `"15-11-2019"`
#' * `"%Y-%m-%d"`  -- the default to parse `"2019-11-15"`
#' @return A [behavr::behavr] table.
#' The metadata contains an autogenerated id per animal.
#' The data has the columns:
#' * `id` -- autogenerated unique identifier, one per animal
#' *  `t` -- time
#' * `activity` -- number of beam crosses
#' @examples
#' path <- damr_example("M064.txt")
#' dt <- read_dam_file(path, region_id = c(1:3), start_datetime = "2017-06-30 15:00:00")
#' print(dt)
#' @seealso
#'  * [load_dam] --  to load data from many files and biological conditions using metadata (the recommended alternative)
#' @aliases read_dam2_file
#' @export read_dam_file read_dam2_file
read_dam_file <- function(path,
                            region_id=1:32,
                            start_datetime=-Inf,
                            stop_datetime=+Inf,
                            tz="UTC",
                            date_format="%d %b %y"){
  . =  datetime =  time =  datetime_posix = data_type = status = NULL
  # todo check whether region has duplicates/ is in range
  start_datetime <- parse_datetime(start_datetime,tz=tz)
  stop_datetime <- parse_datetime(stop_datetime,tz=tz)
  # print(str(start_datetime))
  # print(str(stop_datetime))
  first_last_lines <- find_dam_first_last_lines(path,
                                                 start_datetime,
                                                 stop_datetime,
                                                 tz)
  first_line = first_last_lines$id[1]
  last_line = first_last_lines$id[2]
  # col_types=do.call(readr::cols_only, DAM5_COLS)


  col_names =  names(DAM5_COLS)
  col_class = c(i="integer", c="character", "_"="NULL")[as.character(DAM5_COLS)]
  #col_class <- col_class[which(DAM5_COLS != "_")]

  possible_classes <- unique(col_class)
  col_class <- lapply(possible_classes, function(x)which( col_class %in% x))
  names(col_class) <- possible_classes

  df <- fread(path,
              #header = F,
              col.names = col_names[which(DAM5_COLS != "_")],
              colClasses = col_class,
              #select = which(DAM5_COLS != "_"),
              skip = first_line - 1,
              nrows =  last_line - first_line + 1,
              showProgress = FALSE,
              drop=col_class$`NULL`)


  df <- df[, datetime := paste(date,time, sep=" ")]
  format <- paste(date_format,"%H:%M:%S", sep=" ")
  suppressWarnings(
    df <- df[, datetime_posix  := as.POSIXct(strptime(datetime,format,tz=tz))]
  )
  df[, datetime := NULL]
  setnames(df, "datetime_posix", "datetime")

  df[, data_type := DATA_TYPE_NAMES[as.character(data_type)]]
   # if start date is not defined, t0 is the first read available, whether or not is is valid!
  if(is.infinite(start_datetime))
    t0 = df$datetime[1]
  else
    t0 = start_datetime

  if(is.infinite(stop_datetime))
    t1 = df$datetime[nrow(df)]
  else
    t1 = stop_datetime
  experiment_id <- paste(format(t0, format = "%F %T"), basename(path),sep="|")
  df <- df[status == 1 & data_type != "TA"]
  dt <- df[, clean_dam_data(.SD, region_id, experiment_id, t0), by="data_type"]

  setkeyv(dt, "id")

  meta <- unique(dt[, c("id","region_id")],by="id")

  meta[,experiment_id := experiment_id]
  meta[,start_datetime := t0]
  meta[,stop_datetime := t1]

  file_info <- meta[,.(file_info =  list(list(path = path, file = basename(path)))), by="id"]
  meta <- file_info[meta]

  #meta <- met[,file:=basename(path)]
  dt <- dcast(dt, id + t ~ data_type,value.var="value")
  setkeyv(dt, "id")

  behavr::behavr(dt,meta)

}

clean_dam_data <- function(df, regions, experiment_id, t0){
  . = channel = datetime = id = region_id = t = value = NULL

  df <- unique(df, by="datetime")
  df <- df[, (colnames(df) %like% "(channel)|(datetime)"), with=F]
  setnames(df,
           grep("channel_", colnames(df), value = T),
           gsub("channel_", "0", grep("channel_", colnames(df), value = T)))
  df <- melt(df, id="datetime", variable.name = "channel", value.name = "value")

  dt <- df[ ,. (id = as.factor(sprintf("%s|%02d",experiment_id, as.integer(channel))),
                region_id = as.integer(channel),
                t = as.numeric(datetime-t0, units = "secs"),
                value = value)]

  setkeyv(dt, "id")
  dt <- dt[region_id %in% regions]
  dt
}


read_dam2_file <- function(path,
                          region_id=1:32,
                          start_datetime=-Inf,
                          stop_datetime=+Inf,
                          tz="UTC"){
  message("read_dam2_file is deprecated, please use read_dam_file instead")
  read_dam_file(path, region_id, start_datetime, stop_datetime, tz)
}