R/import-data.R

Defines functions import.all import.shrooms import.neighborhood import.dtk import.mat read_edf find.unprocessed

Documented in find.unprocessed import.all import.dtk import.mat import.neighborhood import.shrooms read_edf

# Import Data
#
# Imports data from various tasks utilizing a single schema
# to perform uniform unary operations and analysis.
#
# Some useful keyboard shortcuts for package authoring:
#
#   Install Package:           'Ctrl + Shift + B'
#   Check Package:             'Ctrl + Shift + E'
#   Test Package:              'Ctrl + Shift + T'

#' Finds unprocessed files based on file names
#'
#' Finds files that exist in a raw directory which do not have a counterpart
#' in a processed directory.  Raw and processed files are matched on their
#' base name with the final file extension removed,
#' i.e. foo_bar.fb <-> foo_bar.bf
#'
#' @param dir.raw Directory location for the raw directory.
#' @param dir.processed Directory location for the processed directory.
#' @param input.file.extension File extension to look for, e.g. ".mat".  It is
#'   used as a regular expression anchored at the end of the file name.  The
#'   default (empty string) considers every file in \code{dir.raw}.
#'
#' @return Character vector of raw file names (as returned by
#'   \code{list.files(dir.raw)}) that have no processed counterpart.  An empty
#'   character vector is returned when the directories cannot be listed.
find.unprocessed <- function(dir.raw, dir.processed, input.file.extension = '') {
  log.debug("Checking for unprocessed data.", "IMPORT")

  # List both directories inside one tryCatch.  The handler's value becomes
  # the value of tryCatch() itself, so failure is signalled with NULL and the
  # early return happens out here, in the function body.
  listing <- tryCatch(
    list(
      raw = list.files(dir.raw, pattern = paste0(input.file.extension, "$")),
      processed = list.files(dir.processed)
    ),
    error = function(err) {
      log.error("Error Accessing Directory", "IMPORT")
      NULL
    }
  )
  if (is.null(listing)) {
    return(character(0))
  }

  raw <- listing$raw
  if (length(raw) == 0) {
    log.warn(paste("No Raw Files Found at ", dir.raw, sep = " "), "IMPORT")
  }

  # Drop the final extension (the last dot and everything after it) so that
  # e.g. "foo_bar.mat" and "foo_bar.asc" both reduce to "foo_bar".
  strip.extension <- function(paths) {
    sub(pattern = "\\.[^.]*$", replacement = "", basename(paths))
  }
  raw.simple <- strip.extension(raw)
  processed.simple <- strip.extension(listing$processed)

  # Expects raw and processed names to be the same once extensions are removed
  raw[!(raw.simple %in% processed.simple)]
}


#' @title Import EDF File
#'
#' @description A convenience wrapper around the FDBeye file importer to
#'      import using UTF-8 format
#'
#' @details Calls the "edf2asc" R function to convert *edf files to *asc
#'     files and then reads each resulting *asc file with eyelinker::read.asc.
#'     Existing *asc files will be overwritten because we set the option to -y.
#'     Before using this function, you must have the SR Research edf2asc downloaded from the SR Research Website (Downloads/Eyelink Developers Kit: https://www.sr-support.com/thread-13.html)
#'     and included in PATH. See https://rdrr.io/github/davebraze/FDBeye/man/edf2asc.html for details on how to add the command-line utility to your PATH.
#'
#' @param edf_file Path(s) to *edf file(s) in UTF-8 format (UNIX/MAC pathing).  Paths will be converted relative to your OS.
#'     Every path must exist.
#' @param asc_output_dir Directory handed to edf2asc for the converted *asc files.  When \code{keep_asc} is TRUE and the
#'     directory does not exist yet it is created.  Ignored when \code{keep_asc} is FALSE (\code{tempdir()} is used instead).
#' @param keep_asc Logical. If FALSE, *asc files are written to a temporary directory and removed after they have been read.
#' @param gzip_asc Logical. Passed on to edf2asc unchanged.
#' @param samples Logical. Whether to import raw data at the sampling rate acquired off the eyetracker.
#' @param header Optional string.  When supplied it is passed to \code{log_chunk_header} and its leading "<digits>." prefix
#'     is used to build the label given to \code{tryCatch.ep}.
#' @param parse_all Logical. Whether to additionally parse between-event messages.
#' @param data.table Logical. Whether to convert the \code{raw}, \code{sacc}, \code{fix}, \code{blinks} and \code{msg}
#'     elements of every subject to data.table objects.
#' @param ... Additional arguments forwarded to eyelinker::read.asc.
#'
#' @importFrom eyelinker read.asc
#' @return list of subject eye data, where each element is a list generated by eyelinker::read.asc, named by the
#'     base name of the corresponding *edf file.  If the import did not produce a result, the string
#'     "Something went wrong" is returned instead.
#' @examples
#' \dontrun{
#'   eye_parsed <- read_edf(c("~/Box/s3_behav_data/neighborhood/eye/data/raw/N_003_TS.edf",
#'       "~/Box/s3_behav_data/neighborhood/eye/data/raw/N_004_az.edf"), keep_asc=FALSE, parse_all=TRUE)
#' }
#' @export
read_edf <- function(edf_file,
                     asc_output_dir=NULL,
                     keep_asc=TRUE,
                     gzip_asc=TRUE,
                     samples=TRUE,
                     header = NULL,
                     parse_all = TRUE,
                     data.table = TRUE,
                     ...) {

  # Optional chunk header: log it and derive the step label for tryCatch.ep.
  if (!is.null(header)) {
    log_chunk_header(header)
    dt <- paste0(str_extract(header, "\\d+\\."), "1 Read EDF file:")
  } else {
    dt <- NULL
  }

  tryCatch.ep({
    stopifnot(all(file.exists(edf_file))) # require that files are present
    if (!keep_asc) {
      asc_output_dir <- tempdir() # output ascs to temporary directory
    } else if (!is.null(asc_output_dir) && !dir.exists(asc_output_dir)) {
      # create output directory for ASC files if requested
      dir.create(asc_output_dir, recursive = TRUE)
    }

    # convert all files to asc
    asc_files <- pkgcond::suppress_warnings(
      edf2asc(edf_file, asc_output_dir = asc_output_dir, gzip_asc = gzip_asc)
    )

    # pass additional arguments such as parse_all to read.asc
    res <- lapply(asc_files, function(fname) {
      eye_data <- pkgcond::suppress_warnings(
        read.asc(fname = fname, samples = samples, parse_all = parse_all, ...),
        pattern = "had status 255", fixed = TRUE
      )
      eye_data$asc_file <- fname
      eye_data
    })

    if (!keep_asc) { file.remove(asc_files) } # cleanup asc files if requested

    names(res) <- basename(edf_file)

    # tag with initial .edf name (seq_along is safe when res is empty)
    for (i in seq_along(res)) {
      res[[i]][["edf_file"]] <- edf_file[i]
    }

    # to avoid confusion, re-label the .edf "block" column as "eventn" to fit
    # with ep conventions.
    res <- lapply(res, function(x) { # over subjects
      lapply(x, function(y) { # over elements in ep.subject
        if ("block" %in% names(y)) {
          y <- y %>% rename(`eventn` = `block`)
        }
        y
      })
    })

  }, dt)

  # Only look in this function's own frame: a stale `res` in an enclosing or
  # global environment must not be mistaken for a successful import.  Return
  # the sentinel right away so it is never indexed like a list below.
  if (!exists("res", inherits = FALSE)) {
    return("Something went wrong")
  }

  if (data.table) {
    # Convert the tabular elements of every subject, not just the first one.
    table_fields <- c("raw", "sacc", "fix", "blinks", "msg")
    for (s in seq_along(res)) {
      for (field in table_fields) {
        res[[s]][[field]] <- data.table(res[[s]][[field]])
      }
    }
  }

  res
}




#' Imports MAT files
#'
#' Reads every MAT (matlab) file in the raw directory that has no counterpart
#' in the proc directory and combines the contents into one list.
#'
#' @param dir base directory for raw and proc directories.  The sub-directory
#'   names are appended directly, so \code{dir} is expected to end with a
#'   file separator.
#' @return list with a single element \code{mat.data}: the results of
#'   \code{R.matlab::readMat} for all unprocessed files concatenated with
#'   \code{c()}, or \code{NULL} when there is nothing to import.
import.mat <- function(dir) {
  os <- NeuroMap$SYSTEM$os

  # Only Windows uses a backslash; every other (or unknown) OS gets "/" so the
  # separator is always defined.
  file.separator <- if (identical(os, "Windows")) "\\" else "/"

  dir.raw <- paste0(dir, "raw", file.separator)
  dir.proc <- paste0(dir, "proc", file.separator)
  files.mat <- find.unprocessed(dir.raw, dir.proc, ".mat")

  # TODO Need more concrete file name and probably want to figure that out somewhere else
  mat.per.file <- lapply(files.mat, function(imp) {
    log.info(paste("Importing mat file", imp, sep = " "), logger = "IMPORT")
    R.matlab::readMat(paste0(dir.raw, imp))
  })

  # Flatten one level, matching repeated c(mat.data, mat.datum); an empty
  # input yields NULL.
  list(mat.data = do.call(c, mat.per.file))
}

################ DEPRECATE BELOW?


#' Imports Defend the Kingdom data
#'
#' Placeholder importer: at present it only prints a status line and reads
#' no data.
#'
#' @return The status message, as returned by \code{print}.
import.dtk <- function() {
  status <- "Importing Defend The Kingdom"
  print(status)
}


#' Imports Neighborhood data
#'
#' Placeholder importer: at present it only prints a status line and reads
#' no data.
#'
#' @return The status message, as returned by \code{print}.
import.neighborhood <- function() {
  status <- "Importing Neighborhood"
  print(status)
}


#' Imports shrooms data
#'
#' Logs the start of the import, then gathers the eye-tracking import and the
#' MAT import for the same directory and concatenates the two results.
#'
#' @param dir directory to import from
#' @return the results of \code{import.edf(dir)} and \code{import.mat(dir)}
#'   combined with \code{c()}
import.shrooms <- function(dir) {
  log.info("Importing Shrooms Data", logger = "IMPORT")

  # NOTE(review): import.edf is not defined in this file -- confirm it is
  # provided elsewhere in the package.
  eye.part <- import.edf(dir)
  mat.part <- import.mat(dir)

  c(eye.part, mat.part)
}

#' Pipeline Block Function
#'
#' Creates the "IMPORT" logger at the level configured in
#' \code{NeuroMap$CONFIG$log_level_import}, runs the shrooms import on
#' \code{NeuroMap$CONFIG$dir_shrooms}, and hands back its input untouched.
#' The value of the shrooms import is discarded.
#'
#' @param input empty import used for block
#' @return \code{input}, unchanged
#'
#' @export
import.all <- function(input = list()) {
  create.log("IMPORT", level = NeuroMap$CONFIG$log_level_import)

  # Called for its side effects only; the imported data is not kept here.
  import.shrooms(NeuroMap$CONFIG$dir_shrooms)

  input
}
PennStateDEPENdLab/experiment_pipeline documentation built on Nov. 27, 2024, 4:56 a.m.