R/file_io.R

Defines functions import_set read_vmrk read_dat read_vhdr import_vhdr import_cnt import_raw

Documented in import_cnt import_raw import_set import_vhdr read_dat read_vmrk

#' Function for reading raw data.
#'
#' Currently BDF/EDF, 32-bit .CNT, and Brain Vision Analyzer files are
#' supported. Filetype is determined by the file extension.The `edfReader`
#' package is used to load BDF/EDF files, whereas custom code is used for .CNT
#' and BVA files. The function creates an `eeg_data` structure for
#' subsequent use.
#'
#' @author Matt Craddock, \email{matt@@mattcraddock.com}
#' @param file_name File to import. Should include file extension.
#' @param file_path Path to file name, if not included in filename.
#' @param recording Name of the recording. By default, the filename will be
#'   used.
#' @param participant_id Identifier for the participant. Defaults to NA.
#' @param fast_bdf New, faster method for loading BDF files. Experimental.
#' @import tools
#' @importFrom purrr map_df is_empty
#' @importFrom tibble tibble as_tibble
#' @examples
#' \dontrun{
#' import_raw("test_bdf.bdf")
#' }
#' @return An object of class `eeg_data`
#' @export

import_raw <- function(file_name,
                       file_path = NULL,
                       recording = NULL,
                       participant_id = NA,
                       fast_bdf = TRUE) {

  file_type <- tools::file_ext(file_name)

  if (!is.null(file_path)) {
    file_name <- file.path(file_path, file_name)
  }

  if (is.null(recording)) {
    recording <- basename(tools::file_path_sans_ext(file_name))
  }

  if (file_type %in% c("bdf","edf")) {
    message(paste("Importing",
                  file_name,
                  "as",
                  toupper(file_type)))

    if (fast_bdf && file_type == "bdf") {
      bdf_header <- read_bdf_header(file_name)
      sigs <- read_bdf_data(file_name, bdf_header)
      colnames(sigs) <- bdf_header$chan_labels
      sigs <- tibble::as_tibble(sigs)
      srate <- bdf_header$srate[[1]]
    } else {

      if (!requireNamespace("edfReader", quietly = TRUE)) {
        stop("Package \"edfReader\" needed. Please install it.",
             call. = FALSE)
      }
      data <- edfReader::readEdfSignals(edfReader::readEdfHeader(file_name))
      #check for an annotations channel
      anno_chan <- which(vapply(data,
                                function(x) isTRUE(x$isAnnotation),
                                FUN.VALUE = logical(1)))

      #remove annotations if present - could put in separate list...
      if (length(anno_chan) > 0) {
        data <- data[-anno_chan]
        message("Annotations are currently discarded. File an issue on Github if you'd like
  this to change.")
      }

      sigs <- purrr::map_df(data, "signal")
      srate <- data[[1]]$sRate
    }
    # Biosemi triggers should be in the range 0-255, but sometimes are read from
    # the wrong "end"
    if ("Status" %in% names(sigs)) {
      events <- sigs$Status %% (256)
    } else {
      events <- integer(0)
    }

    if (purrr::is_empty(events)) {
      warning("No status channel. Retaining all channels.")
      chan_nos <- 1:ncol(sigs)
    } else {
      #all chans except Status
      chan_nos <- (1:ncol(sigs))[-which(names(sigs) == "Status")]
    }

    timings <- tibble::tibble(sample = 1:nrow(sigs))
    timings$time <- (timings$sample - 1) / srate

    if (is.null(chan_nos)) {
      chan_nos <- 1:(ncol(sigs) - 1)
    }

    sigs <- tibble::as_tibble(sigs[, chan_nos])

    # The events are often recorded over a number of samples, but should
    # typically be point events; this finds the time at which the events start
    events_diff <- diff(events)
    event_table <-
      tibble::tibble(event_onset = which(events_diff > 0) + 1,
                     event_time = which(events_diff > 0) / srate,
                     event_type = events[which(events_diff > 0) + 1])

    epochs <- tibble::new_tibble(list(epoch = 1,
                                      participant_id = participant_id,
                                      recording = recording),
                                 nrow = 1,
                                 class = "epoch_info")

    data <- eeg_data(data = sigs,
                     srate = srate,
                     events = event_table,
                     timings = timings,
                     epochs = epochs)
    data
  } else if (identical(file_type,"cnt")) {
    message(paste("Importing Neuroscan", toupper(file_type), file_name))
    message(paste("Note: if this is 16-bit or an ANT Neuro .CNT file, reading will fail."))
    data <- import_cnt(file_name)

    sigs <- tibble::as_tibble(t(data$chan_data))
    names(sigs) <- data$chan_info$electrode
    srate <- data$head_info$samp_rate
    timings <- tibble::tibble(sample = 1:dim(sigs)[[1]])
    timings$time <- (timings$sample - 1) / srate

    event_table <-
      tibble::tibble(event_onset = data$event_list$offset + 1,
                     event_time = (data$event_list$offset + 1) / srate,
                     event_type = data$event_list$event_type)

    epochs <- tibble::new_tibble(list(epoch = 1,
                                      participant_id = participant_id,
                                      recording = recording),
                                 nrow = 1,
                                 class = "epoch_info")

    data <- eeg_data(data = sigs,
                     srate = srate,
                     chan_info = validate_channels(data$chan_info),
                     events = event_table,
                     timings = timings,
                     epochs = epochs)

    } else if (identical(file_type, "vhdr")) {

      if (!requireNamespace("ini", quietly = TRUE)) {
        stop("Package \"ini\" needed to read BVA files. Please install it.",
             call. = FALSE)
      }

      message(paste("Importing Brain Vision Analyzer file", file_name))


      data <- import_vhdr(file_name,
                          recording = recording,
                          participant_id = participant_id)

    } else {
      stop("Unsupported filetype")
    }
  data
}

#' Import Neuroscan .CNT file
#'
#' Beta version of function to import Neuroscan .CNT files. Only intended for
#' import of 32-bit files.
#'
#' @param file_name Name of .CNT file to be loaded.
#' @importFrom tibble tibble
#' @keywords internal

import_cnt <- function(file_name) {

  cnt_file <- file(file_name, "rb")

  # Read in meta-info - number of channels, location of event table, sampling
  # rate...

  pos <- seek(cnt_file, 12)
  next_file <- readBin(cnt_file,
                       integer(),
                       size = 4,
                       n = 1,
                       endian = "little")
  pos <- seek(cnt_file, 353)
  n_events <- readBin(cnt_file,
                      integer(),
                      n = 1,
                      endian = "little")
  pos <- seek(cnt_file, 370)
  n_channels <- readBin(cnt_file,
                        integer(),
                        n = 1,
                        size = 2,
                        signed = FALSE,
                        endian = "little")
  pos <- seek(cnt_file, 376)
  samp_rate <-  readBin(cnt_file,
                        integer(),
                        n = 1,
                        size = 2,
                        signed = FALSE,
                        endian = "little")
  pos <- seek(cnt_file, 864)
  n_samples <- readBin(cnt_file,
                       integer(),
                       size = 4,
                       n = 1,
                       endian = "little")
  pos <- seek(cnt_file, 886)
  event_table_pos <- readBin(cnt_file,
                             integer(),
                             size = 4,
                             n = 1,
                             endian = "little") # event table
  pos <- seek(cnt_file, 900)

  data_info <- data.frame(n_events,
                          n_channels,
                          samp_rate,
                          event_table_pos)

  chan_df <- data.frame(electrode = character(n_channels),
                        chan_no = numeric(n_channels),
                        x = numeric(n_channels),
                        y = numeric(n_channels),
                        baseline = numeric(n_channels),
                        sens = numeric(n_channels),
                        cal = numeric(n_channels),
                        stringsAsFactors = FALSE)

  # Read channel names and locations

  for (i in 1:n_channels) {
    chan_start <- seek(cnt_file)
    chan_df$electrode[i] <- readBin(cnt_file, character(),
                                    n = 1, endian = "little")
    chan_df$chan_no[i] <- i
    pos <- seek(cnt_file, chan_start + 19)
    chan_df$x[i] <- readBin(cnt_file,
                            double(),
                            size = 4,
                            n = 1, endian = "little") # x coord
    chan_df$y[i] <- readBin(cnt_file,
                            double(),
                            size = 4,
                            n = 1, endian = "little") # y coord

    pos <- seek(cnt_file, chan_start + 47)
    chan_df$baseline[i] <- readBin(cnt_file,
                                   integer(),
                                   size = 1,
                                   n = 1,
                                   endian = "little")
    pos <- seek(cnt_file, chan_start + 59)
    chan_df$sens[i] <- readBin(cnt_file,
                               double(),
                               size = 4, n = 1,
                               endian = "little")
    pos <- seek(cnt_file, chan_start + 71)
    chan_df$cal[i] <- readBin(cnt_file,
                              double(),
                              size = 4,
                              n = 1,
                              endian = "little")
    pos <- seek(cnt_file, (900 + i * 75))
  }

  beg_data <- seek(cnt_file) # beginning of actual data
  real_n_samples <- event_table_pos - (900 + 75 * n_channels) / (2 * n_channels)

  frames <- floor((event_table_pos - beg_data) / n_channels / 4)

  chan_data <- matrix(readBin(cnt_file,
                              integer(),
                              size = 4,
                              n = n_channels * frames,
                              endian = "little"),
                      nrow = n_channels, ncol = frames)

  # rescale chan_data to microvolts
  mf <- chan_df$sens * (chan_df$cal / 204.8)
  chan_data <- (chan_data - chan_df$baseline) * mf

  # Read event table

  pos <- seek(cnt_file,
              event_table_pos)
  teeg <- readBin(cnt_file,
                  integer(),
                  size = 1,
                  n = 1,
                  endian = "little")
  tsize <- readBin(cnt_file,
                   integer(),
                   n = 1,
                   endian = "little")
  toffset <- readBin(cnt_file,
                     integer(),
                     n = 1,
                     endian = "little")
  ev_table_start <- seek(cnt_file)

  ev_list <- data.frame(event_type = integer(n_events),
                        keyboard = character(n_events),
                        keypad_accept = integer(n_events),
                        accept_evl = integer(n_events),
                        offset = integer(n_events),
                        type = integer(n_events),
                        code = integer(n_events),
                        latency = numeric(n_events),
                        epochevent = integer(n_events),
                        accept = integer(n_events),
                        accuracy = integer(n_events),
                        stringsAsFactors = FALSE
                        )

  for (i in 1:n_events) {
    ev_list$event_type[i] <- readBin(cnt_file,
                                     integer(),
                                     size = 2,
                                     n = 1,
                                     endian = "little")
    ev_list$keyboard[i] <- readBin(cnt_file,
                                   integer(),
                                   size = 1,
                                   n = 1,
                                   endian = "little")
    temp <- readBin(cnt_file,
                    integer(),
                    size = 1,
                    n = 1,
                    signed = FALSE,
                    endian = "little")
    ev_list$keypad_accept[i] <- bitwAnd(15, temp)
    ev_list$accept_evl[i] <- bitwShiftR(temp, 4)
    ev_list$offset[i] <- readBin(cnt_file,
                                 integer(),
                                 size = 4,
                                 n = 1,
                                 endian = "little")
    ev_list$type[i] <- readBin(cnt_file,
                               integer(),
                               size = 2,
                               n = 1,
                               endian = "little")
    ev_list$code[i] <- readBin(cnt_file,
                               integer(),
                               size = 2,
                               n = 1,
                               endian = "little")
    ev_list$latency[i] <- readBin(cnt_file,
                                  double(),
                                  size = 4,
                                  n = 1,
                                  endian = "little")
    ev_list$epochevent[i] <- readBin(cnt_file,
                                     integer(),
                                     size = 1,
                                     n = 1,
                                     endian = "little")
    ev_list$accept[i] <- readBin(cnt_file,
                                 integer(),
                                 size = 1,
                                 n = 1,
                                 endian = "little")
    ev_list$accuracy[i] <- readBin(cnt_file,
                                   integer(),
                                   size = 1,
                                   n = 1,
                                   endian = "little")
  }

  ev_list$offset <- (ev_list$offset - beg_data) / (4 * n_channels) + 1

  close(cnt_file)
  chan_df <- chan_df[, names(chan_df) %in% c("electrode", "chan_no")]
  out <- list(chan_info = chan_df,
              head_info = data_info,
              chan_data = chan_data,
              event_list = ev_list)
}

#' Function for importing Brain Vision Analyzer files
#' @param file_name file name of the header file.
#' @keywords internal
import_vhdr <- function(file_name,
                        participant_id,
                        recording) {

  .data <- read_vhdr(file_name)
  epochs <- tibble::new_tibble(list(epoch = 1,
                                    participant_id = participant_id,
                                    recording = recording),
                               nrow = 1,
                               class = "epoch_info")
  .data$epochs <- epochs
  .data
}

#' @keywords internal
read_vhdr <- function(file_name) {

  header_info <- ini::read.ini(file_name)

  if (identical(header_info$`Common Infos`$DataFormat, "ASCII")) {
    stop("BVA ASCII not currently supported.")
  }

  if (identical(header_info$`Common Infos`$DataType,
                "FREQUENCYDOMAIN")) {
    stop("Only time domain data is currently supported,
     this data is in the frequency domain.")
  }

  if (identical(header_info$`Common Infos`$DataOrientation,
                "VECTORIZED")) {
    multiplexed <- FALSE
  } else {
    multiplexed <- TRUE
  }

  data_file <- file.path(dirname(file_name),
                         header_info$`Common Infos`$DataFile)
  vmrk_file <- file.path(dirname(file_name),
                         header_info$`Common Infos`$MarkerFile)
  n_chan <- as.numeric(header_info$`Common Infos`$NumberOfChannels)
  # header gives sampling times in microseconds
  srate <- 1e6 / as.numeric(header_info$`Common Infos`$SamplingInterval)
  bin_format <- header_info$`Binary Infos`$BinaryFormat

  chan_labels <- lapply(header_info$`Channel Infos`,
                        function(x) unlist(strsplit(x = x, split = ",")))
  chan_names <- sapply(chan_labels, "[[", 1)
  #EEGLAB exported BVA files are missing the channel resolution, which occupies
  #the 3rd position - check length of chan_labels elements first to cope with
  #that. Probably always 1 microvolt anyway...
  if (length(chan_labels[[1]]) == 3) {
    chan_scale <- as.numeric(sapply(chan_labels, "[[", 3))
  } else {
    chan_scale <- rep(1, length(chan_labels))
  }

  chan_info <- parse_vhdr_chans(chan_names,
                                header_info$`Coordinates`)

  file_size <- file.size(data_file)
  .data <- read_dat(data_file,
                    file_size = file_size,
                    bin_format = bin_format,
                    multiplexed)

  .data <- matrix(.data,
                  ncol = n_chan,
                  byrow = multiplexed)

  if (any(!is.na(chan_scale))) {
    .data <- sweep(.data,
                   2,
                   chan_scale,
                   "*")
  }
  colnames(.data) <- chan_names
  .data <- tibble::as_tibble(.data)

  n_points <- nrow(.data)

  timings <- tibble::tibble(sample = 1:n_points,
                            time = (sample - 1) / srate)

  .markers <- read_vmrk(vmrk_file)
  .markers <-
    dplyr::mutate(.markers,
                  event_onset = as.numeric(event_onset),
                  length = as.numeric(length),
                  .electrode = ifelse(.electrode == 0,
                                      NA,
                                      as.character(chan_info$electrode[.electrode])),
                  event_time = (event_onset - 1) / srate)
  .data <- eeg_data(data = .data,
                    srate = srate,
                    events = .markers,
                    chan_info = chan_info,
                    timings = timings,
                    reference = NULL)
  .data
}

#' Read the raw data for a BVA file.
#'
#' @param file_name Filename of the .dat file
#' @param file_size Size of the .dat file
#' @param bin_format Storage format.
#' @param multiplexed Logical; whether the data is VECTORIZED (FALSE) or MULTIPLEXED (TRUE)
#' @keywords internal

read_dat <- function(file_name,
                     file_size,
                     bin_format,
                     multiplexed) {

  if (identical(bin_format, "IEEE_FLOAT_32")) {
    nbytes <- 4
    signed <- TRUE
    file_type <- "double"
  } else if (identical(bin_format, "UINT_16")) {
    nbytes <- 2
    signed <- FALSE
    file_type <- "int"
  } else {
    nbytes <- 2
    signed <- TRUE
    file_type <- "int"
  }

  raw_data <- readBin(file_name,
                      what = file_type,
                      n = file_size,
                      size = nbytes,
                      signed = signed)
  raw_data
}

#' Read BVA markers
#'
#' Import and parse BVA marker files.
#'
#' @param file_name File name of the .vmrk markers file.
#' @keywords internal
read_vmrk <- function(file_name) {

  vmrks <- ini::read.ini(file_name)
  marker_id <- names(vmrks$`Marker Infos`)

  markers <- lapply(vmrks$`Marker Infos`,
                    function(x) unlist(strsplit(x, ",")))

  # to do - fix to check for "New segment" instead - it's the extra date field that causes issues
  if (length(markers[[1]]) == 6) {
    date <- markers[[1]][[6]]
    markers <- lapply(markers, `[`, 1:5)
  } else {
    date <- NA
  }

  markers <- do.call(rbind, markers)

  colnames(markers) <- c("BVA_type",
                         "event_type",
                         "event_onset",
                         "length",
                         ".electrode")

  markers <- tibble::as_tibble(markers)
  markers <-
    dplyr::mutate(markers,
                  event_onset = as.numeric(event_onset),
                  length = as.numeric(length),
                  .electrode = as.numeric(.electrode))
  markers
}

#' Load `EEGLAB` .set files
#'
#' Load `EEGLAB` .set files and convert them to `eeg_epochs` objects. Supports
#' import of files saved in either Matlab v6.5 or Matlab v7.3 formats. Currently,
#' any ICA weights or decompositions are discarded.
#'
#' @param file_name Filename (and path if not in present working directory)
#' @param df_out Defaults to FALSE - outputs an object of class `eeg_epochs`. Set
#'   to TRUE for a normal data frame.
#' @param participant_id Character vector. By default, the filename will be used as the id of the
#'   participant.
#' @param recording Character vector. By default, the filename will be used as the name of the
#'   recording.
#' @param drop_custom Drop custom event fields. TRUE by default.
#' @param verbose Print informative messages. TRUE by default.
#' @author Matt Craddock \email{matt@@mattcraddock.com}
#' @importFrom dplyr group_by mutate rename
#' @importFrom tibble tibble as_tibble
#' @importFrom purrr is_empty map_df
#' @examples
#' \dontrun{import_set("your_data.set")}
#' @return An object of class `eeg_epochs`
#' @export

import_set <- function(file_name,
                       df_out = FALSE,
                       participant_id = NULL,
                       recording = NULL,
                       drop_custom = FALSE,
                       verbose = TRUE) {

  checkpkgs <-
    unlist(
      lapply(c("R.matlab", "hdf5r"),
             requireNamespace,
             quietly = TRUE)
    )
  if (!all(checkpkgs)) {
    missing_pkg <- c("R.matlab", "hdf5r")[!checkpkgs]

    if (length(missing_pkg) == 1) {
      stop(paste("Package",
                 missing_pkg,
                 "needed. Please install it."),
           call. = FALSE)
    } else {
      stop(
        paste("Packages",
              missing_pkg[[1]],
              "&",
              missing_pkg[[2]],
              "needed. Please install them."
              ),
        call. = FALSE
        )
    }
  }

  if (verbose) {
    message("Importing from EEGLAB .set file.")
  }

  if (is.null(recording)) {
    recording <- basename(tools::file_path_sans_ext(file_name))
  }

  if (is.null(participant_id)) {
    participant_id <- basename(tools::file_path_sans_ext(file_name))
  }

  check_hdf5 <- hdf5r::is.h5file(file_name)

  if (check_hdf5) {
    if (verbose) {
      message("Matlab 7.3 file format detected.")
    }
    return(
      read_hdf5_set(file_name,
                    recording = recording,
                    participant_id = participant_id)
      )
  }


  temp_dat <- R.matlab::readMat(file_name)

  if (identical(names(temp_dat)[[1]], "EEG")) {
    temp_dat <- temp_dat$EEG[, 1, 1]
  }

  n_chans <- temp_dat[["nbchan"]]
  n_trials <- temp_dat[["trials"]]
  times <- temp_dat[["times"]]
  chan_info <- drop(Reduce(rbind,
                           temp_dat["chanlocs"]))
  #   var_names <- dimnames(temp_dat$EEG)[[1]]
  #
  # n_chans <- temp_dat$EEG[[which(var_names == "nbchan")]]
  # n_trials <- temp_dat$EEG[[which(var_names == "trials")]]
  # times <- temp_dat$EEG[[which(var_names == "times")]]
  # chan_info <- drop(Reduce(rbind, temp_dat$EEG["chanlocs",,]))

  pick_empties <-
    vapply(
      chan_info,
      function(x) is.null(x) | length(x) == 0,
      FUN.VALUE = logical(1)
    )
  chan_info[pick_empties] <- NA
  chan_info <- lapply(data.frame(t(chan_info)),
                      unlist) # unlist each data.frame column
  chan_info <- data.frame(chan_info) # turn back into data frame
  chan_info <- parse_chaninfo(chan_info)

  # check if the data is stored in the set or in a separate .fdt
  #if (is.character(temp_dat$EEG[[which(var_names == "data")]])) {
  if (is.character(temp_dat[["data"]])) {
    if (verbose) {
      message("Importing data from .fdt file.")
    }
    fdt_file <- paste0(tools::file_path_sans_ext(file_name),
                       ".fdt")
    fdt_file <- file(fdt_file, "rb")

    # read in data from .fdt
    # do this in chunks to avoid memory errors for large files...?
    signals <- readBin(fdt_file,
                       "double",
                       n = n_chans * n_trials * length(times),
                       size = 4,
                       endian = "little")
    close(fdt_file)

    dim(signals) <- c(n_chans, length(times) * max(n_trials, 1))
    times <- rep(times, max(n_trials, 1))

    if (n_trials == 1) {
      continuous <- TRUE
    } else {
      continuous <- FALSE
    }

  } else {

    # if the data is in the .set file, load it here instead of above
    #signals <- temp_dat$EEG[[which(dimnames(temp_dat$EEG)[[1]] == "data")]]
    signals <- temp_dat[["data"]]
    dim_signals <- dim(signals)

    if (length(dim_signals) == 3) {
      dim(signals) <- c(dim_signals[1], dim_signals[2] * dim_signals[3])
      times <- rep(times, n_trials)
      continuous <- FALSE
    } else {
      continuous <- TRUE
    }
  }

  signals <- t(signals)
  colnames(signals) <- unique(chan_info$electrode)
  signals <- as.data.frame(signals)
  signals$time <- as.numeric(times)

  #srate <- temp_dat$EEG[[which(var_names == "srate")]][[1]]
  srate <- temp_dat[["srate"]][[1]]

  if (!continuous) {
    signals <- dplyr::group_by(signals,
                               time)
    signals <- dplyr::mutate(signals,
                             epoch = 1:dplyr::n())
    signals <- dplyr::ungroup(signals)
  }

  #event_info <- temp_dat$EEG[[which(var_names == "event")]]
  event_info <- temp_dat[["event"]]

  event_table <- event_info
  dim(event_table) <- c(dim(event_info)[[1]],
                        dim(event_info)[[3]])

  event_table <- t(event_table)

  colnames(event_table) <- dimnames(event_info)[[1]]

  event_table <- tibble::as_tibble(event_table)
  event_table <-
    purrr::map_df(event_table,
                  ~unlist(
                    purrr::map(.,
                               ~ifelse(is.null(.),
                                       NA,
                                       .)))
                  )
  event_table <- lapply(event_table,
                        unlist)
  empty_entries <- unlist(lapply(event_table,
                                 rlang::is_empty))
  if (any(empty_entries)) {
    empty_cols <- names(event_table)[empty_entries]
    message(paste0("Removing empty event table column (s):",
                   empty_cols))
    event_table <- event_table[!empty_entries]
  }
  event_table <- tibble::as_tibble(event_table)

  # EEGLAB stores latencies in samples and allows non-integer samples (e.g.
  # through downsampling, or more rapidly sampled events than EEG signal)
  #
  if (any(event_table$latency %% 1 > 0)) {
    message("Rounding non-integer event sample latencies...")
    event_table$latency <- round(event_table$latency)
    # This can result in an event with a latency of zero in samples, which
    # causes problems with subsequent import steps - fix that and turn it into
    # sample 1
    event_table$latency <- ifelse(event_table$latency == 0,
                                  1,
                                  event_table$latency)
  }

  # EEGLAB stores latencies in samples starting from 1, my event_time is in
  # seconds, starting from 0

  event_table$event_time <- (event_table$latency - 1) / srate

  std_cols <- c("latency",
                "event_time",
                "type",
                "epoch")

  if (drop_custom & any(!colnames(event_table) %in% std_cols)) {
    message("Dropping custom columns...")
    event_table <- event_table[, std_cols]
  }

  col_check <-  colnames(event_table) %in% c("event_type", "event_onset")

  if (any(col_check)) {
    dupe_checks <- colnames(event_table)[col_check]
    dupe_labs <- paste0(dupe_checks, ".x")
  }

  #need to build in check for duplicate columns
  event_table <- dplyr::rename(event_table,
                               event_type = "type",
                               event_onset = "latency")
  if (df_out) {
    return(signals)
  } else {
    signals$time <- signals$time / 1000
    # convert to seconds - eeglab uses milliseconds
    if (continuous) {
      timings <- tibble::tibble(time = signals$time,                                #epoch = signal,
                                sample = 1:length(signals$time))
      n_epochs <- 1
    } else {
      timings <- tibble::tibble(time = signals$time,
                                epoch = signals$epoch,
                                sample = 1:length(signals$time))
      event_table$time <- NA
      event_table$time <- timings[which(timings$sample %in% event_table$event_onset,
                                        arr.ind = TRUE), ]$time

      n_epochs <- length(unique(timings$epoch))
    }



    epochs <-
      tibble::new_tibble(list(epoch = 1:n_epochs,
                              participant_id = rep(participant_id, n_epochs),
                              recording = rep(recording, n_epochs)),
                         nrow = n_epochs,
                         class = "epoch_info")
    if (continuous) {
      out_data <- eeg_data(signals[, 1:n_chans],
                           srate = srate,
                           timings = timings,
                           chan_info = chan_info,
                           events = event_table,
                           epochs = epochs)
    } else {
      out_data <- eeg_epochs(signals[, 1:n_chans],
                             srate = srate,
                             timings = timings,
                             chan_info = chan_info,
                             events = event_table,
                             reference = NULL,
                             epochs = epochs)
    }
    out_data
  }
}


#' Parse channel info from an EEGLAB set file
#'
#' Internal function to convert EEGLAB chan_info to eegUtils style
#'
#' @param chan_info Channel info list from an EEGLAB set file
#' @param drop If there are additional columns, remove all columns except
#'   electrode if TRUE, or just unexpected columns if FALSE.
#' @keywords internal
parse_chaninfo <- function(chan_info,
                           drop = FALSE) {

  # chan_info <- chan_info[sort(names(chan_info),
  #                             method = "shell")]
  expected <- c("labels", "radius",
                "ref", "sph.phi",
                "sph.radius", "sph.theta",
                "theta", "type",
                "urchan", "X",
                "Y", "Z")
  # Check for two things:
  # 1) Missing expected columns.
  # 2) Columns which are non-standard.
  if (!all(names(chan_info) %in% expected)) {
    if (drop) {
      warning("EEGLAB chan info has unexpected format, taking electrode names only.")
      out <- data.frame(chan_info["labels"])
      names(out) <- "electrode"
      return(validate_channels(out))
    } else {
      warning("EEGLAB chan info has unexpected format, taking only expected columns.")
      miss_cols <- expected[!(expected %in% names(chan_info))]
      #Why did I set this to NA? must have been a reason but it has unintended
      #consequences. Remember to build a test when I end up back here with an
      #example...
      chan_info[miss_cols] <- NA
    }
  }

  if (!all(expected %in% names(chan_info))) {
    miss_cols <- expected[!(expected %in% names(chan_info))]
    chan_info[miss_cols] <- NA

  }

  chan_info <- chan_info[expected]

  names(chan_info) <- c("electrode",
                        "radius",
                        "ref",
                        "sph_phi",
                        "sph_radius",
                        "sph_theta",
                        "angle",
                        "type",
                        "urchan",
                        "cart_x",
                        "cart_y",
                        "cart_z"
                        )

  chan_info <- chan_info[c("electrode",
                           "cart_x",
                           "cart_y",
                           "cart_z")]
  # EEGLAB co-ordinates are rotated 90 degrees compared to our coordinates,
  # and left-right flipped
  names(chan_info) <- names(chan_info)[c(1, 3, 2, 4)]
  chan_info <- chan_info[, c(1, 3, 2, 4)]
  chan_info$cart_x <- -chan_info$cart_x
  sph_coords <- cart_to_spherical(chan_info[, c("cart_x", "cart_y", "cart_z")])
  xy <- project_elecs(sph_coords)
  chan_info <- dplyr::bind_cols(electrode = as.character(chan_info$electrode),
                                sph_coords,
                                chan_info[, 2:4],
                                xy)
  chan_info
  }

#' Parse BVA channel info
#'
#' @param chan_labels The `Channel Infos` section from a BVA vhdr file
#' @param chan_info The `Coordinates` section from a BVA vhdr file
#' @keywords internal
parse_vhdr_chans <- function(chan_labels,
                             chan_info,
                             verbose = TRUE) {

  init_chans <- data.frame(electrode = chan_labels)
  if (is.null(chan_info)) {
    init_chans$radius <- NA
    init_chans$theta <- NA
    init_chans$phi <- NA
    if (verbose) message("No channel locations found.")
    return(validate_channels(init_chans))
  } else {
    coords <- lapply(chan_info,
                     function(x) as.numeric(unlist(strsplit(x, split = ","))))
    new_coords <- data.frame(do.call(rbind,
                                     coords))
    names(new_coords) <- c("radius", "theta", "phi")
    new_coords <- cbind(init_chans, new_coords)
    new_coords[new_coords$radius %in% c(0, NaN), 2:4] <- NA

    chan_info <- bva_elecs(new_coords)
    tibble::as_tibble(chan_info)
  }
}

#' Convert BVA spherical locations
#'
#' Reads the BrainVoyager spherical electrode locations and converts them to
#' Cartesian 3D and 2D projections.
#'
#' @param chan_info A data.frame containing electrodes
#' @param radius Head radius in millimetres
#' @keywords internal
bva_elecs <- function(chan_info, radius = 85) {
  chan_info <-
    dplyr::mutate(chan_info,
                  cart_x = sin(deg2rad(theta)) * cos(deg2rad(phi)),
                  cart_y = sin(deg2rad(theta)) * sin(deg2rad(phi)),
                  cart_z = cos(deg2rad(theta)),
                  x = theta * cos(deg2rad(phi)),
                  y = theta * sin(deg2rad(phi)))
  chan_info[, c("cart_x", "cart_y", "cart_z")] <-
    chan_info[, c("cart_x", "cart_y", "cart_z")] * radius
  chan_info
}

#' Import Fieldtrip files
#'
#' Fieldtrip is a Matlab package for EEG/MEG processing and analysis.
#'
#' @author Matt Craddock \email{matt@@mattcraddock.com}
#' @param file_name Name of file to be imported.
#' @param recording Name of the recording. By default, the filename will be
#'   used.
#' @param participant_id Identifier for the participant.
#' @param verbose Informative messages printed to console. Defaults to TRUE.
#' @examples
#' \dontrun{import_ft("fieldtrip_test.mat")}
#' @return An object of class `eeg_data`, `eeg_epochs`, or
#'   `eeg_tfr`, depending on the type of input data.
#' @export
import_ft <- function(file_name,
                      participant_id = NULL,
                      recording = NULL,
                      verbose = TRUE) {

  if (!requireNamespace("R.matlab", quietly = TRUE)) {
    stop("Package \"R.matlab\" needed. Please install it.",
         call. = FALSE)
  }
  tmp_ft <- R.matlab::readMat(file_name)

  tmp_ft <- tmp_ft[[1]]
  struct_names <- rownames(tmp_ft)

  if ("pos" %in% struct_names) {
    stop("Import of source data is not currently supported.")
  }

  if (is.null(participant_id)) {
    participant_id <- basename(tools::file_path_sans_ext(file_name))
  }

  if (is.null(recording)) {
    recording <- basename(tools::file_path_sans_ext(file_name))
  }

  if ("dimord" %in% struct_names) {

    dimensions <- proc_dimord(unlist(tmp_ft["dimord", ,],
                                     use.names = FALSE))

    if ("frequency" %in% dimensions) {
      return(ft_freq(tmp_ft,
                     dimensions,
                     struct_names = struct_names,
                     participant_id = participant_id,
                     recording = recording,
                     verbose = verbose))
    }

  } else {

    # work out if the file stores components (from ICA or PCA) or "raw" data
    if ("unmixing" %in% struct_names) {
      return(ft_comp(tmp_ft))
    } else {
      return(ft_raw(tmp_ft,
                    struct_names = struct_names,
                    participant_id = participant_id,
                    recording = recording,
                    verbose = verbose))
    }
  }
}



read_bdf_header <- function(file_name) {

  file_read <- file(file_name, "rb")
  pos <- seek(file_read, 1)
  sys_type <- readChar(file_read, 7)
  if (sys_type == "BIOSEMI") {
    samp_bits <- 24
  } else {
    samp_bits <- 16
  }
  patient_id <- readChar(file_read, 80)
  recording_id <- readChar(file_read, 80)
  recording_date <- readChar(file_read, 8)
  recording_time <- readChar(file_read, 8)
  n_bytes <- readChar(file_read, 8)
  format_version <- readChar(file_read, 44)
  n_records <- readChar(file_read, 8)
  dur_records <- readChar(file_read, 8)
  n_chan <- as.integer(readChar(file_read, 4))
  chan_labels <- readChar(file_read, n_chan * 16)
  chan_labels <- unlist(strsplit(chan_labels, " "))
  chan_labels <- chan_labels[chan_labels != ""]

  if (length(chan_labels) != n_chan) {
    stop("Chan_label import went wrong!")
  }

  chan_types <- readChar(file_read, n_chan * 80)
  chan_types <- unlist(strsplit(chan_types,  "  "))
  chan_types <- chan_types[chan_types != ""]

  chan_units <- readChar(file_read, n_chan * 8)
  chan_units <- unlist(strsplit(chan_units, " "))
  chan_units <- chan_units[chan_units != ""]

  phys_mins <- readChar(file_read, n_chan * 8)
  phys_mins <- as.integer(unlist(strsplit(phys_mins, " ")))

  phys_max <- readChar(file_read, n_chan * 8)
  phys_max <- as.integer(unlist(strsplit(phys_max, "  ")))

  dig_mins <- readChar(file_read, n_chan * 8)
  dig_mins <- unlist(strsplit(gsub("-", " -", dig_mins), " "))
  dig_mins <- as.integer(dig_mins)
  dig_mins <- dig_mins[!is.na(dig_mins)]

  dig_max <- readChar(file_read, n_chan * 8)
  dig_max <- as.integer(unlist(strsplit(gsub("-", " -", dig_max), " ")))

  prefilt <- readChar(file_read, n_chan * 80)
  prefilt <- remove_empties(prefilt, "  ")

  n_samp_rec <- readChar(file_read, n_chan * 8) # sampling rate
  n_samp_rec <- remove_empties(n_samp_rec)

  reserved <- readChar(file_read, n_chan * 32)
  close(file_read)

  list(sys_type = sys_type,
       samp_bits = samp_bits,
       patient_id = patient_id,
       recording_id = recording_id,
       recording_time = recording_time,
       n_chans = n_chan,
       n_records = as.integer(n_records),
       record_dur = as.integer(dur_records),
       chan_labels = chan_labels,
       chan_types = chan_types,
       chan_units = chan_units,
       phys_mins = phys_mins,
       phys_max = phys_max,
       dig_mins = dig_mins,
       dig_max = dig_max,
       prefiltering = prefilt,
       srate = as.integer(n_samp_rec))
}

remove_empties <- function(strings, char_match = " ") {
  strings <- unlist(strsplit(strings, char_match))
  strings[strings != ""]
}

read_bdf_data <- function(file_name, headers) {
  sig_file <- file(file_name,
                   "rb")
  #skip headers
  start_pos <- (headers$n_chans + 1) * 256
  pos <- seek(sig_file,
              start_pos)

  bytes_per_rec <- headers$samp_bits / 8
  n_chans <- headers$n_chans

  sig_length <- headers$srate[[1]] * bytes_per_rec * n_chans # length of one record in bytes
  rec_length <- headers$srate[[1]]

  rec_size <- sig_length / 256 # calc size in kilobytes in memory

  records_per <- min(floor(20000 / rec_size), headers$n_records) # read up to 20000 kb at once

  gains <- (headers$phys_max - headers$phys_min) / (headers$dig_max - headers$dig_mins)
  offsets <- headers$phys_mins - gains * headers$dig_mins

  sig_out <- matrix(0,
                    nrow = headers$srate[[1]] * headers$n_records,
                    ncol = n_chans)

  chunk_size <- rec_length * records_per

  n_chunks <- floor(headers$n_records / records_per)

  remaining <- headers$n_records %% records_per
  first_record <- integer(sig_length * records_per)
  shifted <- integer(sig_length * records_per)

  for (records in 1:n_chunks) {
    first_record <- readBin(sig_file,
                            "raw",
                            n = sig_length * records_per,
                            endian = "little")

    shifted <- as.integer(first_record) * as.integer(2^c(0, 8, 16))
    shifted  <- array(shifted,
                      dim = c(3,
                              headers$srate[[1]],
                              n_chans,
                              records_per))
    shifted <- colSums(shifted)
    shifted <- matrix(aperm(shifted,
                            c(1, 3, 2)),
                      nrow = prod(dim(shifted)[c(1, 3)]))
    modifier <- chunk_size * (records - 1)
    sig_out[(1:chunk_size) + modifier, ] <- shifted
  }

  if (remaining > 0) {
    final_records <- readBin(sig_file,
                             "raw",
                             n = sig_length * remaining,
                             endian = "little")

    shifted <- as.integer(final_records) * as.integer(2^c(0, 8, 16))

    shifted  <- array(shifted, dim = c(3,
                                       headers$srate[[1]],
                                       n_chans,
                                       remaining))
    shifted <- colSums(shifted)
    modifier <- nrow(sig_out) - remaining * headers$srate[[1]] + 1
    sig_out[modifier:nrow(sig_out), ] <- matrix(aperm(shifted,
                                                      c(1, 3, 2)),
                                                nrow = prod(dim(shifted)[c(1, 3)]))
  }

  # Correct for sign
  sig_out <- sig_out - bitwShiftL(bitwShiftR(sig_out,
                                             23),
                                  24)
  sig_out[, 1:(n_chans - 1)] <- sig_out[, 1:(n_chans - 1)] * gains[1] + offsets[1]
  close(sig_file)
  sig_out
}
craddm/eegUtils documentation built on March 24, 2022, 9:17 a.m.