R/ecg_detect_beats.R

Defines functions ecg_detect_beats

Documented in ecg_detect_beats

#' This function accepts a single ecg trace already scaled to millivolts, then runs
#' the ECG trace through one or more WFDB QRS detection algorithms, which tend to detect heart beats with
#' high fidelity, even in noisy data.
#'
#' @param ecg_trace This is the ECG trace that should be passed to WFDB. If a numeric
#'   vector is provided, then this will written to a temporary file and passed through
#'   WFDB. If a readable file is passed in, we assume that this is a single column file
#'   containing scaled ECG values. This file is read in, then processed through WFDB.
#' @param freq The acquisition frequency in Hz. Default is 1000.
#' @param wfdb_out_file The prefix of WFDB outputs generated by processing. Defaults to a temporary file
#' @param detectors A vector of one or more WFDB QRS detection algorithms to be applied. Options
#'   are c("gqrs", "wqrs", "sqrs", "ecgpuwave").
#' @param flags A named list of arguments passed to QRS detection algorithms. These should not
#'   include the call for what record to output (-r <out_name>).
#' @param correct_peaks If TRUE, the detected QRS complexes are passed to a peak correction
#'   algorithm (ported from wfdb-python) that shifts the detected events to the local maximum
#'   within a small time window. This allows for small adjustments that can overcome algorithm failures
#'   or, more commonly, that shift the events to the R spikes for plotting convenience instead of the Q onset.
#' @param wfdb_path location of the WFDB bin directory on this computer.
#' @param signal_mult Value to multiply signal values by before importation by wrsamp
#' @param adc_gain Gain used internally by WFDB to convert raw units to millivolts
#'
#' @details
#'   Note that WFDB is very picky about the analog-digital converter gain settings and
#'   can produce weird and invalid outputs if the input values are converted properly
#'   by wrsamp. In particular, if small or fractional values are provided in the first place,
#'   wrsamp will convert all of these to integers, resulting in all (or nearly all) zero values.
#'
#'   To work around this problem, we apply an x 1000 transformation to the input values and
#'   then also store a gain of 1000 in the header of the wfdb file. This keeps the millivolt
#'   scaling of the original data, but allows wfdb to process the data as expected.
#'
#'   Also note that the gqrs algorithm and other WFDB QRS detectors (I think) tend to return
#'   annotations for when the QRS complex begins, not when the spike occurs. If you want
#'   the annotations to have the R spike, turn on correct_peaks, which will pass the detected
#'   QRS complexes through the correct_peaks function (ported from wfdb-python).
#'
#' @return A list containing two data.table objects. $beat_data contains beat times, positions,
#'   HR, and RR for each detector. Shifted peaks are denoted with the suffix '_shift'.
#'   $ecg_df is a data.table containing the time and ecg trace of the raw data.
#'
#' @examples
#'  \dontrun{
#'    ecg_data <- ecg_detect_beats("ecg_027_full.txt.bz2", detectors="gqrs")
#'  }
#' @importFrom RHRV CreateHRVData SetVerbose LoadBeatVector BuildNIHR InterpolateNIHR
#' @importFrom data.table data.table fread
#' @author Michael Hallquist
#' @export
ecg_detect_beats <- function(ecg_trace, freq=1000, wfdb_out_file=file.path(tempdir(), "ecg_out"),
                             detectors=c("wqrs", "gqrs", "ecgpuwave", "sqrs"),
                             flags=list(gqrs="-o gqrs -m 1.0",
                                        wqrs="-R -m 100",
                                        ecgpuwave="-a ecgpuwave -n 1",
                                        sqrs="-m 500"),
                             correct_peaks=TRUE, wfdb_path="/usr/local/wfdb/bin",
                             signal_mult=1000,
                             adc_gain=1000) {

  checkmate::assert_number(freq)
  checkmate::assert_subset(detectors, c("wqrs", "gqrs", "ecgpuwave", "sqrs"))
  checkmate::assert_logical(correct_peaks)
  checkmate::assert_directory_exists(wfdb_path)
  checkmate::assert_file_exists(file.path(wfdb_path, "wrsamp"), access="rx") #does the wrsamp command exist?
  checkmate::assert_integerish(signal_mult)
  checkmate::assert_integerish(adc_gain)

  if (is.character(ecg_trace)) {
    ecg_raw <- data.table::fread(ecg_trace)
  } else if (is.vector(ecg_trace)) {
    ecg_raw <- data.table(ecg=ecg_trace) #function was passed in data
  } else { error("Cannot interpret input ecg_trace.") }

  ecg_raw[, "time" := seq(0, by=1/freq, length.out=nrow(ecg_raw))]
  setnames(ecg_raw, c("ecg", "time"))

  #not needed for anything
  #ecg_raw[, "rownum" := 1:nrow(ecg_raw)]
  #setnames(ecg_raw, c("ecg", "time", "rownum"))

  setkey(ecg_raw, time) #index on time (speeds up merge with event data, if needed)

  #wfdb likes to output files to a local target
  curwd <- getwd()
  setwd(file.path(dirname(wfdb_out_file)))
  on.exit(setwd(curwd))

  #import original data into wfdb format
  #always write to file before passing to wrsamp (in case it was vector or compressed file to begin with)
  data.table::fwrite(ecg_raw[,"ecg"], file = file.path(dirname(wfdb_out_file), "ecg_input.txt"))
  system(paste0(wfdb_path, "/wrsamp -F ", freq, " -G ", adc_gain, " -x ", signal_mult,
                " -i ", file.path(dirname(wfdb_out_file), "ecg_input.txt"), " -o ", basename(wfdb_out_file)))

  #small helper subfunction to get a data.table from RHRV containing HR and RR for a set of beat times
  get_hrv_dt <- function(beat_times, freq) {
    hrv.data = CreateHRVData()
    hrv.data = SetVerbose(hrv.data, TRUE)
    hrv.data = LoadBeatVector(hrv.data, beat_times)
    hrv.data = BuildNIHR(hrv.data)
    hrv.data = InterpolateNIHR(hrv.data, freqhr = freq)
    beat_times <- data.table(hrv.data$Beat)
    setnames(beat_times, c("time", "niHR", "RR"))

    #get time codes on exact grid that matches raw data so that any merge with raw data works
    beat_times[, "time":=plyr::round_any(time, 1/freq)]
    setkey(beat_times, time)
    return(beat_times)
  }

  #helper subfunction to import WFDB beats, compute HR and RR and index by time
  #Note: RHRV::LoadBeatWFDB doesn't do well with sqrs annotations -- times are wrong!
  #Thus, move away from its import function in favor of our internal import_wfdb_annotations
  import_wfdb_beats <- function(wfdb_out_file, annotator, freq, wfdb_bin = "/usr/local/wfdb/bin") {
    ann_df <- import_wfdb_annotations(wfdb_out_file, annotator, elapsed = TRUE, channel = 0L,
                                      type = "N", wfdb_path = wfdb_bin)

    beat_times <- get_hrv_dt(ann_df$time_sec, freq)

    #add beat positions (in terms of ECG trace vector) back to data.table
    beat_times[, "pos" := ann_df$pos]
    beat_times[, "annotator" := annotator] #tag source of annotation

    return(beat_times)
  }

  #helper subfunction to add peak-shifted columns
  add_peak_correction <- function(ecg_df, beat_list, annotator, freq)  {
    #message("Correcting peaks post-gqrs detection")

    #Use bpm = 200 and allow a maximum shift of the half the period
    # of a typical IBI. So, at 1000 Hz, we'd say an IBI of 300ms is typical for
    # BPM=200. Thus, the shift can be up to 150 samples.
    shift_allowance <- (60*freq) / 200
    shifted_peaks <- correct_peaks(ecg_df$ecg, beat_list[[annotator]]$pos,
                                   search_radius = shift_allowance,
                                   smooth_window_size=shift_allowance,
                                   peak_dir='up')

    if (any(duplicated(shifted_peaks))) {
      which_dupes <- which(duplicated(shifted_peaks))
      message("Duplicate beats occurred after correct_peaks. These are probably false positives in the annotator algorithm.")
      message("Dropping these from the shifted annotation.")
      message("Possible false beats at times: ", paste(round(ecg_df$time[shifted_peaks[which_dupes]], 2), collapse = ", "))
      shifted_peaks <- shifted_peaks[-which_dupes]
    }

    beat_times <- get_hrv_dt(ecg_df$time[shifted_peaks], freq = freq)
    
    beat_times[, "pos" := shifted_peaks]
    beat_times[, "annotator" := paste0(annotator, "_shift")] #tag source of annotation

    beat_list[[paste0(annotator, "_shift")]] <- beat_times

    return(beat_list)
  }

  beat_list <- list() #for storing beat annotations

  #loop over WFDB detectors
  for (dd in detectors) {
    checkmate::assert_file_exists(file.path(wfdb_path, dd))
    system(paste0(wfdb_path, "/", dd, " -r ", basename(wfdb_out_file), " ", flags[[dd]]))

    #sqrs always forces the extension to .qrs, which generates collisions and confusion
    if (dd=="sqrs") { file.rename(paste0(wfdb_out_file, ".qrs"), paste0(wfdb_out_file, ".sqrs")) }
    beat_list[[dd]] <- import_wfdb_beats(wfdb_out_file, dd, freq, wfdb_bin = wfdb_path)

    #add shifted peaks to beat events list
    if (isTRUE(correct_peaks)) { beat_list <- add_peak_correction(ecg_raw, beat_list, dd, freq) }
  }

  return(list(beat_data=do.call(rbind, beat_list), ecg_df = ecg_raw))
}
PennStateDEPENdLab/experiment.pipeline documentation built on April 14, 2024, 9:37 p.m.