# Keep printed tibbles short in the rendered README: always five rows.
options(tibble.print_max = 5, tibble.print_min = 5)

# Default chunk options for every code chunk in this document.
knitr::opts_chunk$set(
  out.width = "100%",               # figures span the full page width
  fig.path = "man/figures/README-", # prefix for generated figure files
  comment = "#>",                   # prefix for printed output lines
  collapse = TRUE                   # merge source and output into one block
)

wranglEHR

# Load the package from source so the examples below run against the
# development version (with no arguments, load_all() uses the current
# directory as the package path).
pkgload::load_all()

Lifecycle Status R-CMD-check

Overview

wranglEHR is a data wrangling and cleaning tool for CC-HIC. It is designed to run against the CC-HIC entity-attribute-value (EAV) table structure (which at present exists in PostgreSQL and SQLite flavours). We are about to undergo a major rewrite to OHDSI CDM version 6, so this package will be in flux. Please see the R vignettes for further details on how to use the package to perform the most common tasks.

This package is designed to work in concert with inspectEHR, which provides data quality evaluation for CC-HIC.

Installation

# Install the development version directly from GitHub (this needs the
# `remotes` package to be installed), then attach the package.
remotes::install_github("DocEd/wranglEHR")
library(wranglEHR)

Usage

# Open a database connection (this uses the internal test db).
ctn <- setup_dummy_db()

# Inputs shared by every example below.
episodes <- 1:10                       # restrict extraction to these episodes
heart_rate_code <- "NIHR_HIC_ICU_0108"

# Static (one value per episode) variables, renamed on the fly.
demographics <- extract_demographics(
  connection = ctn,
  episode_ids = episodes,
  code_names = c("NIHR_HIC_ICU_0017", "NIHR_HIC_ICU_0019"),
  rename = c("height", "weight")
)

head(demographics)

# Time-varying variables, renamed on the fly.
heart_rate <- extract_timevarying(
  ctn,
  episode_ids = episodes,
  code_names = heart_rate_code,
  rename = "hr"
)

head(heart_rate)

# The same extraction at a custom temporal resolution. `coalesce_rows`
# defines what happens to values recorded more often than the sampling
# cadence, and `time_boundaries` limits the extract to the first 24 hours.
heart_rate_2h <- extract_timevarying(
  ctn,
  episode_ids = episodes,
  code_names = heart_rate_code,
  rename = "hr",
  cadence = 2,           # one row every 2 hours
  coalesce_rows = mean,  # downsample to the 2 hour cadence with the mean
  time_boundaries = c(0, 24)
)

head(heart_rate_2h)

# Always close the connection once you are finished.
DBI::dbDisconnect(ctn)

Getting help

If you find a bug, please file an issue with a minimal reproducible example on GitHub.


  1. https://www.ohdsi.org/analytic-tools/achilles-for-data-characterization/
  2. Kahn, Michael G.; Callahan, Tiffany J.; Barnard, Juliana; Bauck, Alan E.; Brown, Jeff; Davidson, Bruce N.; Estiri, Hossein; Goerg, Carsten; Holve, Erin; Johnson, Steven G.; Liaw, Siaw-Teng; Hamilton-Lopez, Marianne; Meeker, Daniella; Ong, Toan C.; Ryan, Patrick; Shang, Ning; Weiskopf, Nicole G.; Weng, Chunhua; Zozus, Meredith N.; and Schilling, Lisa (2016) "A Harmonized Data Quality Assessment Terminology and Framework for the Secondary Use of Electronic Health Record Data," eGEMs (Generating Evidence & Methods to improve patient outcomes): Vol. 4: Iss. 1, Article 18.


DocEd/wranglEHR documentation built on May 28, 2022, 1:50 p.m.