Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----eval = FALSE-------------------------------------------------------------
# install.packages("healthdb")
## ----setup, message=FALSE-----------------------------------------------------
library(dplyr)
library(dbplyr)
library(lubridate)
library(glue)
library(purrr)
library(healthdb)
## -----------------------------------------------------------------------------
# make_test_dat() makes either a toy data.frame or database table in memory with known number of rows that satisfy the query we will show later
claim_db <- make_test_dat(vals_kept = c("303", "304", "305", "291", "292", glue("30{30:59}"), glue("29{10:29}"), noise_val = c("999", "111")), type = "database")
# this is a database table
# note that in-memory SQLite database stores dates as numbers
claim_db %>% head()
## -----------------------------------------------------------------------------
hosp_df <- make_test_dat(vals_kept = c(glue("F{10:19}"), glue("F{100:199}"), noise_val = "999"), type = "data.frame")
# this is a local data.frame/tibble
hosp_df %>% head()
# convert Date to numeric to be consistent with claim_db
hosp_df <- hosp_df %>%
mutate(dates = julian(dates))
## ----eval=FALSE---------------------------------------------------------------
# ## not run
# claim_db %>%
# # identify the target codes
# filter(if_any(starts_with("diagx"), ~ str_like(., c("291%", "292%", "303%", "304%", "305%")))) %>%
# # each clnt has at least 2 records on different dates
# group_by(clnt_id) %>%
# # the n_distinct step is mainly for reducing computation in the next step
# filter(n_distinct(dates) >= 2) %>%
# # any two dates within one year?
# filter((max(dates) - min(dates)) <= 365)
# ## end
## -----------------------------------------------------------------------------
result1 <- claim_db %>%
identify_row(
vars = starts_with("diagx"),
match = "start",
vals = c(291:292, 303:305)
)
## -----------------------------------------------------------------------------
result2 <- result1 %>%
exclude(
excl = identify_row(claim_db, starts_with("diagx"), "in", "111"),
by = "clnt_id"
)
## -----------------------------------------------------------------------------
result3 <- result2 %>% restrict_n(
clnt_id = clnt_id,
n_per_clnt = 2,
count_by = dates,
# here we use filter mode to remove records that failed the restriction
mode = "filter"
)
## -----------------------------------------------------------------------------
result4 <- result3 %>% restrict_date(
clnt_id = clnt_id,
date_var = dates,
n = 2,
within = 365,
uid = uid,
# here we use flag mode to flag records that met the restriction instead of removing those
mode = "flag"
)
## -----------------------------------------------------------------------------
# Class of result4
class(result4)
# execute query and download the result
result_df <- result4 %>% collect()
# Number of rows in source
nrow(claim_db %>% collect())
# Number of rows in the current result
nrow(result_df)
## -----------------------------------------------------------------------------
# make two look up tables
age_tab <- data.frame(
clnt_id = 1:50,
age = sample(1:90, 50),
sex = sample(c("F", "M"), 50, replace = TRUE)
)
address_tab <- data.frame(
clnt_id = rep(1:50, 5), year = rep(2016:2020, each = 50),
area_code = sample(0:200, 50, replace = TRUE)
)
# get year from dates for matching
result_df <- result_df %>% mutate(year = lubridate::year(as.Date(dates, origin = "1970-01-01")))
# note that keys must be present in all tables
result_df %>%
fetch_var(
keys = c(clnt_id, year),
linkage = list(
# the formula means from_table ~ get_variable
# |clnt_id means matching on clnt_id only
age_tab ~ c(age, sex) | clnt_id,
address_tab ~ area_code
)
) %>%
select(uid, clnt_id, dates, age, sex, area_code) %>%
head()
## -----------------------------------------------------------------------------
# build the full definition of SUD
sud_def <- build_def(
# name of definition
def_lab = "SUD",
# place holder names for sources
src_labs = c("claim", "hosp"),
def_fn = define_case, # you could alter it and supply your own function
# below are argumets of define_case
fn_args = list(
# if length = 1, the single element will be use for every source
vars = list(starts_with("diagx")),
match = "start", # match ICD starts with vals
vals = list(c(291:292, 303:305), glue("F{10:19}")),
clnt_id = clnt_id,
n_per_clnt = c(2, 1),
date_var = dates,
within = c(365, NULL),
uid = uid,
mode = "flag"
)
)
sud_def
## -----------------------------------------------------------------------------
sud_def$fn_call
## -----------------------------------------------------------------------------
# execute the definition
result_list <- sud_def %>%
execute_def(with_data = list(
claim = claim_db,
hosp = hosp_df
))
## -----------------------------------------------------------------------------
# view the results
purrr::walk(result_list, ~ head(.) %>% print())
## -----------------------------------------------------------------------------
bind_source(result_list,
# output_name = c(names in the list elements)
src = "src",
uid = "uid",
clnt_id = "clnt_id",
flag = c("flag_restrict_date", NA),
# force_proceed is needed to collect remote tables to local memory
force_proceed = TRUE
)
## -----------------------------------------------------------------------------
pool_case(result_list,
def = sud_def,
# your could skip summary with output_lvl = "raw"
output_lvl = "clnt",
# include records only from sources having valid records, see function documentation for more detail and other options
include_src = "has_valid",
force_proceed = TRUE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.