# Doc header --------------------------------------------------------------
# author: "Jan van den Brand, PhD"
# email: jan.vandenbrand@kuleuven.be
# project: NSN19OK003
# funding: Dutch Kidney Foundation
# Topic: Data edit
# 0: Preliminaries ------------------------------------------------------
# 1: Extract --------------------------------------------------------
# Read the three SAS files and convert each result to a plain data.frame
# straight away (read_sas() itself returns a tibble).
d <- as.data.frame(
  read_sas("data/tx_data_tot_1sep2019.sas7bdat", NULL)
)
d_gfr <- as.data.frame(
  read_sas("data/egfr_tot1sep2019.sas7bdat", NULL)
)
d_bx <- as.data.frame(
  read_sas("data/all_biopsies_1sep2019.sas7bdat", NULL)
)
# baseline data -----------------------
# The baseline edits live in a separate script that is evaluated here.
# NOTE(review): presumably it modifies `d` in place -- confirm in that script.
source('R/edit_baseline_data.R')
# GFR data ----
# unify variable names (everything lower case)
colnames(d_gfr) <- tolower(colnames(d_gfr))
# calculate follow-up time: day difference between measurement date and
# transplant date, divided by 30.4375 (average days per month)
d_gfr$time <- difftime(d_gfr$date, d_gfr$txdate, units = "days") / 30.4375
# difftime has no native "months" unit, so the label is overwritten by hand
attr(d_gfr$time, "units") <- "months"
# largest follow-up time per transplant, attached to every row of d_gfr
maxtime <- summarize(
  group_by(d_gfr, transnr),
  maxtime = max(as.numeric(time))
)
d_gfr <- merge(x = d_gfr, y = maxtime, by = "transnr")
# Biopsy data ----
# NOTE(review): the biopsy edits are in a separate script; presumably it
# prepares `d_bx` -- confirm in that script.
source("R/edit_bx_data.R")
# Merge ----
# Restrict `d` to the listed identifier, donor, recipient, immunology and
# outcome columns, then derive a single donor_type factor from the three
# donor indicator columns.
d <- d %>%
  dplyr::select(
    transnr, eadnr, txdate, repeat_tx,
    donor_age, donor_sex_m1, donor_ld, donor_dcd, donor_dbd,
    rec_age, rec_sex_m1, rec_race, rec_bmi_d0,
    cit,
    abdr_antigen_mismatches,
    abdrdq_antigen_mismatches,   # dr/dq is specialized
    pretx_hla_abs, overall_pretx_dsa, induction,   # BSX
    anastomosis_time_minutes,    # optional - definitions not clear
    primary_kd, txyear,
    event, stime
  ) %>%
  mutate(
    # The first matching condition wins; rows matching none of the three
    # indicators get NA.
    donor_type = factor(case_when(
      donor_ld == "yes" ~ "Living",
      donor_dcd == "yes" ~ "DCD",
      donor_dbd == "yes" ~ "DBD"
    ))
  )
# Keep only the identifiers, dates and lab values from the GFR data.
d_gfr <- d_gfr %>%
  dplyr::select(transnr, eadnr, txdate, date, gfr, nf_protu, nf_procr)
# Check how many biopsies there are without creatinine values.
# The right join keeps every biopsy; a biopsy with no GFR value on its
# biopsy date ends up with gfr == NA, so those rows are counted here
# (the raw row count of the join would also include matched biopsies).
nrow(
  d_gfr %>%
    right_join(d_bx, by = c("transnr" = "transnr", "date" = "biopsy_date")) %>%
    filter(is.na(gfr))
)
# Total number of biopsies, for comparison with the count above.
nrow(d_bx)
# Attach biopsy columns to the GFR row of the same transplant and date.
d_gfr <- d_gfr %>%
  left_join(d_bx, by = c("transnr" = "transnr", "date" = "biopsy_date"))
# Combine baseline and longitudinal data; the full join keeps transplants
# that appear in only one of the two tables.
d_long <- d %>% full_join(d_gfr, by = c("transnr" = "transnr"))
# Check how many follow-up visits there are without creatinine values
nrow(d_long %>% filter(is.na(gfr)))
# eadnr and txdate exist in both inputs, so the join suffixes them with
# .x (from d) and .y (from d_gfr); keep the copy from d and drop the other.
# NOTE(review): rows present only in d_gfr lose their eadnr/txdate this way
# -- confirm that no such rows are expected.
d_long <- d_long %>%
  rename(eadnr = eadnr.x, txdate = txdate.x) %>%
  dplyr::select(!ends_with(".y"))
# Select baseline data: per transplant, the row(s) whose date equals the
# earliest date of that transplant. Note that d_bas stays grouped by transnr.
d_bas <- filter(group_by(d_long, transnr), date == min(date))
# Fraction of rows in d_long that carry a biopsy (non-missing biopsy_id)
sum(!is.na(d_long$biopsy_id)) / nrow(d_long)
# only 3.3% of rows have associated biopsy values
# The intermediate tables are no longer needed once d_long and d_bas exist.
rm(d, d_gfr, d_bx, maxtime)
# Transform ----
# Follow-up time in months since transplantation, rounded to 3 decimals.
# The difference is requested in days explicitly: plain subtraction only
# guarantees days for Date columns, while date-time columns get automatically
# chosen units (secs/mins/hours/days), which would make the division by
# 30.4375 (average days per month) meaningless.
d_long <- d_long %>% mutate(
  time = round(
    as.numeric(difftime(date, txdate, units = "days")) / 30.4375,
    3
  )
)
# Outlier handling for gfr, nf_protu and nf_procr.
# Step 1: compute the median and IQR of each lab value per transplant
#         (missing values ignored) and merge them onto every row.
# Step 2: set a value to NA when it lies more than 3 IQRs away from the
#         transplant's own median, or exceeds a fixed ceiling
#         (gfr > 200, nf_procr > 20; nf_protu has no ceiling).
# Step 3: drop the helper *_median / *_iqr columns again.
d_long <- merge(
  d_long,
  d_long %>%
    group_by(transnr) %>%
    summarize(across(
      c(gfr, nf_protu, nf_procr),
      list(
        iqr = ~ IQR(.x, na.rm = TRUE),
        median = ~ median(.x, na.rm = TRUE)
      )
    )),
  by = "transnr"
) %>%
  mutate(
    # A condition that evaluates to NA does not match, so values that are
    # already missing simply fall through to the default branch.
    gfr = case_when(
      gfr < gfr_median - 3 * gfr_iqr |
        gfr > gfr_median + 3 * gfr_iqr |
        gfr > 200 ~ NA_real_,
      TRUE ~ gfr
    ),
    nf_protu = case_when(
      nf_protu < nf_protu_median - 3 * nf_protu_iqr |
        nf_protu > nf_protu_median + 3 * nf_protu_iqr ~ NA_real_,
      TRUE ~ nf_protu
    ),
    nf_procr = case_when(
      nf_procr < nf_procr_median - 3 * nf_procr_iqr |
        nf_procr > nf_procr_median + 3 * nf_procr_iqr |
        nf_procr > 20 ~ NA_real_,
      TRUE ~ nf_procr
    )
  ) %>%
  dplyr::select(!(ends_with("median") | ends_with("iqr")))
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.