# File: R/edit.R

# Doc header --------------------------------------------------------------

# author: "Jan van den Brand, PhD"
# email: jan.vandenbrand@kuleuven.be
# project: NSN19OK003
# funding: Dutch Kidney Foundation
# Topic: Data edit

# 0: Preliminaries  ------------------------------------------------------

# 1: Extract --------------------------------------------------------

# Read the three SAS exports and convert each result to a plain data.frame.
# NULL is passed as the second positional argument of read_sas() in every
# call (the catalog file in haven's documented signature).
d <- as.data.frame(
  read_sas("data/tx_data_tot_1sep2019.sas7bdat", NULL)
)

d_gfr <- as.data.frame(
  read_sas("data/egfr_tot1sep2019.sas7bdat", NULL)
)

d_bx <- as.data.frame(
  read_sas("data/all_biopsies_1sep2019.sas7bdat", NULL)
)

# Baseline data ----
# Run the baseline edit script before the GFR steps below.
source("R/edit_baseline_data.R")

# GFR data ----
# Lower-case all column names so they are uniform
names(d_gfr) <- tolower(names(d_gfr))

# Follow-up time: days between `txdate` and `date`, divided by 30.4375
# (= 365.25 / 12); the difftime unit label is then overwritten with "months".
d_gfr <- mutate(
  d_gfr,
  time = difftime(date, txdate, units = "days") / 30.4375
)
attr(d_gfr$time, "units") <- "months"

# Largest follow-up time per `transnr`, attached to every row of that group
maxtime <- summarize(
  group_by(d_gfr, transnr),
  maxtime = max(as.numeric(time))
)
d_gfr <- merge(d_gfr, maxtime, by = "transnr")

# Biopsy data ----
source("R/edit_bx_data.R")

# Merge ----
# Keep only the baseline columns listed below and derive the donor type,
# all in one pipeline.
d <- d %>%
  dplyr::select(
    transnr, eadnr, txdate, repeat_tx,
    donor_age, donor_sex_m1, donor_ld, donor_dcd, donor_dbd,
    rec_age, rec_sex_m1, rec_race, rec_bmi_d0,
    cit,
    abdr_antigen_mismatches,
    abdrdq_antigen_mismatches, # dr/dq is specialized
    pretx_hla_abs, overall_pretx_dsa, induction, # BSX
    anastomosis_time_minutes, # optional - definitions not clear
    primary_kd, txyear,
    event, stime
  ) %>%
  mutate(
    # The first flag equal to "yes" decides the label; rows where none of the
    # three flags is "yes" get NA.
    donor_type = factor(case_when(
      donor_ld == "yes" ~ "Living",
      donor_dcd == "yes" ~ "DCD",
      donor_dbd == "yes" ~ "DBD"
    ))
  )
# Reduce the GFR table to the join keys and lab values; every other column
# (including the `time` and `maxtime` added earlier) is dropped here.
d_gfr <- dplyr::select(
  d_gfr,
  transnr, eadnr, txdate, date, gfr, nf_protu, nf_procr
)

# Check how many biopsies there are without creatinine values:
# compare the row count of the right join with the number of biopsies.
d_gfr %>%
  right_join(d_bx, by = c("transnr" = "transnr", "date" = "biopsy_date")) %>%
  nrow()
nrow(d_bx)

# Attach biopsy columns to the GFR row with the same `transnr` and date
d_gfr <- left_join(
  d_gfr, d_bx,
  by = c("transnr" = "transnr", "date" = "biopsy_date")
)

# Combine baseline and longitudinal data, keeping unmatched rows of both
d_long <- full_join(d, d_gfr, by = c("transnr" = "transnr"))

# Check how many follow-up visits there are without creatinine values
d_long %>%
  filter(is.na(gfr)) %>%
  nrow()

# `eadnr` and `txdate` exist in both inputs. Drop the `.y` copies (from
# d_gfr), then strip the `.x` suffix from the copies that came from `d`.
# NOTE(review): rows present only in d_gfr end up with NA eadnr/txdate.
d_long <- d_long %>%
  dplyr::select(!ends_with(".y")) %>%
  rename(eadnr = eadnr.x, txdate = txdate.x)
# Select baseline data: per `transnr`, the row(s) at the earliest `date`.
# Rows with a missing `date` are excluded up front; otherwise a single NA
# makes `min(date)` NA and silently removes every row of that group.
# The result is ungrouped so that later operations on `d_bas` are not
# unintentionally evaluated per `transnr`.
# NOTE(review): this snapshot is taken before the outlier cleaning in the
# Transform section, so it carries the uncleaned lab values.
d_bas <- d_long %>%
  filter(!is.na(date)) %>%
  group_by(transnr) %>%
  filter(date == min(date)) %>%
  ungroup()
# Fraction of rows in d_long with a non-missing `biopsy_id`
sum(!is.na(d_long$biopsy_id)) / nrow(d_long)
# only 3.3% of rows have associated biopsy values

# Remove the intermediate objects
rm(d, d_gfr, d_bx, maxtime)

# Transform ---- 
# Follow-up time in months: days between `txdate` and `date` divided by
# 30.4375 (= 365.25 / 12), rounded to three decimals.
# The unit is fixed to days explicitly, as in the GFR section earlier in this
# script. A bare `date - txdate` only guarantees days for Date columns; for
# date-time columns R picks the unit automatically, which would make the
# divisor meaningless.
d_long <- d_long %>% mutate(
  time = round(
    as.numeric(difftime(date, txdate, units = "days")) / 30.4375,
    3
  )
)

# Per-`transnr` IQR and median of each lab value, ignoring missing values.
# across() names the outputs `<column>_<function>`, i.e. gfr_iqr, gfr_median,
# nf_protu_iqr, nf_protu_median, nf_procr_iqr, nf_procr_median.
d_long_grouped <- summarize(
  group_by(d_long, transnr),
  across(
    c(gfr, nf_protu, nf_procr),
    list(
      iqr = function(x) IQR(x, na.rm = TRUE),
      median = function(x) median(x, na.rm = TRUE)
    )
  )
)

# Attach the per-`transnr` statistics to every row, then drop the helper table
d_long <- merge(d_long, d_long_grouped, by = "transnr")
rm(d_long_grouped)
# Set implausible lab values to NA. A value is blanked when it lies more than
# 3 IQRs below or above the per-`transnr` median, or when it exceeds a fixed
# ceiling (200 for gfr, 20 for nf_procr; nf_protu has no ceiling).
# A comparison that evaluates to NA does not match, so the value is kept.
d_long <- d_long %>%
  mutate(
    gfr = case_when(
      gfr < gfr_median - 3 * gfr_iqr |
        gfr > gfr_median + 3 * gfr_iqr |
        gfr > 200 ~ NA_real_,
      TRUE ~ gfr
    ),
    nf_protu = case_when(
      nf_protu < nf_protu_median - 3 * nf_protu_iqr |
        nf_protu > nf_protu_median + 3 * nf_protu_iqr ~ NA_real_,
      TRUE ~ nf_protu
    ),
    nf_procr = case_when(
      nf_procr < nf_procr_median - 3 * nf_procr_iqr |
        nf_procr > nf_procr_median + 3 * nf_procr_iqr |
        nf_procr > 20 ~ NA_real_,
      TRUE ~ nf_procr
    )
  ) %>%
  # Drop the helper columns (every column ending in "median" or "iqr")
  dplyr::select(!(ends_with("median") | ends_with("iqr")))
# Source: JanvandenBrand/highdimjm documentation built on Dec. 18, 2021, 12:32 a.m.