# File: R/edit_baseline_data.R

# Doc header --------------------------------------------------------------

# author: "Jan van den Brand, PhD"
# email: jan.vandenbrand@kuleuven.be
# project: NSN19OK003
# funding: Dutch Kidney Foundation


# Transform baseline data  ------------------------------------------------------

# Lower-case every column name so later code can refer to columns
# without worrying about the capitalisation used in the raw data.
colnames(d) <- tolower(colnames(d))

# Collapse primary diagnoses into larger categories
# see https://nefrovisie.nl/wp-content/uploads/2016/08/renine_codes_primaire_diagnose.pdf
#
# The diagnosis text is lower-cased first, then each row is assigned the
# category of the FIRST rule (in the order listed below) whose pattern occurs
# in the text. Rows matching no rule get "other".
# NOTE(review): a missing diagnosis matches no pattern (grepl() returns FALSE
# for NA) and therefore ends up in "other" -- confirm this is intended.
d$era_edta_primary_renal_diagnosis <- tolower(d$era_edta_primary_renal_diagnosis)
d$primary_kd <- local({
  # pattern (regular expression) = category, highest priority first
  rules <- c(
    "glomerulo"          = "gn",
    "iga"                = "gn",
    "membranous"         = "gn",
    "pyelo"              = "tin",
    "analgesic"          = "tin",
    "ciclosporin"        = "tin",
    "cisplatin"          = "tin",
    "tubulointerstitial" = "tin",
    "cyst"               = "cystic",
    "congenital"         = "congenital",
    "familial"           = "congenital", # includes familial FSGS
    "prune"              = "congenital",
    "hypoplasia"         = "congenital",
    "oligomeganephronia" = "congenital",
    "hypertens"          = "vascular",
    # NOTE(review): the parentheses form a regex group, so this matches
    # "ckd" anywhere in the text, not only the literal "(ckd)".
    "(ckd)"              = "vascular", # includes nephrectomy and trauma
    "renal failure"      = "vascular",
    "diabetic"           = "diabetes",
    "polyangiitis"       = "systemic",
    "henoch"             = "systemic",
    "polyarteritis"      = "systemic",
    "lupus"              = "systemic",
    "amyloid"            = "systemic",
    "goodpasture"        = "systemic",
    "hus"                = "systemic"
  )
  diagnosis <- d$era_edta_primary_renal_diagnosis
  category <- rep("other", length(diagnosis))
  # Apply the rules from lowest to highest priority so that an earlier
  # (higher-priority) rule overwrites any later one: first match wins.
  for (pattern in rev(names(rules))) {
    category[grepl(pattern, diagnosis)] <- rules[[pattern]]
  }
  category
})

# encode character strings
# Convert every character column of `d` to a factor and print the name of
# each converted column. Empty strings are excluded from the levels, so they
# become NA in the resulting factor.
# Columns are accessed with `[[` so the check works for both base data frames
# and tibbles (for a tibble, `d[, col]` is itself a tibble and is never of
# type "character"). Iterating over names(d) is also safe when `d` has no
# columns.
# NOTE(review): `exclude = ""` replaces factor()'s default `exclude = NA`, so
# a genuine NA in a column becomes an explicit <NA> level -- confirm this is
# intended, or use `exclude = c("", NA)`.
for (col in names(d)) {
  if (is.character(d[[col]])) {
    print(col)
    d[[col]] <- factor(d[[col]], exclude = "")
  }
}

# encode other factor variables
# Recode indicator and categorical columns of `d` as factors and derive the
# transplantation year (txyear) from txdate.
#
# mutate() evaluates its arguments in order and later arguments see the
# columns produced by earlier ones, so every column below is recoded from
# ITS OWN values; sourcing one HLA locus from another would silently
# overwrite and lose data.
#
# NOTE(review): the factor(x, labels = ...) calls pass no `levels`, so the
# labels are attached to the sorted distinct values of each column. This
# presumably relies on a two-value coding in which the lower value means
# "no" (or "male" for the *_sex_m1 columns) -- confirm against the codebook.
d <- d %>% mutate(
  repeat_tx = factor(repeat_tx, labels = c("no", "yes")),
  donor_sex_m1 = factor(donor_sex_m1, labels = c("male", "female")),
  donor_ld = factor(donor_ld, labels = c("no", "yes")),
  donor_dcd = factor(donor_dcd, labels = c("no", "yes")),
  donor_dbd = factor(donor_dbd, labels = c("no", "yes")),
  rec_sex_m1 = factor(rec_sex_m1, labels = c("male", "female")),
  pretx_hla_abs = factor(pretx_hla_abs, labels = c("no", "yes")),
  iga_or_mesangial_proliferative_g = factor(iga_or_mesangial_proliferative_g, labels = c("no", "yes")),
  # HLA antigen mismatches, one column per locus
  antigen_mismatch_a = factor(antigen_mismatch_a),
  antigen_mismatch_b = factor(antigen_mismatch_b),
  antigen_mismatch_c = factor(antigen_mismatch_c),
  antigen_mismatch_drb1 = factor(antigen_mismatch_drb1),
  antigen_mismatch_dr345 = factor(antigen_mismatch_dr345),
  antigen_mismatch_dqb1 = factor(antigen_mismatch_dqb1),
  # first-field mismatches, one column per locus
  # NOTE(review): assumes mismatch_1st_field_dpa1 exists in the input data --
  # confirm.
  mismatch_1st_field_dpa1 = factor(mismatch_1st_field_dpa1),
  mismatch_1st_field_dpb1 = factor(mismatch_1st_field_dpb1),
  mismatch_1st_field_dqa1 = factor(mismatch_1st_field_dqa1),
  jkmatch_1_jkmismatch_2 = factor(jkmatch_1_jkmismatch_2),
  jkmatch_1_jkmm_3_jkbmm_4 = factor(jkmatch_1_jkmm_3_jkbmm_4),
  fymatch_1_fymismatch_2 = factor(fymatch_1_fymismatch_2),
  fymatch_1_fyamm_3_fybmm_4 = factor(fymatch_1_fyamm_3_fybmm_4),
  jkfymatch_2_jkoffymm_3_jkfymm_4 = factor(jkfymatch_2_jkoffymm_3_jkfymm_4),
  # pre-transplant HLA antibodies (pretx_hla_abs is already recoded above)
  pretx_hla_i_abs = factor(pretx_hla_i_abs),
  pretx_hla_ii_abs = factor(pretx_hla_ii_abs),
  pretx_a_abs = factor(pretx_a_abs),
  pretx_b_abs = factor(pretx_b_abs),
  pretx_cw_abs = factor(pretx_cw_abs),
  pretx_dr_abs = factor(pretx_dr_abs),
  pretx_dq_abs = factor(pretx_dq_abs),
  pretx_dp_abs = factor(pretx_dp_abs),
  # pre-transplant DSA columns
  overall_pretx_dsa = factor(overall_pretx_dsa),
  pretx_class_i_dsa = factor(pretx_class_i_dsa),
  pretx_dsa_a = factor(pretx_dsa_a),
  pretx_dsa_b = factor(pretx_dsa_b),
  pretx_dsa_c = factor(pretx_dsa_c),
  pretx_class_ii_dsa = factor(pretx_class_ii_dsa),
  pretx_dsa_dr = factor(pretx_dsa_dr),
  pretx_dsa_dq = factor(pretx_dsa_dq),
  pretx_dsa_dp = factor(pretx_dsa_dp),
  # dnDSA and post-transplant DSA columns
  overall_dndsa = factor(overall_dndsa),
  dndsa_class_i = factor(dndsa_class_i),
  dndsa_class_ii = factor(dndsa_class_ii),
  dsa_at_3months_posttx = factor(dsa_at_3months_posttx),
  dsa_at_1year_posttx = factor(dsa_at_1year_posttx),
  induction = factor(induction, labels = c("no", "yes")),
  dgf = factor(dgf, labels = c("no", "yes")),
  txyear = year(txdate)
)

# create outcome
# event: outcome status per row, one of "censored", "graft failure", "death".
# stime: time from txdate to the date of that outcome, or to
#        laatste_follow_up (last follow-up) when censored.
d <- d %>% mutate(
  event = factor(
    case_when(
      # neither a graft-failure date nor a death date recorded
      is.na(date_graft_failure) & is.na(date_rec_death) ~ 0,
      # graft failure strictly before death, or no death date recorded
      (date_graft_failure < date_rec_death) | is.na(date_rec_death) ~ 1,
      # death on or before graft failure (a tie counts as death), or no
      # graft-failure date recorded
      (date_rec_death <= date_graft_failure) | is.na(date_graft_failure) ~ 2
    ),
    # Explicit levels pin the code -> label mapping, so the factor is built
    # correctly even when one of the outcomes does not occur in the data.
    levels = c(0, 1, 2),
    labels = c("censored", "graft failure", "death")
  ),
  # Derived from `event` so both columns always follow the same rules.
  stime = case_when(
    event == "censored" ~ laatste_follow_up - txdate,
    event == "graft failure" ~ date_graft_failure - txdate,
    event == "death" ~ date_rec_death - txdate
  )
)

# drop identifiers
# Remove every column whose name starts with "date" (matched without regard
# to case), plus "gebdat_all".
# NOTE(review): txdate and laatste_follow_up do not start with "date" and are
# therefore kept -- confirm that is intended.
drop <- c(
  names(d)[grepl("^date", names(d), ignore.case = TRUE)],
  "gebdat_all"
)
d <- d[, !is.element(names(d), drop)]

# drop uninformative and redundant variables
# The trailing comments give the author's reason for dropping each column.
drop <- c(
  "combined_tx",                      # all 0
  "surv_time",                        # == stime
  "death_censor",                     # ~ event
  "nbvndiag",                         # primary_kd
  "era_edta_primary_renal_diagnosis"  # collapsed into primary_kd
)
d <- d[, !is.element(names(d), drop)]
# Source: JanvandenBrand/highdimjm, documentation built on Dec. 18, 2021, 12:32 a.m.