# File: R/edit_bx_data.R

# Doc header --------------------------------------------------------------

# author: "Jan van den Brand, PhD"
# email: jan.vandenbrand@kuleuven.be
# project: NSN19OK003
# funding: Dutch Kidney Foundation

# biopsy data -------------------------------------------------------------
# Restrict the raw biopsy table to the columns listed below. Columns whose
# raw names contain spaces or punctuation are quoted with backticks.
d_bx <- dplyr::select(
  d_bx,
  transnr, eadnr, txdate, time_point, biopsy_date,
  `Biopsy ID`, `Status biopsy`, `Biopsy quality`,
  t, i, g, ah, v, cg, ci, ct, cv, mm, ptcitis, trombi,
  ATN, `n glomeruli`, gs, gs_grade, GNF, `PVAN grade (0 - A-B-C)`,
  `C4d PTC`, `C4d GC`, `C3d PTC`, `C3d GC`, IFTA_grade,
  CADI_score, mvi, hum_rej_score_g_ptc_cg_v_C4d,
  cell_rej_score_t_i_v, TCMR_zonder_border, TCMR_met_border,
  ABMRh_BANFF_2019, current_dsa
)

# Normalise the column names in one step: every space becomes an underscore
# and the result is lower-cased (e.g. `Biopsy ID` -> `biopsy_id`).
names(d_bx) <- tolower(
  str_replace_all(names(d_bx), pattern = " ", replacement = "_")
)
# Recode biopsy quality and derive a binary PVAN indicator.
#
# biopsy_quality: values starting with "a" (case-insensitive) become
#   "sufficient", missing values (NA or "") stay NA, and every other value
#   becomes "insufficient". The result is stored as a factor.
# pvan: 0 when the PVAN grade equals 0, NA when the grade is missing
#   (NA or ""), and 1 for every other grade.
#
# The missing-value branch is tested first. Otherwise an NA input makes the
# comparisons return NA, case_when() skips them, and the row falls through
# to the catch-all branch (i.e. it would be coded "insufficient" / 1).
d_bx <- d_bx %>%
  mutate(
    biopsy_quality = factor(
      case_when(
        is.na(biopsy_quality) | tolower(biopsy_quality) == "" ~ NA_character_,
        str_starts(tolower(biopsy_quality), "a") ~ "sufficient",
        TRUE ~ "insufficient"
      )
    ),
    pvan = case_when(
      is.na(`pvan_grade_(0_-_a-b-c)`) |
        `pvan_grade_(0_-_a-b-c)` == "" ~ NA_real_,
      `pvan_grade_(0_-_a-b-c)` == 0 ~ 0,
      TRUE ~ 1
    )
  )
# Encode character columns as factors.
#
# Columns are extracted with `[[` so the type check sees the column vector
# itself. With `d_bx[, name]` a tibble returns a one-column tibble (type
# "list"), so no column would ever be recognised as character.
# Both "" and NA are excluded from the levels: passing only `exclude = ""`
# replaces the default `exclude = NA`, which would turn NA into an explicit
# factor level instead of a missing value.
chr_cols <- names(d_bx)[vapply(d_bx, is.character, logical(1))]
for (chr_col in chr_cols) {
  # Report which columns are being converted.
  print(chr_col)
  d_bx[[chr_col]] <- factor(d_bx[[chr_col]], exclude = c("", NA))
}
# Create factors for the biopsy description columns named in `bx_params`.
# The longer list below is currently disabled; only the five columns in the
# active vector are converted with as.factor().
# bx_params <- c("t", "i", "g", "ah", "v", "cg", "ci", "ct", "cv", "mm", "ptcitis",
#                "trombi", "atn", "gs_grade", "c4d_ptc", "c4d_gc", "c3d_ptc",
#                "c3d_gc", "ifta_grade", "cadi_score", "mvi",
#                "hum_rej_score_g_ptc_cg_v_c4d", "cell_rej_score_t_i_v",
bx_params <- c(
  "tcmr_zonder_border",
  "tcmr_met_border",
  "abmrh_banff_2019",
  "current_dsa",
  "pvan"
)
d_bx <- mutate(d_bx, across(all_of(bx_params), as.factor))

# Selection of protocol baseline biopsies (currently disabled)
# d_bx_bas <- d_bx %>% 
#   filter(time_point == "MONTH_00") %>%
#   select(transnr, biopsy_date, biopsy_id, 
#          t, i, g, ah, v, cg, ci, ct, cv, mm, ptcitis, trombi, 
#          gs, c4d_ptc, c4d_gc, c3d_ptc, c3d_gc, tcmr_zonder_border, tcmr_met_border,
#          abmrh_banff_2019, current_dsa, pvan)

# Keep the final set of biopsy columns.
# NOTE(review): the earlier comment described this step as "select and
# rename follow-up biopsies", but no rows are filtered and no columns are
# renamed here -- confirm whether that is intended.
d_bx <- dplyr::select(
  d_bx,
  transnr, biopsy_date, biopsy_id, biopsy_quality,
  t, i, g, ah, v, cg, ci, ct, cv, mm, ptcitis, trombi,
  gs, c4d_ptc, c4d_gc, c3d_ptc, c3d_gc,
  tcmr_zonder_border, tcmr_met_border,
  abmrh_banff_2019, current_dsa, pvan
)
# Source: JanvandenBrand/highdimjm documentation, built on Dec. 18, 2021, 12:32 a.m.