# Doc header --------------------------------------------------------------
# author: "Jan van den Brand, PhD"
# email: jan.vandenbrand@kuleuven.be
# project: NSN19OK003
# funding: Dutch Kidney Foundation
# Topic: Data exploration
# 0: Preliminaries ------------------------------------------------------
# Packages attached for this exploration script; library() stops with an
# error when one of them is not installed.
# NOTE(review): later sections call mutate()/across() and survfit()/Surv().
# None of the packages listed here appears to attach dplyr or survival, so
# presumably one of the sourced project scripts does -- confirm.
reqlib <- c(
  "foreign", "lattice", "MASS", "gridExtra", "tidyr", "haven", "mice",
  "reshape2", "visdat", "tableone", "lubridate"
)
# invisible(): library() returns the attached-package vector for every call;
# without it the whole list is auto-printed to the console.
invisible(lapply(reqlib, library, character.only = TRUE))
# Every path used below is relative to the project root. The root can be
# overridden with the HIGHDIMJM_ROOT environment variable; when that variable
# is unset or empty the original hard-coded location is used.
project_root <- Sys.getenv("HIGHDIMJM_ROOT", unset = NA)
if (is.na(project_root) || !nzchar(project_root)) {
  project_root <- "C:/Users/jajgv/Documents/repos/highdimjm"
}
setwd(project_root)
source("R/mmm_functions.R")
# set seed for reproducibility
set.seed(20201013)
# 1: edit --------------------------------------------------------
# NOTE(review): d_bas, get_label(), get_class() and get_summary() are not
# defined in this file; presumably the two scripts sourced here provide them.
source("R/edit.R")
source("R/create_codebook.R")
## Codebook for baseline data
# lapply() over a data frame already visits one column at a time and keeps
# the column names, so no 1:ncol() re-subsetting is needed.
varlabels <- lapply(d_bas, get_label)
classlabel <- lapply(d_bas, get_class)
varsum <- lapply(d_bas, get_summary)
# One codebook row per column of d_bas. paste() collapses each list element
# of varsum into a single character string.
codebook <- data.frame(
  varname = names(d_bas),
  varlabel = unlist(varlabels),
  class = unlist(classlabel),
  summary = paste(varsum)
)
# Tab-separated export of the codebook, without row names.
write.table(codebook, file = "data/codebook_baseline.txt", sep = "\t", row.names = FALSE)
# 2: Visualization ----
# Each pdf()/dev.off() pair below sends whatever is drawn in between to the
# named file.
# NOTE(review): the plotting calls are bare top-level expressions, so the
# plots presumably reach the device through auto-printing (or because the
# helpers draw themselves). If this script is run via source() without echo,
# confirm the PDFs are not empty.
# Pie/bar charts for categorical variables
# classlabel is the per-column list built from get_class() above; keep the
# names of the columns whose class label equals "factor".
factors <- names(classlabel)[classlabel == "factor"]
pdf(file = "plots/visualization_barcharts.pdf")
lapply(factors, make_bar_plot, data = d_bas)
dev.off()
# create histograms for continuous variables
# Columns whose class label equals "numeric".
markers <- names(classlabel)[classlabel == "numeric"]
pdf(file = "plots/visualization_histograms.pdf")
lapply(markers, make_histogram, data = d_bas)
dev.off()
# Spaghetti plots
# markers is overwritten here with the three follow-up variables to plot
# from d_long.
markers <- c("gfr", "nf_protu", "nf_procr")
pdf(file = "plots/visualization_spaghetti.pdf")
lapply(markers, make_spaghetti_plots, data = d_long, ftime = "time", id = "transnr", breaks = 12)
dev.off()
# Missing data analysis -----------------
# Missingness map of the baseline data, written to its own PDF.
pdf(file = "plots/template_baseline_missingdata.pdf")
vis_miss(d_bas, cluster = TRUE)
dev.off()
# Impute baseline missing data. Use a single imputation for testing and MI for model implementation
# Dry run (maxit = 0): no imputation iterations, only used to obtain the
# predictor matrix and the imputation methods that are reused below.
# The argument is spelled out as printFlag; `print` only reached it through
# partial argument matching.
init_mice <- mice(
  d_bas,
  maxit = 0,
  predictorMatrix = quickpred(d_bas, mincor = 0.2),
  printFlag = FALSE
)
predmat <- init_mice$predictorMatrix
# Exclude predictors
# Zeroing a column of the predictor matrix stops that variable from being
# used as a predictor for any other variable.
predmat[, c("transnr", "eadnr", "txdate", "date")] <- 0
imp_method <- init_mice$method
# Actual imputation run: m = 20 imputed data sets, 10 iterations each.
imp_bas <- mice(d_bas, method = imp_method, predictorMatrix = predmat, maxit = 10, m = 20)
# diagnostics
pdf(file = "plots/template_baseline_imputation_dx.pdf")
plot(imp_bas)
stripplot(imp_bas)
dev.off()
# Missingness map of the follow-up data; warn_large_data = FALSE switches
# off vis_miss()'s large-data warning.
pdf(file = "plots/template_missingfudata.pdf")
vis_miss(d_long, warn_large_data = FALSE)
dev.off()
# 3: Inspect baseline data -----------------------
# Variables reported in the baseline table, in display order.
tablevars <- c(
  "rec_age", "rec_sex_m1", "rec_race", "rec_bmi_d0", "primary_kd",
  "donor_age", "donor_sex_m1", "donor_type",
  "txyear", "abdr_antigen_mismatches", "abdrdq_antigen_mismatches",
  "pretx_hla_abs", "overall_pretx_dsa", "induction",
  "cit", "anastomosis_time_minutes", "current_dsa",
  "t", "i", "g", "ah", "v", "cg", "ci", "ct", "cv", "mm", "ptcitis",
  "trombi", "gs", "c4d_ptc", "c4d_gc", "c3d_ptc", "c3d_gc",
  "stime", "event"
)
# Subset of variables that are converted to factors and tabulated as
# categorical.
factorvars <- c(
  "rec_sex_m1", "rec_race", "primary_kd", "donor_type", "donor_sex_m1",
  "abdr_antigen_mismatches", "pretx_hla_abs", "overall_pretx_dsa", "induction",
  "t", "i", "g", "ah", "v", "cg", "ci", "ct", "cv", "mm", "ptcitis",
  "trombi", "c4d_ptc", "c4d_gc", "c3d_ptc", "c3d_gc"
)
# Coerce every column named in factorvars to a factor, in place.
d_bas <- mutate(d_bas, across(all_of(factorvars), as.factor))
# Variables passed to print() as `nonnormal` further down.
skewvars <- c("rec_age", "donor_age", "rec_bmi_d0", "anastomosis_time_minutes", "gs")
# Bar charts per factor split by event, and histograms of the skewed
# variables; no pdf() device is opened here, so they go to the current device.
lapply(factorvars, make_bar_plot, data = d_bas, by = "event")
lapply(skewvars, make_histogram, data = d_bas)
# Baseline characteristics stratified by event.
baseline_table <- CreateTableOne(
  data = d_bas,
  vars = tablevars,
  factorVars = factorvars,
  strata = "event"
)
# printToggle = FALSE: capture the formatted table from print() instead of
# printing it; the object is then shown by the bare expression that follows.
baseline_table <- print(
  baseline_table,
  nonnormal = skewvars,
  formatOptions = list(big.mark = ","),
  quote = FALSE,
  noSpaces = TRUE,
  printToggle = FALSE
)
baseline_table
# 4: Inspect Follow-up data -----
# Convert the same factorvars columns to factors in the long-format data.
# NOTE(review): all_of() errors when a listed column is absent, so this
# assumes d_long carries every baseline column named in factorvars -- confirm.
d_long <- d_long %>% mutate(
across(all_of(factorvars), as.factor)
)
# Bar charts of each factor by event, drawn from the follow-up data into one
# PDF.
pdf("plots/template_followup.pdf")
lapply(factorvars, make_bar_plot, data=d_long, by="event")
dev.off()
# Overall (intercept-only) survival fit on the baseline data; the result is
# shown through top-level auto-printing.
# NOTE(review): stime / 365.25 presumably converts days to years -- confirm
# the unit of stime.
# NOTE(review): as.integer(event) > 1 looks like it expects event to be a
# factor whose second level marks the event; for a numeric 0/1 column it
# would never be TRUE -- verify the encoding of d_bas$event.
survfit(Surv(d_bas$stime/365.25, as.integer(d_bas$event) > 1) ~ 1)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.