knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
TrainCode <- read_csv("data_org/TrainCode.csv")
ValidCode <- read_csv("data_org/ValidCode.csv")
TrainSurv <- read_csv("data_org/TrainSurv.csv")
ValidSurv <- read_csv("data_org/ValidSurv.csv")
f <- function(data, var) {
  data2 <- data %>% 
    mutate(code = substr(var, 5, 5)) %>%
    filter(.data[[var]] > 0) %>%
    select(code, id, time = month, .data[[var]]) %>%  
    slice(rep(1:n(), times = .data[[var]])) %>% 
    select(-.data[[var]])
  return(data2)
}
follow_up_time <- bind_rows(
  bind_cols(TrainCode[, 1:2], train_valid = 1), 
  bind_cols(ValidCode[, 1:2], train_valid = 2)) %>%
  distinct() 
follow_up_time
longitudinal <- bind_rows(
  f(TrainCode, "pred1"),
  f(TrainCode, "pred2"),
  f(TrainCode, "pred3"),  
  f(ValidCode, "pred1"),
  f(ValidCode, "pred2"),
  f(ValidCode, "pred3")) %>%
  arrange(code, id, time)
longitudinal
survival <- bind_rows(
  TrainSurv[,-4], 
  ValidSurv[,-4])
survival
write_csv(follow_up_time, "follow_up_time.csv")
write_csv(longitudinal, "longitudinal.csv")
write_csv(survival, "survival.csv")
usethis::use_data(follow_up_time, overwrite = TRUE)
usethis::use_data(longitudinal, overwrite = TRUE)
usethis::use_data(survival, overwrite = TRUE)

sample data for the validation with a new data set

set.seed(123)
tmp = follow_up_time$id[follow_up_time$train_valid == 2]
sid = sample(tmp, size=350)

wk1 = follow_up_time[follow_up_time$id %in% sid,]
new_follow_up_time=data.frame(id=wk1$id + 100000,fu_time=wk1$fu_time)

new_longitudinal = longitudinal[longitudinal$id %in% sid,]
new_longitudinal$id = new_longitudinal$id  + 100000

new_survival = survival[survival$id %in% sid,]
new_survival$id = new_survival$id  + 100000
usethis::use_data(new_follow_up_time, overwrite = TRUE)
usethis::use_data(new_longitudinal, overwrite = TRUE)
usethis::use_data(new_survival, overwrite = TRUE)
proc.time()


celehs/PETLER documentation built on Sept. 3, 2021, 8:21 a.m.