In celehs/PETLER: Multi-modal Automated Survival Time Annotation

Overview

This package re-implements the first step of the {MASTA} package to extract features from longitudinal encounter records. Compared to {MASTA}, the input data of {IFPCA} is more compact and memory efficent. Click HERE to view input data structure.

Installation

Load the package into R.

library(MASTA)
library(data.table)

Data Example

Data Preparation

url <- "https://raw.githubusercontent.com/celehs/IFPCA/master/data-raw/"
follow_up_train <- fread(paste0(url, "follow_up_train.csv"))[1:1000]
follow_up_train <- fread(paste0(url, "follow_up_train.csv"))[1:600] #---- labeled data only
follow_up_valid <- fread(paste0(url, "follow_up_valid.csv"))

fu_train <- follow_up_train$fu_time
fu_valid <- follow_up_valid$fu_time
names(fu_train) <- follow_up_train$id
names(fu_valid) <- follow_up_valid$id

#--- the number of longitudinal codes
number_of_codes = 3

D=list()
for (i in 1:number_of_codes){
time_code <- fread(paste0(url, "time_code",i,".csv"))
time <- time_code$month
names(time) <- time_code$id
idx = names(time) %in% c(names(fu_train),names(fu_valid))
D[[i]]=time[idx]
}

Run the 1st step (feature selection)

TrainFt = c()
ValidFt = c()
TrainPK = c()
ValidPK = c()
TrainN = c()
ValidN = c()

for (i in 1:length(D)){
 time=D[[i]]
 ans <- ifpca(time, fu_train, fu_valid)

#--- create an object for fitting --
Ft_name = colnames(ans$TrainFt) ; Ft_name = paste0(Ft_name,i) ; Ft_name ; colnames(ans$TrainFt) = Ft_name
TrainFt = cbind(TrainFt,ans$TrainFt)
Ft_name = colnames(ans$ValidFt) ; Ft_name = paste0(Ft_name,i) ; Ft_name ; colnames(ans$ValidFt) = Ft_name
ValidFt = cbind(ValidFt, ans$ValidFt)
TrainPK = cbind(TrainPK, ans$TrainPK) 
ValidPK = cbind(ValidPK, ans$ValidPK) 
if(i ==1 ){ TrainN  = ans$TrainN ; ValidN  = ans$ValidN ;}
if(i !=1 ){
  TrainN  = cbind(TrainN, ans$TrainN[,2]) 
  ValidN  = cbind(ValidN, ans$ValidN[,2]) 
}
}

colnames(TrainPK)=colnames(ValidPK) = paste0("pred",1:length(D)) 
colnames(TrainN)=colnames(ValidN) = c("id",paste0("pred",1:length(D),"_total"))

Z=list()
Z$TrainFt = TrainFt
Z$ValidFt = ValidFt
Z$TrainPK = TrainPK
Z$ValidPK = ValidPK
Z$TrainN = TrainN
Z$ValidN = ValidN

Data preparation for the 2nd step (fitting)

url <- "https://raw.githubusercontent.com/celehs/MASTA/master/data-raw/data_org/"
TrainSurv <- fread(paste0(url, "TrainSurv.csv"))
ValidSurv <- fread(paste0(url, "ValidSurv.csv"))
#load("data_org.rda")
TrainSurv <- data.frame(TrainSurv)
ValidSurv <- data.frame(ValidSurv)
colnames(TrainSurv)=colnames(ValidSurv) = c("case","delta","sx","sc",paste0("base_pred",1:3))

Z$nn = nrow(TrainSurv)
Z$codes = paste0("pred",1:3) 
Z$Tend <- 1
Z$TrainSurv <- TrainSurv
Z$ValidSurv <- ValidSurv 
Z$TrainSurv_pred_org <- TrainSurv[,-c(1:4)]
Z$ValidSurv_pred_org <- ValidSurv[,-c(1:4)]
str(Z)
save(Z,file="./data/data-for-fit.RData")