This package re-implements the first step of the {MASTA} package to extract features from longitudinal encounter records. Compared to {MASTA}, the input data of {IFPCA} is more compact and memory efficent. Click HERE to view input data structure.
Install development version from GitHub.
## install.packages("remotes") remotes::install_github("celehs/IFPCA", force=TRUE)
Load the package into R.
library(data.table) library(IFPCA)
url <- "https://raw.githubusercontent.com/celehs/IFPCA/master/data-raw/" follow_up_train <- fread(paste0(url, "follow_up_train.csv"))[1:1000] follow_up_train <- fread(paste0(url, "follow_up_train.csv"))[1:600] #---- labeled data only follow_up_valid <- fread(paste0(url, "follow_up_valid.csv")) fu_train <- follow_up_train$fu_time fu_valid <- follow_up_valid$fu_time names(fu_train) <- follow_up_train$id names(fu_valid) <- follow_up_valid$id #--- the number of longitudinal codes number_of_codes = 3 D=list() for (i in 1:number_of_codes){ time_code <- fread(paste0(url, "time_code",i,".csv")) time <- time_code$month names(time) <- time_code$id idx = names(time) %in% c(names(fu_train),names(fu_valid)) D[[i]]=time[idx] }
TrainFt = c() ValidFt = c() TrainPK = c() ValidPK = c() TrainN = c() ValidN = c() for (i in 1:length(D)){ time=D[[i]] ans <- ifpca(time, fu_train, fu_valid) #--- create an object for fitting -- Ft_name = colnames(ans$TrainFt) ; Ft_name = paste0(Ft_name,i) ; Ft_name ; colnames(ans$TrainFt) = Ft_name TrainFt = cbind(TrainFt,ans$TrainFt) Ft_name = colnames(ans$ValidFt) ; Ft_name = paste0(Ft_name,i) ; Ft_name ; colnames(ans$ValidFt) = Ft_name ValidFt = cbind(ValidFt, ans$ValidFt) TrainPK = cbind(TrainPK, ans$TrainPK) ValidPK = cbind(ValidPK, ans$ValidPK) if(i ==1 ){ TrainN = ans$TrainN ; ValidN = ans$ValidN ;} if(i !=1 ){ TrainN = cbind(TrainN, ans$TrainN[,2]) ValidN = cbind(ValidN, ans$ValidN[,2]) } } colnames(TrainPK)=colnames(ValidPK) = paste0("pred",1:length(D)) colnames(TrainN)=colnames(ValidN) = c("id",paste0("pred",1:length(D),"_total")) Z=list() Z$TrainFt = TrainFt Z$ValidFt = ValidFt Z$TrainPK = TrainPK Z$ValidPK = ValidPK Z$TrainN = TrainN Z$ValidN = ValidN
url <- "https://raw.githubusercontent.com/celehs/MASTA/master/data-raw/data_org/" TrainSurv <- fread(paste0(url, "TrainSurv.csv")) ValidSurv <- fread(paste0(url, "ValidSurv.csv")) #load("data_org.rda") TrainSurv <- data.frame(TrainSurv) ValidSurv <- data.frame(ValidSurv) colnames(TrainSurv)=colnames(ValidSurv) = c("case","delta","sx","sc",paste0("base_pred",1:3)) Z$nn = nrow(TrainSurv) Z$codes = paste0("pred",1:3) Z$Tend <- 1 Z$TrainSurv <- TrainSurv Z$ValidSurv <- ValidSurv Z$TrainSurv_pred_org <- TrainSurv[,-c(1:4)] Z$ValidSurv_pred_org <- ValidSurv[,-c(1:4)] str(Z)
library(survival) library(doParallel) library(foreach) library(data.table) library(survC1) library(rootSolve) library(splines) library(gglasso) library(glmnet) library(rpart) library(rpart.utils) url <- "https://raw.githubusercontent.com/celehs/MASTA/master/R/" source(paste0(url,"masta-fit.R")) source(paste0(url,"masta-fit-bs-grplasso.R")) source(paste0(url,"masta-fit-npmle.R")) source(paste0(url,"masta-fit-cheng.R")) source(paste0(url,"MASTA.R"))
#-- This is gives an error with N=600 ---- "ht" becomes too small number <-100000 #-- This works with N=1000 b=masta.fit(Z, cov_group = NULL, thresh = 0.7, PCAthresh = 0.9, seed = 1234, seed2 = 100) b
#-- This is equivalent to TrainN and ValidN are NULL (This works with N=600) b=masta.fit(Z, cov_group = NULL, thresh = 1.0, PCAthresh = 0.9, seed = 1234, seed2 = 100) b
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.