knitr::opts_chunk$set(collapse = TRUE) options(width=100)
In step I, we use the masta.fpca
function to perform a functional PCA (Wu et al. 2013), which is used to estimate the mean density functions (and covariance functions) using longitudinal encounter data.
library(MASTA)
system.time(obj <- masta.fpca(data_org))
TrainCode <- data_org$TrainCode TrainCode[, monthstd := month / fu_time] DF.t <- rbind( data.frame(code = "pred1", t = with(TrainCode, rep(monthstd, pred1))), data.frame(code = "pred2", t = with(TrainCode, rep(monthstd, pred2))), data.frame(code = "pred3", t = with(TrainCode, rep(monthstd, pred3)))) DF.d <- rbind( cbind(code = "pred1", obj$FPCA$pred1$mean), cbind(code = "pred2", obj$FPCA$pred2$mean), cbind(code = "pred3", obj$FPCA$pred3$mean)) tbl <- table(DF.t$code) labels <- paste0(names(tbl), " (freq:", as.vector(tbl), ")") DF.t$code <- factor(DF.t$code, labels = labels) DF.d$code <- factor(DF.d$code, labels = labels) # DF.d$freq <- as.vector(tbl[as.character(DF.d$code)])
library(ggplot2) ggplot(DF.t, aes(x = t)) + geom_histogram(aes(y = ..density..), color="gray", fill="lightblue", bins = 100) + geom_line(aes(x = x, y = f_mu), data = DF.d, colour = "blue") + facet_wrap(~ code, ncol = 3, scales = "free_y") + xlab("x") + theme_bw(base_size = 14)
dens1 <- obj$FPCA[[1]]$ValidPred$densities dens2 <- obj$FPCA[[2]]$ValidPred$densities dens3 <- obj$FPCA[[3]]$ValidPred$densities all <- base::intersect( base::intersect( colnames(dens1)[-1], colnames(dens2)[-1]), colnames(dens3)[-1])
f <- function(k) { ValidCode <- data_org$ValidCode ValidCode[, monthstd := month / fu_time] DF.t <- rbind( data.frame(code = "pred1", t = with(ValidCode[id == all[k], ], rep(monthstd, pred1))), data.frame(code = "pred2", t = with(ValidCode[id == all[k], ], rep(monthstd, pred2))), data.frame(code = "pred3", t = with(ValidCode[id == all[k], ], rep(monthstd, pred3)))) DF.d <- rbind( data.frame(code = "pred1", x = dens1[, 1], y = dens1[, all[k]]), data.frame(code = "pred2", x = dens2[, 1], y = dens2[, all[k]]), data.frame(code = "pred3", x = dens3[, 1], y = dens3[, all[k]])) tbl <- table(DF.t$code) labels <- paste0(names(tbl), " (freq:", as.vector(tbl), ")") DF.t$code <- factor(DF.t$code, labels = labels) DF.d$code <- factor(DF.d$code, labels = labels) # DF.d$freq <- as.vector(tbl[as.character(DF.d$code)]) ggplot(DF.t, aes(x = t)) + geom_histogram(aes(y = ..density..), color="gray", fill="lightblue", bins = 20) + geom_line(aes(x = x, y = y), data = DF.d, colour = "blue") + facet_wrap(~ code, ncol = 3, scales = "free_y") + xlab("x") + ggtitle(paste("ID:", all[k])) + theme_bw(base_size = 14) }
The estimated mean density functions (and covariance functions) from FPCA are then used to obtain subject-specific densities, from which features will be extracted.
f(1) f(2) f(3)
Important features from subject-specific densities and FPCA may include:
first code time (1stCode)
peak time (Pk)
change point time (ChP)
first functional PC score (1stScore)
log of total number of codes (logN)
head(obj$ValidFt[, 1:5]) head(obj$ValidFt[, 6:10]) head(obj$ValidFt[, 11:15])
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.