#' Creation of a generic test PED dataframe
#'
#' This function creates a list that contains (possibly) necessary PED frames
#' for prediction.
#' The list features three elements:
#' \code{[[1]]}: the PED frame using the original end times (original=.
#' (Could be used for validation loss)
#' \code{[[2]]}: the PED frame using the maximally observed end time for all
#' observations (transformed). (Could be used for testing)
#' \code{[[3]]}: the raw (no PED) data (raw_data).
#' The fucntions uses \code{pammtools::as_ped} in the backend.
#'
#' @param data a data.frame (or variant) for which the test set(s) should be
#' computed.
#' @param ped_train a PED data set (ee pammtools package) that has been used for
#' training
#' @return a list of length 3. (See description)
#' @export
#' @author Philipp Kopper
make_test_ped <- function(data, ped_train) {
trafo_args <- attr(ped_train, "trafo_args")
formula <- trafo_args[["formula"]]
not <- which(names(trafo_args) == "formula")
trafo_args <- trafo_args[(1:length(trafo_args))[-not]]
timevar <- as.character(formula[[2]])[2]
if (is.data.frame(data)) {
# only static covars
data2 <- data
data2[[timevar]] <- max(data[[timevar]])
list(original = do.call(as_ped, append(list(data, formula), trafo_args)),
transformed = do.call(as_ped, append(list(data2, formula), trafo_args)),
raw_data = data)
} else {
stop("Not yet implemented for longi. data.")
}
}
#' Transform PED data to an array / tensor
#'
#' This function transforms PED data (from pammtools) to an array or tensor
#' for further usage in tensorflow.
#' The array is constructed from the PED so that the first dimension has same
#' length as there are unique IDs in the PED. The second dimension reflects all
#' distinct intervals per ID and the third dimension features all model features
#' effectively used by the PAM(M).
#' The third axis features the offset (\code{[, , 1]}) all linear covariates and
#' all spline covariates in a already transformed manner using mgcv::gam for
#' the transformation.
#' The transformation is based on the formula argument.
#' @param ped a PED (see pammtools) data frame.
#' @param formula a formula for a PAM(M) (see pammtools/mgcv)
#' @param training a boolean indicating if a training or test set is to be
#' constructed.
#' @param training_data only relevant if training == TRUE. The training PED
#' that has been used to fit the model is to be tested.
#' @param id a string indicating the name of the ID variable.
#' @return A list of class ped_tensor and length 2.
#' \code{[[1]]} is the associated input (X) and \code{[[2]]} the associated target (y).
#' @export
#' @author Philipp Kopper
#' @import pammtools
#' @import dplyr
#' @import stats
from_ped_to_tensor <-
function(ped, formula = ped_status ~ s(tend, k = 9L, bs = "cr"),
training = TRUE, training_data = NULL, id = "id") {
if (training) {
base_line_pamm <- pamm(as.formula(formula), data = ped)
mm <- model.matrix(base_line_pamm)[, -1]
} else {
base_line_pamm <- pamm(as.formula(formula), data = training_data)
mm <- predict(base_line_pamm, newdata = ped, type = "lpmatrix")[, -1]
}
dims_X <- c(length(unique(ped$id)), length(unique(ped$tend)), ncol(mm) + 1)
dims_y <- c(length(unique(ped$id)), length(unique(ped$tend)), 1)
X <- array(0, dims_X, dimnames = list(unique(ped[[id]]), unique(ped$tend),
c("offset", colnames(mm))))
y <- array(0, dims_y, dimnames = list(unique(ped[[id]]), unique(ped$tend)))
wgs <- y
for (i in 1:dim(X)[1]) {
current_id <- as.integer(dimnames(X)[[1]][i])
current_length <- sum(ped[[id]] == current_id)
X[i, 1:current_length, 1] <- ped[current_id == ped[[id]], "offset"]
X[i, 1:current_length, -1] <-
mm[current_id == ped[[id]], dimnames(X)[[3]][-1]]
y[i, 1:current_length, ] <- ped[current_id == ped[[id]], "ped_status"]
wgs[i, 1:current_length, ] <- 1
}
indicator <- rep(0, dim(X)[3])
names(indicator) <- dimnames(X)[[3]]
indicator[dimnames(X)[[3]] %in% colnames(ped)] <- 1
indicator[1] <- 0
for (i in 1:length(base_line_pamm$smooth)) {
ind_names <- paste(base_line_pamm$smooth[[i]]$label, ".",
1:base_line_pamm$smooth[[i]]$df, sep = "")
indicator[ind_names] <- i + 1
}
class(X) <- c("ped_tensor_X", class(X))
class(y) <- c("ped_tensor_y", class(y))
attr(X, "trafo_args") <- attr(ped, "trafo_args")
attr(X, "raw") <- ped
res <- list(X = X, y = y)
S <- sapply(base_line_pamm$smooth, "[[", "S")
attr(res, "P") <- vector_list_prod(base_line_pamm$sp / nrow(ped), S)
attr(res, "weights") <- wgs
attr(res, "indicator") <- indicator
attr(res, "trafo_args") <- attr(ped, "trafo_args")
attr(res, "coeffs") <- base_line_pamm$coefficients
class(res) <- "ped_tensor"
res
}
#' Transform unstructured data to an array / tensor that fits to the PED
#' tensor (BETA).
#'
#' This function transforms the unstrucutred data part.
#' For static covariates (format == "standard") basically nothing happens.
#' All other options are not yet implemented.
#' This current version, however, covers all previous use cases such as
#' Kopper et al. (2020).
#' @param data a list of tensors. In the current implementeation each tensor
#' must be processible by the respective network supplied to deeppam().
#' @param ped_tensor the associated tensor brought into PED format by
#' from_ped_to_tensor().
#' @param format a character indicating which transformation is used.
#' Currently only "standard has been implemented.
#' @return a list of tensors to be supplied to the deep part of the network
#' constructed by deeppam().
#' @export
#' @author Philipp Kopper
from_unstructured_to_deeppam_tensor <- function(data = list(), ped_tensor = NULL,
format = "standard") {
if (length(data) == 0) {
stop("No data supplied.")
}
if (format == "standard") {
data
} else if (format == "ped") {
stop("ped not implemented yet.")
#to_ped_tensors(data, ped)
} else {
stop((c("Currently you have to supply more advanced longitudinal data",
" formats manually. This function only covers two simple special",
" cases: No transformation and a classical PED transformation.",
" for static covaraites. This, however, will cover most primary",
" use cases. There is no guarantee that custom. long. format",
" can be modelled by DeepPAM.")))
}
# can vaguely stay the same for standard
}
#' Attach test data to DeepPAM model for testing via pec::pec.
#'
#' This function modifies the keras model constructed using deeppam() to feature
#' the data it should be tested on.
#' This step is necessary due to the rigorous preprocessing needed for DeepPAM.
#' newdata_str MUST be equivalent to the newdata argument of pec::pec.
#' Internally, the test data is attached via the attributes of the R object.
#' @param model a keras model of additional subclass deeppam which should be
#' evaluated.
#' @param newdata_str the strucured new data for evaluation. A regular data set.
#' newdata_str MUST be identical to the newdata argument used in pec:pec() later
#' on.
#' @param newdata_unstr the unstructured part of the data for evalution.
#' Must match newdata_str.
#' @param training_ped the PED used for training.
#' @param formula the formula used to fit the model. (can be solved via attribute)
#' @return the same model as inputted with a modified attribute set.
#' The model can be handed over to pec::pec() only after this step.
#' @export
#' @author Philipp Kopper
add_test_data <- function(model, newdata_str, newdata_unstr = list(),
training_ped, formula = ped_status ~ s(tend, k = 9L)) {
test_ped <- make_test_ped(newdata_str, training_ped)
ped_tensor_test <- from_ped_to_tensor(test_ped$transformed,
formula, training = FALSE,
training_data = training_ped)
attachment <- vector(mode = "list", 1)
attachment[[1]] <- ped_tensor_test$X
attr(model, "test_data") <- append(attachment, list(newdata_unstr))
model
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.