#' Prepare data function
#'
#' This function prepares and generates training and review sets by splitting the dataset in half according to a time variable . Factor variables are removed.
#' @param study_data The study data as a data frame. No default
#' @param outcome The outcome variable as a string. Default: "s30d".
#' @export
PrepDataForAnalysis <- function(
study_data,
outcome = "composite"
)
{
split.dataset <- function(
dataset,
time_variable,
outcome
)
{
df <- dataset[order(dataset[,time_variable]),]
## Get 3/4 of patients
top_split <- floor(nrow(df)*0.75)
## Create set for training
train <- df[1:top_split,]
## Create set for review
review <- df[!(row.names(df) %in% row.names(train)),]
## Use dates function to split dataset by seqn or doar depending on the
## main function argument test
sets <- list(x_train = train,
x_review = review)
return(sets)
}
set.sets <- function(
sets,
time_variable,
outcome
)
{
## Extract tc from review set
tc <- sets$x_review$tc
## Change levels of outcome factor from "No" and "Yes", to 0 and 1. Then
## outcome is extracted from the sets
y_training_and_review <- lapply(sets,
function(the_set)
{
levels(the_set[[outcome]]) <- c(0,1)
the_set[[outcome]] <- as.numeric(as.character(the_set[[outcome]]))
return(the_set[[outcome]])
})
names(y_training_and_review) <- c("y_train", "y_review")
## Remove tc, outcome and time_variable from sets
x_sets <- lapply(sets,
function (the_set) the_set[, !(names(the_set) %in% c(outcome,
"tc",
time_variable))])
## Do median imputation on training and review set separately
x_sets <- lapply(x_sets, DoMedianImputation)
return (list(sets = x_sets,
tc = tc,
y_train = y_training_and_review$y_train,
y_review = y_training_and_review$y_review,
y_name = outcome,
y_levels = levels(study_data[, outcome])))
}
## Split
time_variable <- "doar"
datasets <- lapply(levels(as.factor(study_data$centre)), function(level) study_data[study_data$centre == level, ])
sets_list <- lapply(datasets, function(dataset) {
dataset$centre <- NULL
split.dataset(dataset,
outcome = outcome,
time_variable = time_variable)
})
sets <- list()
sets$x_train <- do.call(rbind, lapply(sets_list, function(set) set$x_train))
sets$x_review <- do.call(rbind, lapply(sets_list, function(set) set$x_review))
## Set
return_list <- set.sets(sets, time_variable, outcome)
class(return_list) <- c(class(return_list), "prepped") # Add prepped to return list class
return(return_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.