Nothing
## ----echo = FALSE, message = FALSE, warning = FALSE---------------------------
library(PatientLevelPrediction)
## ----echo = TRUE, eval=FALSE--------------------------------------------------
# attr(param, "settings") <- list(
# seed = 12,
# modelName = "Special classifier"
# )
## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# setMadeUp <- function(a = c(1, 4, 10), b = 2, seed = NULL) {
# # add input checks here...
#
# param <- split(
# expand.grid(
# a = a,
# b = b
# ),
# 1:(length(a) * length(b))
# )
#
# attr(param, "settings") <- list(
# modelName = "Made Up",
# requiresDenseMatrix = TRUE,
# seed = seed
# )
#
# # now create list of all combinations:
# result <- list(
# fitFunction = "fitMadeUp", # this will be called to train the made up model
# param = param
# )
# class(result) <- "modelSettings"
#
# return(result)
# }
## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# fitMadeUp <- function(trainData, modelSettings, search, analysisId) {
# param <- modelSettings$param
#
# # **************** code to train the model here
# # trainedModel <- this code should apply each hyper-parameter combination
# # (param[[i]]) using the specified search (e.g., cross validation)
# # then pick out the best hyper-parameter setting
# # and finally fit a model on the whole train data using the
# # optimal hyper-parameter settings
# # ****************
#
# # **************** code to apply the model to trainData
# # prediction <- code to apply trainedModel to trainData
# # ****************
#
# # **************** code to get variable importance (if possible)
# # varImp <- code to get importance of each variable in trainedModel
# # ****************
#
#
# # construct the standard output for a model:
# result <- list(
# model = trainedModel,
# prediction = prediction, # the train and maybe the cross validation predictions for the trainData
# preprocessing = list(
# featureEngineering = attr(trainData$covariateData, "metaData")$featureEngineering,
# tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings,
# requireDenseMatrix = attr(param, "settings")$requiresDenseMatrix,
# ),
# modelDesign = list(
# outcomeId = attr(trainData, "metaData")$outcomeId,
# targetId = attr(trainData, "metaData")$targetId,
# plpDataSettings = attr(trainData, "metaData")$plpDataSettings,
# covariateSettings = attr(trainData, "metaData")$covariateSettings,
# populationSettings = attr(trainData, "metaData")$populationSettings,
# featureEngineeringSettings = attr(trainData$covariateData, "metaData")$featureEngineeringSettings,
# prerocessSettings = attr(trainData$covariateData, "metaData")$prerocessSettings,
# modelSettings = list(
# model = attr(param, "settings")$modelName, # the model name
# param = param,
# finalModelParameters = param[[bestInd]], # best hyper-parameters
# extraSettings = attr(param, "settings")
# ),
# splitSettings = attr(trainData, "metaData")$splitSettings,
# sampleSettings = attr(trainData, "metaData")$sampleSettings
# ),
# trainDetails = list(
# analysisId = analysisId,
# developmentDatabase = attr(trainData, "metaData")$cdmDatabaseSchema,
# attrition = attr(trainData, "metaData")$attrition,
# trainingTime = timeToTrain, # how long it took to train the model
# trainingDate = Sys.Date(),
# hyperParamSearch = hyperSummary # the hyper-parameters and performance data.frame
# ),
# covariateImportance = merge(trainData$covariateData$covariateRef, varImp, by = "covariateId") # add variable importance to covariateRef if possible
# )
# class(result) <- "plpModel"
# attr(result, "predictionFunction") <- "madeupPrediction"
# attr(result, "modelType") <- "binary"
# return(result)
# }
## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# madeupPrediction <- function(plpModel, data, cohort) {
# # ************* code to do prediction for each rowId in cohort
# # predictionValues <- code to do prediction here returning the predicted risk
# # (value) for each rowId in cohort
# #**************
#
# prediction <- merge(cohort, predictionValues, by = "rowId")
# attr(prediction, "metaData") <- list(modelType = attr(plpModel, "modelType"))
# return(prediction)
# }
## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# setMadeUp <- function(a = c(1, 4, 6), b = 2, seed = NULL) {
# # add input checks here...
#
# if (is.null(seed)) {
# seed <- sample(100000, 1)
# }
#
# param <- split(
# expand.grid(
# a = a,
# b = b
# ),
# 1:(length(a) * length(b))
# )
#
# attr(param, "settings") <- list(
# modelName = "Made Up",
# requiresDenseMatrix = TRUE,
# seed = seed
# )
#
# # now create list of all combinations:
# result <- list(
# fitFunction = "fitMadeUp", # this will be called to train the made up model
# param = param
# )
# class(result) <- "modelSettings"
#
# return(result)
# }
## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# fitMadeUp <- function(trainData, modelSettings, search, analysisId) {
# # set the seed for reproducibility
# param <- modelSettings$param
# set.seed(attr(param, "settings")$seed)
#
# # add folds to labels:
# trainData$labels <- merge(trainData$labels, trainData$folds, by = "rowId")
# # convert data into sparse R Matrix:
# mappedData <- toSparseM(trainData, map = NULL)
# matrixData <- mappedData$dataMatrix
# labels <- mappedData$labels
# covariateRef <- mappedData$covariateRef
#
# # ============= STEP 1 ======================================
# # pick the best hyper-params and then do final training on all data...
# writeLines("Cross validation")
# paramSel <- lapply(
# param,
# function(x) {
# do.call(
# madeUpModel,
# list(
# param = x,
# final = FALSE,
# data = matrixData,
# labels = labels
# )
# )
# }
# )
# hyperSummary <- do.call(rbind, lapply(paramSel, function(x) x$hyperSum))
# hyperSummary <- as.data.frame(hyperSummary)
# hyperSummary$auc <- unlist(lapply(paramSel, function(x) x$auc))
# paramSel <- unlist(lapply(paramSel, function(x) x$auc))
# bestInd <- which.max(paramSel)
#
# # get cross val prediction for best hyper-parameters
# prediction <- param.sel[[bestInd]]$prediction
# prediction$evaluationType <- "CV"
#
# writeLines("final train")
# finalResult <- do.call(
# madeUpModel,
# list(
# param = param[[bestInd]],
# final = TRUE,
# data = matrixData,
# labels = labels
# )
# )
#
# trainedModel <- finalResult$model
#
# # prediction risk on training data:
# finalResult$prediction$evaluationType <- "Train"
#
# # get CV and train prediction
# prediction <- rbind(prediction, finalResult$prediction)
#
# varImp <- covariateRef %>% dplyr::collect()
# # no feature importance available
# vqrImp$covariateValue <- 0
#
# timeToTrain <- Sys.time() - start
#
# # construct the standard output for a model:
# result <- list(
# model = trainedModel,
# prediction = prediction,
# preprocessing = list(
# featureEngineering = attr(trainData$covariateData, "metaData")$featureEngineering,
# tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings,
# requireDenseMatrix = attr(param, "settings")$requiresDenseMatrix,
# ),
# modelDesign = list(
# outcomeId = attr(trainData, "metaData")$outcomeId,
# targetId = attr(trainData, "metaData")$targetId,
# plpDataSettings = attr(trainData, "metaData")$plpDataSettings,
# covariateSettings = attr(trainData, "metaData")$covariateSettings,
# populationSettings = attr(trainData, "metaData")$populationSettings,
# featureEngineeringSettings = attr(trainData$covariateData, "metaData")$featureEngineeringSettings,
# prerocessSettings = attr(trainData$covariateData, "metaData")$prerocessSettings,
# modelSettings = list(
# model = attr(param, "settings")$modelName, # the model name
# param = param,
# finalModelParameters = param[[bestInd]], # best hyper-parameters
# extraSettings = attr(param, "settings")
# ),
# splitSettings = attr(trainData, "metaData")$splitSettings,
# sampleSettings = attr(trainData, "metaData")$sampleSettings
# ),
# trainDetails = list(
# analysisId = analysisId,
# developmentDatabase = attr(trainData, "metaData")$cdmDatabaseSchema,
# attrition = attr(trainData, "metaData")$attrition,
# trainingTime = timeToTrain, # how long it took to train the model
# trainingDate = Sys.Date(),
# hyperParamSearch = hyperSummary # the hyper-parameters and performance data.frame
# ),
# covariateImportance = merge(trainData$covariateData$covariateRef, varImp, by = "covariateId") # add variable importance to covariateRef if possible
# )
# class(result) <- "plpModel"
# attr(result, "predictionFunction") <- "madeupPrediction"
# attr(result, "modelType") <- "binary"
# return(result)
# }
## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# madeUpModel <- function(param, data, final = FALSE, labels) {
# if (final == FALSE) {
# # add value column to store all predictions
# labels$value <- rep(0, nrow(labels))
# attr(labels, "metaData") <- list(modelType = "binary")
#
# foldPerm <- c() # this holds CV aucs
# for (index in 1:max(labels$index)) {
# model <- madeup::model(
# x = data[labels$index != index, ], # remove left out fold
# y = labels$outcomeCount[labels$index != index],
# a = param$a,
# b = param$b
# )
#
# # predict on left out fold
# pred <- stats::predict(model, data[labels$index == index, ])
# labels$value[labels$index == index] <- pred
#
# # calculate auc on help out fold
# aucVal <- computeAuc(labels[labels$index == index, ])
# foldPerm <- c(foldPerm, aucVal)
# }
# auc <- computeAuc(labels) # overal AUC
# } else {
# model <- madeup::model(
# x = data,
# y = labels$outcomeCount,
# a = param$a,
# b = param$b
# )
#
# pred <- stats::predict(model, data)
# labels$value <- pred
# attr(labels, "metaData") <- list(modelType = "binary")
# auc <- computeAuc(labels)
# foldPerm <- auc
# }
#
# result <- list(
# model = model,
# auc = auc,
# prediction = labels,
# hyperSum = c(a = a, b = b, fold_auc = foldPerm)
# )
#
# return(result)
# }
## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# madeupPrediction <- function(plpModel, data, cohort) {
# if (class(data) == "plpData") {
# # convert
# matrixObjects <- toSparseM(
# plpData = data,
# cohort = cohort,
# map = plpModel$covariateImportance %>%
# dplyr::select("columnId", "covariateId")
# )
#
# newData <- matrixObjects$dataMatrix
# cohort <- matrixObjects$labels
# } else {
# newData <- data
# }
#
# if (class(plpModel) == "plpModel") {
# model <- plpModel$model
# } else {
# model <- plpModel
# }
#
# cohort$value <- stats::predict(model, newData)
#
# # fix the rowIds to be the old ones
# # now use the originalRowId and remove the matrix rowId
# cohort <- cohort %>%
# dplyr::select(-"rowId") %>%
# dplyr::rename(rowId = "originalRowId")
#
# attr(cohort, "metaData") <- list(modelType = attr(plpModel, "modelType"))
# return(cohort)
# }
## ----tidy=TRUE,eval=TRUE------------------------------------------------------
citation("PatientLevelPrediction")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.