AddingCustomModels.R
In PatientLevelPrediction: Develop Clinical Prediction Models Using the Common Data Model

## ----echo = FALSE, message = FALSE, warning = FALSE---------------------------
library(PatientLevelPrediction)

## ----echo = TRUE, eval=FALSE--------------------------------------------------
# attr(param, "settings") <- list(
#   seed = 12,
#   modelName = "Special classifier"
# )

## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# setMadeUp <- function(a = c(1, 4, 10), b = 2, seed = NULL) {
#   # add input checks here...
# 
#   param <- split(
#     expand.grid(
#       a = a,
#       b = b
#     ),
#     1:(length(a) * length(b))
#   )
# 
#   attr(param, "settings") <- list(
#     modelName = "Made Up",
#     requiresDenseMatrix = TRUE,
#     seed = seed
#   )
# 
#   # now create list of all combinations:
#   result <- list(
#     fitFunction = "fitMadeUp", # this will be called to train the made up model
#     param = param
#   )
#   class(result) <- "modelSettings"
# 
#   return(result)
# }

## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# fitMadeUp <- function(trainData, modelSettings, search, analysisId) {
#   param <- modelSettings$param
# 
#   # **************** code to train the model here
#   # trainedModel <- this code should apply each hyper-parameter combination
#   # (param[[i]]) using the specified search (e.g., cross validation)
#   #                 then pick out the best hyper-parameter setting
#   #                 and finally fit a model on the whole train data using the
#   #                 optimal hyper-parameter settings
#   # ****************
# 
#   # **************** code to apply the model to trainData
#   # prediction <- code to apply trainedModel to trainData
#   # ****************
# 
#   # **************** code to get variable importance (if possible)
#   # varImp <- code to get importance of each variable in trainedModel
#   # ****************
# 
# 
#   # construct the standard output for a model:
#   result <- list(
#     model = trainedModel,
#     prediction = prediction, # the train and maybe the cross validation predictions for the trainData
#     preprocessing = list(
#       featureEngineering = attr(trainData$covariateData, "metaData")$featureEngineering,
#       tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings,
#       requireDenseMatrix = attr(param, "settings")$requiresDenseMatrix,
#     ),
#     modelDesign = list(
#       outcomeId = attr(trainData, "metaData")$outcomeId,
#       targetId = attr(trainData, "metaData")$targetId,
#       plpDataSettings = attr(trainData, "metaData")$plpDataSettings,
#       covariateSettings = attr(trainData, "metaData")$covariateSettings,
#       populationSettings = attr(trainData, "metaData")$populationSettings,
#       featureEngineeringSettings = attr(trainData$covariateData, "metaData")$featureEngineeringSettings,
#       prerocessSettings = attr(trainData$covariateData, "metaData")$prerocessSettings,
#       modelSettings = list(
#         model = attr(param, "settings")$modelName, # the model name
#         param = param,
#         finalModelParameters = param[[bestInd]], # best hyper-parameters
#         extraSettings = attr(param, "settings")
#       ),
#       splitSettings = attr(trainData, "metaData")$splitSettings,
#       sampleSettings = attr(trainData, "metaData")$sampleSettings
#     ),
#     trainDetails = list(
#       analysisId = analysisId,
#       developmentDatabase = attr(trainData, "metaData")$cdmDatabaseSchema,
#       attrition = attr(trainData, "metaData")$attrition,
#       trainingTime = timeToTrain, # how long it took to train the model
#       trainingDate = Sys.Date(),
#       hyperParamSearch = hyperSummary # the hyper-parameters and performance data.frame
#     ),
#     covariateImportance = merge(trainData$covariateData$covariateRef, varImp, by = "covariateId") # add variable importance to covariateRef if possible
#   )
#   class(result) <- "plpModel"
#   attr(result, "predictionFunction") <- "madeupPrediction"
#   attr(result, "modelType") <- "binary"
#   return(result)
# }

## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# madeupPrediction <- function(plpModel, data, cohort) {
#   # ************* code to do prediction for each rowId in cohort
#   # predictionValues <- code to do prediction here returning the predicted risk
#   #               (value) for each rowId in cohort
#   #**************
# 
#   prediction <- merge(cohort, predictionValues, by = "rowId")
#   attr(prediction, "metaData") <- list(modelType = attr(plpModel, "modelType"))
#   return(prediction)
# }

## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# setMadeUp <- function(a = c(1, 4, 6), b = 2, seed = NULL) {
#   # add input checks here...
# 
#   if (is.null(seed)) {
#     seed <- sample(100000, 1)
#   }
# 
#   param <- split(
#     expand.grid(
#       a = a,
#       b = b
#     ),
#     1:(length(a) * length(b))
#   )
# 
#   attr(param, "settings") <- list(
#     modelName = "Made Up",
#     requiresDenseMatrix = TRUE,
#     seed = seed
#   )
# 
#   # now create list of all combinations:
#   result <- list(
#     fitFunction = "fitMadeUp", # this will be called to train the made up model
#     param = param
#   )
#   class(result) <- "modelSettings"
# 
#   return(result)
# }

## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# fitMadeUp <- function(trainData, modelSettings, search, analysisId) {
#   # set the seed for reproducibility
#   param <- modelSettings$param
#   set.seed(attr(param, "settings")$seed)
# 
#   # add folds to labels:
#   trainData$labels <- merge(trainData$labels, trainData$folds, by = "rowId")
#   # convert data into sparse R Matrix:
#   mappedData <- toSparseM(trainData, map = NULL)
#   matrixData <- mappedData$dataMatrix
#   labels <- mappedData$labels
#   covariateRef <- mappedData$covariateRef
# 
#   # ============= STEP 1 ======================================
#   # pick the best hyper-params and then do final training on all data...
#   writeLines("Cross validation")
#   paramSel <- lapply(
#     param,
#     function(x) {
#       do.call(
#         madeUpModel,
#         list(
#           param = x,
#           final = FALSE,
#           data = matrixData,
#           labels = labels
#         )
#       )
#     }
#   )
#   hyperSummary <- do.call(rbind, lapply(paramSel, function(x) x$hyperSum))
#   hyperSummary <- as.data.frame(hyperSummary)
#   hyperSummary$auc <- unlist(lapply(paramSel, function(x) x$auc))
#   paramSel <- unlist(lapply(paramSel, function(x) x$auc))
#   bestInd <- which.max(paramSel)
# 
#   # get cross val prediction for best hyper-parameters
#   prediction <- param.sel[[bestInd]]$prediction
#   prediction$evaluationType <- "CV"
# 
#   writeLines("final train")
#   finalResult <- do.call(
#     madeUpModel,
#     list(
#       param = param[[bestInd]],
#       final = TRUE,
#       data = matrixData,
#       labels = labels
#     )
#   )
# 
#   trainedModel <- finalResult$model
# 
#   # prediction risk on training data:
#   finalResult$prediction$evaluationType <- "Train"
# 
#   # get CV and train prediction
#   prediction <- rbind(prediction, finalResult$prediction)
# 
#   varImp <- covariateRef %>% dplyr::collect()
#   # no feature importance available
#   vqrImp$covariateValue <- 0
# 
#   timeToTrain <- Sys.time() - start
# 
#   # construct the standard output for a model:
#   result <- list(
#     model = trainedModel,
#     prediction = prediction,
#     preprocessing = list(
#       featureEngineering = attr(trainData$covariateData, "metaData")$featureEngineering,
#       tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings,
#       requireDenseMatrix = attr(param, "settings")$requiresDenseMatrix,
#     ),
#     modelDesign = list(
#       outcomeId = attr(trainData, "metaData")$outcomeId,
#       targetId = attr(trainData, "metaData")$targetId,
#       plpDataSettings = attr(trainData, "metaData")$plpDataSettings,
#       covariateSettings = attr(trainData, "metaData")$covariateSettings,
#       populationSettings = attr(trainData, "metaData")$populationSettings,
#       featureEngineeringSettings = attr(trainData$covariateData, "metaData")$featureEngineeringSettings,
#       prerocessSettings = attr(trainData$covariateData, "metaData")$prerocessSettings,
#       modelSettings = list(
#         model = attr(param, "settings")$modelName, # the model name
#         param = param,
#         finalModelParameters = param[[bestInd]], # best hyper-parameters
#         extraSettings = attr(param, "settings")
#       ),
#       splitSettings = attr(trainData, "metaData")$splitSettings,
#       sampleSettings = attr(trainData, "metaData")$sampleSettings
#     ),
#     trainDetails = list(
#       analysisId = analysisId,
#       developmentDatabase = attr(trainData, "metaData")$cdmDatabaseSchema,
#       attrition = attr(trainData, "metaData")$attrition,
#       trainingTime = timeToTrain, # how long it took to train the model
#       trainingDate = Sys.Date(),
#       hyperParamSearch = hyperSummary # the hyper-parameters and performance data.frame
#     ),
#     covariateImportance = merge(trainData$covariateData$covariateRef, varImp, by = "covariateId") # add variable importance to covariateRef if possible
#   )
#   class(result) <- "plpModel"
#   attr(result, "predictionFunction") <- "madeupPrediction"
#   attr(result, "modelType") <- "binary"
#   return(result)
# }

## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# madeUpModel <- function(param, data, final = FALSE, labels) {
#   if (final == FALSE) {
#     # add value column to store all predictions
#     labels$value <- rep(0, nrow(labels))
#     attr(labels, "metaData") <- list(modelType = "binary")
# 
#     foldPerm <- c() # this holds CV aucs
#     for (index in 1:max(labels$index)) {
#       model <- madeup::model(
#         x = data[labels$index != index, ], # remove left out fold
#         y = labels$outcomeCount[labels$index != index],
#         a = param$a,
#         b = param$b
#       )
# 
#       # predict on left out fold
#       pred <- stats::predict(model, data[labels$index == index, ])
#       labels$value[labels$index == index] <- pred
# 
#       # calculate auc on help out fold
#       aucVal <- computeAuc(labels[labels$index == index, ])
#       foldPerm <- c(foldPerm, aucVal)
#     }
#     auc <- computeAuc(labels) # overal AUC
#   } else {
#     model <- madeup::model(
#       x = data,
#       y = labels$outcomeCount,
#       a = param$a,
#       b = param$b
#     )
# 
#     pred <- stats::predict(model, data)
#     labels$value <- pred
#     attr(labels, "metaData") <- list(modelType = "binary")
#     auc <- computeAuc(labels)
#     foldPerm <- auc
#   }
# 
#   result <- list(
#     model = model,
#     auc = auc,
#     prediction = labels,
#     hyperSum = c(a = a, b = b, fold_auc = foldPerm)
#   )
# 
#   return(result)
# }

## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# madeupPrediction <- function(plpModel, data, cohort) {
#   if (class(data) == "plpData") {
#     # convert
#     matrixObjects <- toSparseM(
#       plpData = data,
#       cohort = cohort,
#       map = plpModel$covariateImportance %>%
#         dplyr::select("columnId", "covariateId")
#     )
# 
#     newData <- matrixObjects$dataMatrix
#     cohort <- matrixObjects$labels
#   } else {
#     newData <- data
#   }
# 
#   if (class(plpModel) == "plpModel") {
#     model <- plpModel$model
#   } else {
#     model <- plpModel
#   }
# 
#   cohort$value <- stats::predict(model, newData)
# 
#   # fix the rowIds to be the old ones
#   # now use the originalRowId and remove the matrix rowId
#   cohort <- cohort %>%
#     dplyr::select(-"rowId") %>%
#     dplyr::rename(rowId = "originalRowId")
# 
#   attr(cohort, "metaData") <- list(modelType = attr(plpModel, "modelType"))
#   return(cohort)
# }

## ----tidy=TRUE,eval=TRUE------------------------------------------------------
citation("PatientLevelPrediction")

Any scripts or data that you put into this service are public.

PatientLevelPrediction documentation built on April 3, 2025, 9:58 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

PatientLevelPrediction
Develop Clinical Prediction Models Using the Common Data Model

inst/doc/AddingCustomModels.R
In PatientLevelPrediction: Develop Clinical Prediction Models Using the Common Data Model

Try the PatientLevelPrediction package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PatientLevelPrediction Develop Clinical Prediction Models Using the Common Data Model

inst/doc/AddingCustomModels.R In PatientLevelPrediction: Develop Clinical Prediction Models Using the Common Data Model

Try the PatientLevelPrediction package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PatientLevelPrediction
Develop Clinical Prediction Models Using the Common Data Model

inst/doc/AddingCustomModels.R
In PatientLevelPrediction: Develop Clinical Prediction Models Using the Common Data Model