# @file ExistingSklearn.R
#
# Copyright 2025 Observational Health Data Sciences and Informatics
#
# This file is part of PatientLevelPrediction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Plug an existing scikit learn python model into the
#' PLP framework
#'
#' @details
#' This function lets users add an existing scikit learn model that is saved as
#' model.pkl into PLP format. covariateMap is a mapping between standard
#' covariateIds and the model columns. The user also needs to specify the
#' covariate settings and population settings as these are used to determine
#' the standard PLP model design.
#'
#' @param modelLocation The location of the folder that contains the model as
#' model.pkl
#' @param covariateMap A data.frame with the columns: columnId and covariateId.
#' `covariateId` from FeatureExtraction is the standard OHDSI covariateId.
#' `columnId` is the column location the model expects that covariate to be in.
#' For example, if you had a column called 'age' in your model and this was the
#' 3rd column when fitting the model, then the values for columnId would be 3,
#' covariateId would be 1002 (the covariateId for age in years) and
#' @param isPickle If the model should be saved as a pickle set this to TRUE if
#' it should be saved as json set this to FALSE.
#' @param targetId Add the development targetId here
#' @param outcomeId Add the development outcomeId here
#' @param populationSettings Add development population settings (this includes the time-at-risk settings).
#' @param restrictPlpDataSettings Add development restriction settings
#' @param covariateSettings Add the covariate settings here to specify how the model covariates are created from the OMOP CDM
#' @param featureEngineering Add any feature engineering here (e.g., if you need to modify the covariates before applying the model)
#' This is a list of lists containing a string named funct specifying the engineering function to call and settings that are inputs to that
#' function. funct must take as input trainData (a plpData object) and settings (a list).
#' @param tidyCovariates Add any tidyCovariates mappings here (e.g., if you need to normalize the covariates)
#' @param requireDenseMatrix Specify whether the model needs a dense matrix (TRUE or FALSE)
#'
#' @return
#' An object of class plpModel, this is a list that contains:
#' model (the location of the model.pkl),
#' preprocessing (settings for mapping the covariateIds to the model
#' column mames),
#' modelDesign (specification of the model design),
#' trainDetails (information about the model fitting) and
#' covariateImportance.
#'
#' You can use the output as an input in PatientLevelPrediction::predictPlp to
#' apply the model and calculate the risk for patients.
#' @export
createSklearnModel <- function(
modelLocation = "/model", # model needs to be saved here as "model.pkl"
covariateMap = data.frame(
columnId = 1:2,
covariateId = c(1, 2),
),
isPickle = TRUE,
targetId = NULL,
outcomeId = NULL,
populationSettings = createStudyPopulationSettings(),
restrictPlpDataSettings = createRestrictPlpDataSettings(),
covariateSettings = FeatureExtraction::createDefaultCovariateSettings(),
featureEngineering = NULL,
tidyCovariates = NULL,
requireDenseMatrix = FALSE
) {
checkSklearn()
checkFileExists(modelLocation)
checkIsClass(covariateMap, "data.frame")
checkBoolean(isPickle)
checkDataframe(covariateMap, c("columnId", "covariateId"),
columnTypes = list(c("numeric", "integer"), c("numeric", "integer"))
)
checkIsClass(targetId, c("numeric", "NULL"))
checkIsClass(outcomeId, c("numeric", "NULL"))
checkIsClass(populationSettings, c("NULL", "populationSettings"))
checkIsClass(restrictPlpDataSettings , c("NULL", "restrictPlpDataSettings"))
checkIsClass(covariateSettings, c("list", "NULL", "covariateSettings"))
checkIsClass(requireDenseMatrix, c("logical"))
# start to make the plpModel
existingModel <- list(model = "existingSklearn")
class(existingModel) <- "modelSettings"
plpModel <- list(
preprocessing = list(
featureEngineering = featureEngineering,
tidyCovariates = tidyCovariates,
requireDenseMatrix = requireDenseMatrix
),
covariateImportance = data.frame(
columnId = covariateMap$columnId,
covariateId = covariateMap$covariateId,
included = TRUE
),
modelDesign = PatientLevelPrediction::createModelDesign(
targetId = targetId,
outcomeId = outcomeId,
modelSettings = existingModel,
covariateSettings = covariateSettings,
populationSettings = populationSettings,
restrictPlpDataSettings = restrictPlpDataSettings,
preprocessSettings = PatientLevelPrediction::createPreprocessSettings(
minFraction = 0,
normalize = FALSE,
removeRedundancy = FALSE
),
splitSettings = PatientLevelPrediction::createDefaultSplitSetting(splitSeed = 123)
),
model = modelLocation,
trainDetails = list(
analysisId = "existingSklearn",
developmentDatabase = "unknown",
developmentDatabaseId = "unknown",
trainingTime = -1,
modelName = "existingSklearn"
)
)
attr(plpModel, "modelType") <- "binary"
attr(plpModel, "saveType") <- "file"
attr(plpModel, "predictionFunction") <- "predictPythonSklearn"
attr(plpModel, "saveToJson") <- !isPickle
class(plpModel) <- "plpModel"
return(plpModel)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.