R/helpers.R

Defines functions predictFunction.glm getModel runModel getPopulationSettings getAnalyses

getAnalyses <- function(settings, outputFolder,cdmDatabaseName){
  
  cohorts <- system.file("settings", 'CohortsToCreate.csv', package = "EmcWaltersDementiaModel")
  cohorts <- read.csv(cohorts)
  
    settingLoc <- system.file("settings","settings.csv", package = "EmcWaltersDementiaModel")
    analysesSettings <- read.csv(settingLoc)
    analysesSettings$modelSettingsId <- 1:nrow(analysesSettings)
    
  analysesSettings$analysisId <- paste0('Analysis_', analysesSettings$analysisId)
  
  # adding extras for shiny
  analysesSettings$cohortName <- analysesSettings$targetCohortName
  analysesSettings$devDatabase <- 'NA'
  analysesSettings$valDatabase <- cdmDatabaseName
  analysesSettings$modelSettingName <- analysesSettings$modelName
  analysesSettings$populationSettingId <- 1
  analysesSettings$covariateSettingId <- analysesSettings$modelName
  
  if(!dir.exists(file.path(outputFolder,cdmDatabaseName))){
    dir.create(file.path(outputFolder,cdmDatabaseName))
  }
  write.csv(analysesSettings, file.path(outputFolder,cdmDatabaseName, 'settings.csv'))
  return(analysesSettings)
}


getPopulationSettings <- function(){
  
  runSettingsLoc <- system.file("settings", 'existingModelList.json', package = "EmcWaltersDementiaModel")
  settings <- ParallelLogger::loadSettingsFromJson(runSettingsLoc )
  
  return(settings$populationSettings)
}


# takes as input model name - then load that json
runModel <- function(modelName, 
                     analysisId,
                     connectionDetails,
                     cohortCovariateDatabaseSchema,
                     cohortCovariateTable,
                     getPlpSettings, 
                     createPopulationSettings,
                     outputFolder,
                     cdmDatabaseName){
  
  runSettingsLoc <- system.file("settings", 'existingModelList.json', package = "EmcWaltersDementiaModel")
  
  settings <- ParallelLogger::loadSettingsFromJson(runSettingsLoc )
  ind <- which(unlist(lapply(settings$modelSettings, function(x) x$details$modelName))==modelName)
  # if ind is empty return no model
  if(length(ind)==0){
    ParallelLogger::logError(paste0('No model called: ',modelName , ' in json settings'))
  }
  runSettings <- processModelJson(settings$modelSetting[[ind]], cohortCovariateDatabaseSchema, cohortCovariateTable)
  
  # create cohort covariates:
  createCovariateCohorts(connectionDetails = connectionDetails,
                         cdmDatabaseSchema = getPlpSettings$cdmDatabaseSchema,
                         vocabularyDatabaseSchema = getPlpSettings$cdmDatabaseSchema,
                         cohortDatabaseSchema = cohortCovariateDatabaseSchema,
                         cohortTable = cohortCovariateTable,
                         oracleTempSchema = getPlpSettings$oracleTempSchema,
                         cohortVarsToCreate = runSettings$cohorts)
  
  
  # extract data
  getPlpSettings$covariateSettings = runSettings$covariateSettings
  plpData <- do.call(PatientLevelPrediction::getPlpData, getPlpSettings)
  
  PatientLevelPrediction::savePlpData(plpData = plpData, file.path(outputFolder,cdmDatabaseName, 'plpData'))
  # plpData <- PatientLevelPrediction::loadPlpData(file.path(outputFolder,cdmDatabaseName, 'plpData'))
  
  # get population
  createPopulationSettings$plpData <- plpData
  population <- do.call(PatientLevelPrediction::createStudyPopulation, createPopulationSettings)
  
  # apply model
  plpModel <- getModel(modelName, 
                       analysisId,
                       getPlpSettings$cohortId,
                       createPopulationSettings$outcomeId, 
                       population,
                       runSettings$model)
  
  result <- PatientLevelPrediction::applyModel(population = population, 
                                               plpData = plpData, 
                                               plpModel = plpModel,
                                               calculatePerformance = T)
  
  if(is.null(result)){
    return(NULL)
  }
  
  result$inputSetting$database <- cdmDatabaseName
  result$inputSetting$modelSettings <- list(model = 'existing model', name = modelName)
  result$inputSetting$dataExtrractionSettings$covariateSettings <- runSettings$covariateSettings
  result$inputSetting$populationSettings <- attr(population, "metaData")
  result$executionSummary  <- list()
  result$model <- plpModel
  result$analysisRef <- list()
  result$covariateSummary <- tryCatch({PatientLevelPrediction:::covariateSummary(plpData = plpData, population = population, model = plpModel)},
                                      error = function(e){ParallelLogger::logError(e); return(NULL)})
  
  return(result)
}

getModel <- function(modelName, analysisId,cohortId,outcomeId, population, modelSettings){
  
  predictionFunction <- do.call(paste0('predictFunction.',modelSettings$modelFunction), modelSettings$settings)
  
   
  plpModel <- list(model = modelName,
                   analysisId = analysisId,
                   hyperParamSearch = NULL,
                   index = NULL,
                   trainCVAuc = NULL,
                   modelSettings = list(model = modelName, 
                                        modelParameters = NULL),
                   metaData = NULL,
                   populationSettings = attr(population, "metaData"),
                   trainingTime = NULL,
                   varImp = predictionFunction$varImp,
                   dense = T,
                   cohortId = cohortId,
                   outcomeId = outcomeId,
                   covariateMap = NULL,
                   predict = predictionFunction$predict
  )
  attr(plpModel, "type") <- 'existing'
  class(plpModel) <- 'plpModel'
  
  return(plpModel)
}  
 

# add the functions for the exisitng models here 
#======= add custom function here...
predictFunction.glm <- function(coefficients,
                                finalMapping,
                                predictionType){
  
  finalMapping <- eval(str2lang(paste0(finalMapping, collapse = ' ')))
  
  offset <- c(27.501, 
              27.501,
              0, 0, 0, 0,
              0, 0, 0, 0, 0, 0, 
              65.608,
              0,
              0,
              0,
              0)
  
  exponent <- c(1,
                2,
                1, 1, 1, 1,
                1, 1, 1, 1, 1, 1,
                1,
                1,
                1,
                1,
                1)
  
  coefficients$offset <- offset
  coefficients$exponent <- exponent
  
  predictionFunction <- function(plpData, population, coeff = coefficients, type = predictionType){
    
    plpData$covariateData$coefficients <- coeff
    on.exit(plpData$covariateData$coefficients <- NULL, add = TRUE)
    
    popTemp <- population[c("rowId", "cohortStartDate")]
    popTemp$indexCovariate <- format(as.Date(popTemp$cohortStartDate, format="%Y/%m/%d"),"%Y")
    
    prediction <- plpData$covariateData$covariates %>% 
      dplyr::inner_join(plpData$covariateData$coefficients, by= 'covariateId') %>% 
      dplyr::mutate(values = ((covariateValue-offset)**exponent)*points) %>%
      dplyr::group_by(rowId) %>%
      dplyr::summarise(value = sum(values, na.rm = TRUE)) %>%
      dplyr::select(rowId, value) %>% 
      dplyr::collect() 
    
    prediction <- merge(population, prediction, by ="rowId", all.x = TRUE)
    prediction$value[is.na(prediction$value)] <- 0
    
    prediction$indexCovariate <- 0.04477 * (as.numeric(popTemp$indexCovariate) - 2003.719)
    
    # add any final mapping here (e.g., add intercept and mapping)
    prediction$value <- finalMapping(prediction$value + prediction$indexCovariate)
    
    metaData <- list(predictionType = type,
                     cohortId = attr(population,'metaData')$cohortId,
                     outcomeId = attr(population,'metaData')$outcomeId,
                     timepoint = attr(population,'metaData')$riskWindowEnd)
    
    attr(prediction, "metaData") <- metaData
    
    
    return(prediction)
  }
  
  varImp <- coefficients
  colnames(varImp)[colnames(varImp)=='points'] <- 'covariateValue'
  
  return(list(predict = predictionFunction,
              varImp = varImp))
}
mi-erasmusmc/EmcWaltersDementiaModel documentation built on July 19, 2021, 4:16 p.m.