# @file predict.R
#
# Copyright 2020 Observational Health Data Sciences and Informatics
#
# This file is part of PatientLevelPrediction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' predictPlp
#'
#' @description
#' Predict the risk of the outcome using the input plpModel for the input plpData
#' @details
#' The function applied the trained model on the plpData to make predictions
#' @param plpModel An object of type \code{plpModel} - a patient level prediction model
#' @param population The population created using createStudyPopulation() who will have their risks predicted
#' @param plpData An object of type \code{plpData} - the patient level prediction
#' data extracted from the CDM.
#' @param index A data frame containing rowId: a vector of rowids and index: a vector of doubles the same length as the rowIds. If used, only the rowIds with a negative index value are used to calculate the prediction.
#'
#' @return
#' A dataframe containing the prediction for each person in the population with an attribute metaData containing prediction details.
#'
# parent predict that calls the others
#' @export
predictPlp <- function(plpModel, population, plpData, index=NULL){
if(is.null(plpModel))
stop('No model input')
if(is.null(population))
stop('No population input')
if(is.null(plpData))
stop('No plpData input')
# check logger
if(length(ParallelLogger::getLoggers())==0){
logger <- ParallelLogger::createLogger(name = "SIMPLE",
threshold = "INFO",
appenders = list(ParallelLogger::createConsoleAppender(layout = ParallelLogger::layoutTimestamp)))
ParallelLogger::registerLogger(logger)
}
# apply the feature transformations
if(!is.null(index)){
ParallelLogger::logTrace(paste0('Calculating prediction for ',sum(index$index<0),' in test set'))
ind <- population$rowId%in%index$rowId[index$index<0]
} else{
ParallelLogger::logTrace(paste0('Calculating prediction for ',nrow(population),' in dataset'))
ind <- rep(T, nrow(population))
}
# do the predction on the new data
if(class(plpModel)=='plpModel'){
# extract the classifier type
prediction <- plpModel$predict(plpData=plpData,population=population[ind,])
if(nrow(prediction)!=nrow(population[ind,]))
ParallelLogger::logWarn(paste0('Dimension mismatch between prediction and population test cases. Population test: ',nrow(population[ind, ]), '-- Prediction:', nrow(prediction) ))
} else{
ParallelLogger::logError('Non plpModel input')
stop()
}
metaData <- list(predictionType="binary",
cohortId = attr(population,'metaData')$cohortId,
outcomeId = attr(population,'metaData')$outcomeId)
attr(prediction, "metaData") <- metaData
return(prediction)
}
# default patient level prediction prediction
predict.plp <- function(plpModel,population, plpData, ...){
covariateData <- limitCovariatesToPopulation(plpData$covariateData, population$rowId)
ParallelLogger::logTrace('predict.plp - predictingProbabilities start')
prediction <- predictProbabilities(plpModel$model, population, covariateData)
ParallelLogger::logTrace('predict.plp - predictingProbabilities end')
return(prediction)
}
# for gxboost
predict.xgboost <- function(plpModel,population, plpData, ...){
result <- toSparseM(plpData, population, map=plpModel$covariateMap)
data <- result$data[population$rowId,]
prediction <- data.frame(rowId=population$rowId,
value=stats::predict(plpModel$model, data)
)
prediction <- merge(population, prediction, by='rowId', all.x=T, fill=0)
prediction <- prediction[,colnames(prediction)%in%c('rowId','subjectId','cohortStartDate','outcomeCount','indexes', 'value')] # need to fix no index issue
attr(prediction, "metaData") <- list(predictionType = "binary")
return(prediction)
}
predict.pythonReticulate <- function(plpModel, population, plpData){
e <- environment()
reticulate::source_python(system.file(package='PatientLevelPrediction','python','predictFunctions.py'), envir = e)
ParallelLogger::logInfo('Mapping covariates...')
if(!is.null(plpData$timeRef)){
pdata <- toSparseTorchPython(plpData,population, map=plpModel$covariateMap, temporal=T)
pdata <- pdata$data
fun_predict <- python_predict_temporal
} else {
newData <- toSparseM(plpData, population, map=plpModel$covariateMap)
included <- plpModel$varImp$covariateId[plpModel$varImp$included>0] # does this include map?
included <- newData$map$newCovariateId[newData$map$oldCovariateId%in%included]
pdata <- reticulate::r_to_py(newData$data[,included])
fun_predict <- python_predict
}
# save population
if('indexes'%in%colnames(population)){
population$rowIdPython <- population$rowId-1 # -1 to account for python/r index difference
pPopulation <- as.matrix(population[,c('rowIdPython','outcomeCount','indexes')])
} else {
population$rowIdPython <- population$rowId-1 # -1 to account for python/r index difference
pPopulation <- as.matrix(population[,c('rowIdPython','outcomeCount')])
}
# run the python predict code:
ParallelLogger::logInfo('Executing prediction...')
result <- fun_predict(population = pPopulation,
plpData = pdata,
model_loc = plpModel$model,
dense = ifelse(is.null(plpModel$dense),0,plpModel$dense),
autoencoder = F)
#get the prediction from python and reformat:
ParallelLogger::logInfo('Returning results...')
prediction <- result
prediction <- as.data.frame(prediction)
attr(prediction, "metaData") <- list(predictionType="binary")
if(ncol(prediction)==4){
colnames(prediction) <- c('rowId','outcomeCount','indexes', 'value')
} else {
colnames(prediction) <- c('rowId','outcomeCount', 'value')
}
# add 1 to rowId from python:
prediction$rowId <- prediction$rowId+1
# add subjectId and date:
prediction <- merge(prediction,
population[,c('rowId','subjectId','cohortStartDate')],
by='rowId')
return(prediction)
}
predict.pythonAuto <- function(plpModel, population, plpData){
ParallelLogger::logInfo('Mapping covariates...')
if(!is.null(plpData$timeRef)){
pdata <- toSparseTorchPython(plpData,population, map=plpModel$covariateMap, temporal=T)
pdata <- pdata$data
} else {
newData <- toSparseM(plpData, population, map=plpModel$covariateMap)
included <- plpModel$varImp$covariateId[plpModel$varImp$included>0] # does this include map?
included <- newData$map$newCovariateId[newData$map$oldCovariateId%in%included]
pdata <- reticulate::r_to_py(newData$data[,included])
}
# save population
if('indexes'%in%colnames(population)){
population$rowIdPython <- population$rowId-1 # -1 to account for python/r index difference
pPopulation <- as.matrix(population[,c('rowIdPython','outcomeCount','indexes')])
} else {
population$rowIdPython <- population$rowId-1 # -1 to account for python/r index difference
pPopulation <- as.matrix(population[,c('rowIdPython','outcomeCount')])
}
# run the python predict code:
ParallelLogger::logInfo('Executing prediction...')
e <- environment()
reticulate::source_python(system.file(package='PatientLevelPrediction','python','predictFunctions.py'), envir = e)
result <- python_predict(population = pPopulation,
plpData = pdata,
model_loc = plpModel$model,
dense = ifelse(is.null(plpModel$dense),0,plpModel$dense),
autoencoder = T)
#get the prediction from python and reformat:
ParallelLogger::logInfo('Returning results...')
prediction <- result
prediction <- as.data.frame(prediction)
attr(prediction, "metaData") <- list(predictionType="binary")
if(ncol(prediction)==4){
colnames(prediction) <- c('rowId','outcomeCount','indexes', 'value')
} else {
colnames(prediction) <- c('rowId','outcomeCount', 'value')
}
# add 1 to rowId from python:
prediction$rowId <- prediction$rowId+1
# add subjectId and date:
prediction <- merge(prediction,
population[,c('rowId','subjectId','cohortStartDate')],
by='rowId')
return(prediction)
}
predict.knn <- function(plpData, population, plpModel, ...){
covariateData <- limitCovariatesToPopulation(plpData$covariateData, population$rowId)
prediction <- BigKnn::predictKnn(covariates = covariateData$covariates,
cohorts= population[,!colnames(population)%in%'cohortStartDate'],
indexFolder = plpModel$model,
k = plpModel$modelSettings$modelParameters$k,
weighted = TRUE,
threads = plpModel$modelSettings$threads)
# can add: threads = 1 in the future
# return the cohorts as a data frame with the prediction added as
# a new column with the column name 'value'
prediction <- merge(population, prediction[,c('rowId','value')], by='rowId',
all.x=T, fill=0)
prediction$value[is.na(prediction$value)] <- 0
return(prediction)
}
predict.deep <- function(plpModel, population, plpData, ...){
temporal <- !is.null(plpData$timeRef)
ParallelLogger::logDebug(paste0('timeRef null: ',is.null(plpData$timeRef)))
if(temporal){
ParallelLogger::logTrace('temporal')
result<-toSparseM(plpData,population,map=plpModel$covariateMap, temporal=T)
data <-result$data[population$rowId,,]
if(!is.null(plpModel$useVae)){
if(plpModel$useVae==TRUE){
data<- plyr::aaply(as.array(data), 2, function(x) predict(plpModel$vaeEncoder, x, batch_size = plpModel$vaeBatchSize))
data<-aperm(data, perm = c(2,1,3))#rearrange of dimension
}}
batch_size <- min(2000, length(population$rowId))
maxVal <- length(population$rowId)
batches <- lapply(1:ceiling(maxVal/batch_size), function(x) ((x-1)*batch_size+1):min((x*batch_size),maxVal))
prediction <- population
prediction$value <- 0
for(batch in batches){
pred <- keras::predict_on_batch(plpModel$model, as.array(data[batch,,]))
if(is.null(dim(pred))){
prediction$value[batch] <- as.double(pred)
} else{
prediction$value[batch] <- as.double(pred[,2])
}
}
prediction <- prediction[,colnames(prediction)%in%c('rowId','subjectId','cohortStartDate','outcomeCount','indexes', 'value')] # need to fix no index issue
return(prediction)
} else{
result<-toSparseM(plpData,population,map=plpModel$covariateMap, temporal=F)
data <-result$data[population$rowId,]
batch_size <- min(2000, length(population$rowId))
maxVal <- length(population$rowId)
batches <- lapply(1:ceiling(maxVal/batch_size), function(x) ((x-1)*batch_size+1):min((x*batch_size),maxVal))
prediction <- population
prediction$value <- 0
for(batch in batches){
pred <- keras::predict_on_batch(plpModel$model, as.array(data[batch,]))
prediction$value[batch] <- as.double(pred[,2])
}
prediction <- prediction[,colnames(prediction)%in%c('rowId','subjectId','cohortStartDate','outcomeCount','indexes', 'value')] # need to fix no index issue
return(prediction)
}
}
predict.BayesianDeep <- function(plpModel, population, plpData, ...){
temporal <- !is.null(plpData$timeRef)
ParallelLogger::logDebug(paste0('timeRef null: ',is.null(plpData$timeRef)))
if(temporal){
ParallelLogger::logTrace('temporal')
result<-toSparseM(plpData,population,map=plpModel$covariateMap, temporal=T)
data <-result$data[population$rowId,,]
if(!is.null(plpModel$useVae)){
if(plpModel$useVae==TRUE){
data<- plyr::aaply(as.array(data), 2, function(x) predict(plpModel$vaeEncoder, x, batch_size = plpModel$vaeBatchSize))
data<-aperm(data, perm = c(2,1,3))#rearrange of dimension
}}
batch_size <- min(2000, length(population$rowId))
maxVal <- length(population$rowId)
batches <- lapply(1:ceiling(maxVal/batch_size), function(x) ((x-1)*batch_size+1):min((x*batch_size),maxVal))
prediction <- population
prediction$value <- 0
prediction$epistemicUncertainty <- 0
prediction$aleatoricUncertainty <- 0
for(batch in batches){
num_MC_samples = 100
output_dim = 2
pred <- keras::predict_on_batch(plpModel$model, as.array(data[batch,,]))
MC_samples <- array(0, dim = c(num_MC_samples, length(batch), 2 * output_dim))
for (k in 1:num_MC_samples){
MC_samples[k,, ] = predict(plpModel$model, as.array(data[batch,,]))
#keras::predict_proba(model, as.array(plpData[population$rowId[population$indexes==index],,][batch,,]))
}
pred <- apply(MC_samples[,,output_dim], 2, mean)
epistemicUncertainty <- apply(MC_samples[,,output_dim], 2, var)
logVar = MC_samples[, , output_dim * 2]
if(length(dim(logVar))<=1){
aleatoricUncertainty = exp(mean(logVar))
}else{
aleatoricUncertainty = exp(colMeans(logVar))
}
prediction$value[batch] <- pred
prediction$epistemicUncertainty[batch] = epistemicUncertainty
prediction$aleatoricUncertainty[batch] = aleatoricUncertainty
#writeLines(paste0(dim(pred[,2]), collapse='-'))
#writeLines(paste0(pred[1,2], collapse='-'))
}
prediction$value[prediction$value>1] <- 1
prediction$value[prediction$value<0] <- 0
prediction <- prediction[,colnames(prediction)%in%c('rowId','subjectId','cohortStartDate','outcomeCount','indexes', 'value',
'epistemicUncertainty', 'aleatoricUncertainty')] # need to fix no index issue
return(prediction)
} else{
result<-toSparseM(plpData,population,map=plpModel$covariateMap, temporal=F)
data <-result$data[population$rowId,]
batch_size <- min(2000, length(population$rowId))
maxVal <- length(population$rowId)
batches <- lapply(1:ceiling(maxVal/batch_size), function(x) ((x-1)*batch_size+1):min((x*batch_size),maxVal))
prediction <- population
prediction$value <- 0
for(batch in batches){
num_MC_samples = 100
output_dim =2
MC_samples <- array(0, dim = c(num_MC_samples, length(batch), 2 * output_dim))
for (k in 1:num_MC_samples){
MC_samples[k,, ] = predict(plpModel$model, as.array(data[batch,,]))
#keras::predict_proba(model, as.array(plpData[population$rowId[population$indexes==index],,][batch,,]))
}
pred <- apply(MC_samples[,,output_dim], 2, mean)
epistemicUncertainty <- apply(MC_samples[,,output_dim], 2, var)
logVar = MC_samples[, , output_dim * 2]
if(length(dim(logVar))<=1){
aleatoricUncertainty = exp(mean(logVar))
}else{
aleatoricUncertainty = exp(colMeans(logVar))
}
prediction$value[batch] <- pred
prediction$epistemicUncertainty[batch] = epistemicUncertainty
prediction$aleatoricUncertainty[batch] = aleatoricUncertainty
#writeLines(paste0(dim(pred[,2]), collapse='-'))
#writeLines(paste0(pred[1,2], collapse='-'))
}
prediction$value[prediction$value>1] <- 1
prediction$value[prediction$value<0] <- 0
prediction <- prediction[,colnames(prediction)%in%c('rowId','subjectId','cohortStartDate','outcomeCount','indexes', 'value',
'epistemicUncertainty', 'aleatoricUncertainty')] # need to fix no index issue
return(prediction)
}
}
predict.deepEnsemble <- function(plpModel, population, plpData, ...){
temporal <- !is.null(plpData$timeRef)
ParallelLogger::logDebug(paste0('timeRef null: ',is.null(plpData$timeRef)))
if(temporal){
ParallelLogger::logTrace('temporal')
result<-toSparseM(plpData,population,map=plpModel$covariateMap, temporal=T)
data <-result$data[population$rowId,,]
if(!is.null(plpModel$useVae)){
if(plpModel$useVae==TRUE){
data<- plyr::aaply(as.array(data), 2, function(x) predict(plpModel$vaeEncoder, x, batch_size = plpModel$vaeBatchSize))
data<-aperm(data, perm = c(2,1,3))#rearrange of dimension
}}
batch_size <- min(2000, length(population$rowId))
maxVal <- length(population$rowId)
batches <- lapply(1:ceiling(maxVal/batch_size), function(x) ((x-1)*batch_size+1):min((x*batch_size),maxVal))
prediction <- population
prediction$value <- 0
prediction$sigmas <- 0
for(batch in batches){
for (i in seq(plpModel$modelSettings$modelParameters$numberOfEnsembleNetwork)){
if(i==1){
muMatrix <- data.frame()
sigmaMatrix <-data.frame()
}
c(mu,sigma) %<-% plpModel$model[[i]](inputs=list(as.array(data[batch,,])))
muMatrix<-rbind(muMatrix,t(as.data.frame(mu[,2])))
sigmaMatrix<-rbind(sigmaMatrix,t(as.data.frame(sigma[,2])))
}
muMean <- apply(muMatrix,2,mean)
muSq <- muMatrix^2
sigmaSq <- sigmaMatrix^2
sigmaMean <- apply(sigmaMatrix,2,mean)
sigmaResult=apply(muSq+sigmaSq,2, mean)- muMean^2
prediction$value[batch] <- c(muMean)
prediction$sigmas[batch] <- c(sigmaResult)
}
prediction <- prediction[,colnames(prediction)%in%c('rowId','subjectId','cohortStartDate','outcomeCount','indexes', 'value', 'sigmas')] # need to fix no index issue
#If the prediction value is negative, please add values to make all the values to positive.
if(min(prediction$value)<0){prediction$value = prediction$value+ (min(prediction$value)* (-1)) }
return(prediction)
} else{
result<-toSparseM(plpData,population,map=plpModel$covariateMap, temporal=F)
data <-result$data[population$rowId,]
batch_size <- min(2000, length(population$rowId))
maxVal <- length(population$rowId)
batches <- lapply(1:ceiling(maxVal/batch_size), function(x) ((x-1)*batch_size+1):min((x*batch_size),maxVal))
prediction <- population
prediction$value <- 0
prediction$sigmas <- 0
for(batch in batches){
for (i in seq(plpModel$modelSettings$modelParameters$numberOfEnsembleNetwork)){
if(i==1){
muMatrix <- data.frame()
sigmaMatrix <-data.frame()
}
c(mu,sigma) %<-% plpModel$model[[i]](inputs=list(as.array(data[batch,,])))
muMatrix<-rbind(muMatrix,t(as.data.frame(mu[,2])))
sigmaMatrix<-rbind(sigmaMatrix,t(as.data.frame(sigma[,2])))
}
muMean <- apply(muMatrix,2,mean)
muSq <- muMatrix^2
sigmaSq <- sigmaMatrix^2
sigmaMean <- apply(sigmaMatrix,2,mean)
sigmaResult=apply(muSq+sigmaSq,2, mean)- muMean^2
prediction$value[batch] <- c(muMean)
prediction$sigmas[batch] <- c(sigmaResult)
}
prediction <- prediction[,colnames(prediction)%in%c('rowId','subjectId','cohortStartDate','outcomeCount','indexes', 'value', 'sigmas')] # need to fix no index issue
if(min(prediction$value)<0){prediction$value = prediction$value+ (min(prediction$value)* (-1)) }
return(prediction)
}
}
predict.deepMulti <- function(plpModel, population, plpData, ...){
repeats <- attr(plpModel, 'inputs')
temporal <- !is.null(plpData$timeRef)
ParallelLogger::logDebug('timeRef null: ',paste0(is.null(plpData$timeRef)))
if(temporal){
ParallelLogger::logTrace('temporal')
result<-toSparseM(plpData,population,map=plpModel$covariateMap, temporal=T)
ParallelLogger::logDebug('result$data dim: ',paste0(dim(result$data), collapse = '-'))
ParallelLogger::logDebug('population dim: ',paste0(dim(population), collapse = '-'))
data <-result$data[population$rowId,,]
ParallelLogger::logInfo('running batch prediction')
batch_size <- min(2000, length(population$rowId))
maxVal <- length(population$rowId)
batches <- lapply(1:ceiling(maxVal/batch_size), function(x) ((x-1)*batch_size+1):min((x*batch_size),maxVal))
prediction <- population
prediction$value <- 0
for(batch in batches){
ParallelLogger::logDebug('batch length: ', length(batch))
dat <- list()
length(dat) <- repeats
for( i in 1:repeats) {dat[[i]] <- as.array(data[batch,,])}
pred <- keras::predict_on_batch(plpModel$model, dat)
if(is.null(dim(pred))){
ParallelLogger::logDebug('Pred length: ', length(pred))
prediction$value[batch] <- as.double(pred)
} else{
ParallelLogger::logDebug('Pred dim: ', paste0(dim(pred), collapse = '-'))
prediction$value[batch] <- as.double(pred[,2])
}
}
prediction <- prediction[,colnames(prediction)%in%c('rowId','subjectId','cohortStartDate','outcomeCount','indexes', 'value')] # need to fix no index issue
return(prediction)
} else{
result<-toSparseM(plpData,population,map=plpModel$covariateMap, temporal=F)
data <-result$data[population$rowId,]
batch_size <- min(2000, length(population$rowId))
maxVal <- length(population$rowId)
batches <- lapply(1:ceiling(maxVal/batch_size), function(x) ((x-1)*batch_size+1):min((x*batch_size),maxVal))
prediction <- population
prediction$value <- 0
for(batch in batches){
dat <- list()
length(dat) <- repeats
for( i in 1:repeats) {dat[[i]] <- as.array(data[batch,,])}
pred <- keras::predict_on_batch(plpModel$model, dat)
prediction$value[batch] <- as.double(pred)
}
prediction <- prediction[,colnames(prediction)%in%c('rowId','subjectId','cohortStartDate','outcomeCount','indexes', 'value')] # need to fix no index issue
return(prediction)
}
}
#' Create predictive probabilities
#'
#' @details
#' Generates predictions for the population specified in plpData given the model.
#'
#' @return
#' The value column in the result data.frame is: logistic: probabilities of the outcome, poisson:
#' Poisson rate (per day) of the outome, survival: hazard rate (per day) of the outcome.
#'
#' @param predictiveModel An object of type \code{predictiveModel} as generated using
#' \code{\link{fitPlp}}.
#' @param population The population to calculate the prediction for
#' @param covariates The covariate part of PlpData containing the covariates for the population
#' @export
predictProbabilities <- function(predictiveModel, population, covariateData) {
start <- Sys.time()
ParallelLogger::logTrace('predictProbabilities - predictAndromeda start')
prediction <- predictAndromeda(predictiveModel$coefficients,
population,
covariateData,
predictiveModel$modelType)
ParallelLogger::logTrace('predictProbabilities - predictAndromeda end')
prediction$time <- NULL
attr(prediction, "modelType") <- predictiveModel$modelType
attr(prediction, "cohortId") <- attr(population, "metadata")$cohortId
attr(prediction, "outcomeId") <- attr(population, "metadata")$outcomeId
delta <- Sys.time() - start
ParallelLogger::logInfo("Prediction took ", signif(delta, 3), " ", attr(delta, "units"))
return(prediction)
}
#' Generated predictions from a regression model
#'
#' @param coefficients A names numeric vector where the names are the covariateIds, except for the
#' first value which is expected to be the intercept.
#' @param population A data frame containing the population to do the prediction for
#' @param covariateData An andromeda object containing the covariateData with predefined columns
#' (see below).
#' @param modelType Current supported types are "logistic", "poisson", "cox" or "survival".
#'
#' @details
#' These columns are expected in the outcome object: \tabular{lll}{ \verb{rowId} \tab(integer) \tab
#' Row ID is used to link multiple covariates (x) to a single outcome (y) \cr \verb{time} \tab(real)
#' \tab For models that use time (e.g. Poisson or Cox regression) this contains time \cr \tab
#' \tab(e.g. number of days) \cr } These columns are expected in the covariates object: \tabular{lll}{
#' \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome
#' (y) \cr \verb{covariateId} \tab(integer) \tab A numeric identifier of a covariate \cr
#' \verb{covariateValue} \tab(real) \tab The value of the specified covariate \cr }
#'
#' @export
predictAndromeda <- function(coefficients, population, covariateData, modelType = "logistic") {
if (!(modelType %in% c("logistic", "poisson", "survival","cox"))) {
stop(paste("Unknown modelType:", modelType))
}
if (!FeatureExtraction::isCovariateData(covariateData)){
stop("Needs correct covariateData")
}
intercept <- coefficients[names(coefficients)%in%'(Intercept)']
if(length(intercept)==0) intercept <- 0
coefficients <- coefficients[!names(coefficients)%in%'(Intercept)']
coefficients <- data.frame(beta = as.numeric(coefficients),
covariateId = as.numeric(names(coefficients)))
coefficients <- coefficients[coefficients$beta != 0, ]
if(sum(coefficients$beta != 0)>0){
covariateData$coefficients <- coefficients
prediction <- covariateData$covariates %>%
dplyr::inner_join(covariateData$coefficients, by= 'covariateId') %>%
dplyr::mutate(values = covariateValue*beta) %>%
dplyr::group_by(rowId) %>%
dplyr::summarise(value = sum(values, na.rm = TRUE)) %>%
dplyr::select(rowId, value)
prediction <- as.data.frame(prediction)
prediction <- merge(population, prediction, by ="rowId", all.x = TRUE, fill = 0)
prediction$value[is.na(prediction$value)] <- 0
prediction$value <- prediction$value + intercept
} else{
warning('Model had no non-zero coefficients so predicted same for all population...')
prediction <- population
prediction$value <- rep(0, nrow(population)) + intercept
}
if (modelType == "logistic") {
link <- function(x) {
return(1/(1 + exp(0 - x)))
}
prediction$value <- link(prediction$value)
} else if (modelType == "poisson" || modelType == "survival" || modelType == "cox") {
prediction$value <- exp(prediction$value)
if(max(prediction$value)>1){
prediction$value <- prediction$value/max(prediction$value)
}
}
return(prediction)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.