# @file Plotting.R
#
# Copyright 2021 Observational Health Data Sciences and Informatics
#
# This file is part of PatientLevelPrediction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Plot the outcome incidence over time
#'
#' @details
#' This creates a survival plot that can be used to pick a suitable time-at-risk period
#'
#' @param plpData The plpData object returned by running getPlpData()
#' @param outcomeId The cohort id corresponding to the outcome
#' @param populationSettings The population settings created using \code{createStudyPopulationSettings}
#' @param riskTable (binary) Whether to include a table at the bottom of the plot showing the number of people at risk over time
#' @param confInt (binary) Whether to include a confidence interval
#' @param yLabel (string) The label for the y-axis
#'
#' @return
#' TRUE if it ran
#'
#' @export
outcomeSurvivalPlot <- function(
plpData,
outcomeId,
populationSettings = createStudyPopulationSettings(
binary = T,
includeAllOutcomes = T,
firstExposureOnly = FALSE,
washoutPeriod = 0,
removeSubjectsWithPriorOutcome = TRUE,
priorOutcomeLookback = 99999,
requireTimeAtRisk = F,
riskWindowStart = 1,
startAnchor = 'cohort start',
riskWindowEnd = 3650,
endAnchor = "cohort start"
),
riskTable = T,
confInt= T,
yLabel = 'Fraction of those who are outcome free in target population'
)
{
ensure_installed("survminer")
if(missing(plpData)){
stop('plpData missing')
}
if(missing(outcomeId)){
stop('outcomeId missing')
}
if(class(plpData)!='plpData'){
stop('Incorrect plpData object')
}
if(!outcomeId%in%unique(plpData$outcomes$outcomeId)){
stop('outcome id not in data')
}
populationSettings$plpData <- plpData
populationSettings$outcomeId <- outcomeId
population <- do.call(
what = 'createStudyPopulation',
args = list(
plpData = plpData,
outcomeId = outcomeId,
populationSettings = populationSettings
)
)
population$daysToEvent[is.na(population$daysToEvent)] <- population$survivalTime[is.na(population$daysToEvent)]
survivalFit <- survival::survfit(
survival::Surv(daysToEvent, outcomeCount)~cohortId,
#riskDecile,
population,
conf.int = TRUE
)
if(!is.null(survivalFit$surv)){
yliml <- min(survivalFit$surv)
} else{
yliml <- 0.5
}
result <- survminer::ggsurvplot(
fit=survivalFit,
data = population,
risk.table = riskTable,
pval = F,
xlim = c(0,populationSettings$riskWindowEnd),
ylim = c(yliml*0.95,1),
conf.int = confInt,
ggtheme = ggplot2::theme_minimal(),
risk.table.y.text.col = T,
risk.table.y.text = FALSE,
ylab = yLabel
)
return(result)
}
#' Plot all the PatientLevelPrediction plots
#'
#' @details
#' Create a directory with all the plots
#'
#' @param plpResult Object returned by the runPlp() function
#' @param saveLocation Name of the directory where the plots should be saved (NULL means no saving)
#' @param typeColumn The name of the column specifying the evaluation type
#' (to stratify the plots)
#'
#' @return
#' TRUE if it ran
#'
#' @export
plotPlp <- function(
plpResult,
saveLocation = NULL,
typeColumn = 'evaluation'
){
# check inputs
if(!is.null(saveLocation)){
if(!dir.exists(saveLocation)){
dir.create(saveLocation, recursive =T)
}
}
# run each of the plots:
tryCatch({
plotSparseRoc(
plpResult = plpResult,
typeColumn = typeColumn,
saveLocation = saveLocation,
fileName = 'sparseROC.pdf'
)},
error = function(e){ParallelLogger::logError('Issue with plotSparseRoc')})
tryCatch({
plotPredictedPDF(
plpResult,
saveLocation = saveLocation,
fileName = 'predictedPDF.pdf',
typeColumn = typeColumn)
},
error = function(e){ParallelLogger::logError('Issue with plotPredictedPDF')})
tryCatch({
plotPreferencePDF(
plpResult,
saveLocation = saveLocation,
fileName = 'preferencePDF.pdf',
typeColumn = typeColumn)
},
error = function(e){ParallelLogger::logError('Issue with plotPreferencePDF')})
tryCatch({
plotPrecisionRecall(
plpResult,
saveLocation = saveLocation,
fileName = 'precisionRecall.pdf',
typeColumn = typeColumn)
},
error = function(e){ParallelLogger::logError('Issue with plotPrecisionRecall')})
tryCatch({
plotF1Measure(
plpResult,
saveLocation = saveLocation,
fileName = 'f1Measure.pdf',
typeColumn = typeColumn)
},
error = function(e){ParallelLogger::logError('Issue with plotF1Measure')})
tryCatch({
plotDemographicSummary(
plpResult,
saveLocation = saveLocation,
fileName = 'demographicSummary.pdf',
typeColumn = typeColumn)
},
error = function(e){ParallelLogger::logError('Issue with plotDemographicSummary')})
# add smooth calibration
tryCatch({
plotSmoothCalibration(
plpResult = plpResult,
smooth = 'loess',
typeColumn = typeColumn,
saveLocation = saveLocation,
fileName = 'smoothCalibration.pdf'
)
},
error = function(e){ParallelLogger::logError('Issue with plotSmoothCalibration')})
tryCatch({
plotSparseCalibration(
plpResult,
saveLocation = saveLocation,
fileName = 'sparseCalibration.pdf',
typeColumn = typeColumn)
},
error = function(e){ParallelLogger::logError('Issue with plotSparseCalibration')})
tryCatch({
plotSparseCalibration2(
plpResult,
saveLocation = saveLocation,
fileName = 'sparseCalibrationConventional.pdf',
typeColumn = typeColumn)
},
error = function(e){ParallelLogger::logError('Issue with plotSparseCalibration2')})
tryCatch({
plotPredictionDistribution(
plpResult,
saveLocation = saveLocation,
fileName = 'predictionDistribution.pdf',
typeColumn = typeColumn)
},
error = function(e){ParallelLogger::logError('Issue with plotPredictionDistribution')})
tryCatch({
plotVariableScatterplot(
plpResult$covariateSummary,
saveLocation = saveLocation,
fileName = 'variableScatterplot.pdf'
)
},
error = function(e){ParallelLogger::logError('Issue with plotVariableScatterplot')})
if(sum(c('TrainWithNoOutcome_CovariateMean', 'TestWithNoOutcome_CovariateMean') %in% colnames(plpResult$covariateSummary))==2){
tryCatch({
plotGeneralizability(
plpResult$covariateSummary,
saveLocation = saveLocation,
fileName = 'generalizability.pdf'
)
},
error = function(e){ParallelLogger::logError('Issue with plotGeneralizability')})
}
return(invisible(TRUE))
}
#=============
# THERSHOLDSUMMARY PLOTS
#=============
#' Plot the ROC curve using the sparse thresholdSummary data frame
#'
#' @details
#' Create a plot showing the Receiver Operator Characteristics (ROC) curve.
#'
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#'
#' @return
#' A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different
#' format.
#'
#' @export
plotSparseRoc <- function(
plpResult,
typeColumn = 'evaluation',
saveLocation = NULL,
fileName = 'roc.png'
){
evalTypes <- unique(plpResult$performanceEvaluation$thresholdSummary[,typeColumn])
plots <- list()
length(plots) <- length(evalTypes)
for(i in 1:length(evalTypes)){
evalType <- evalTypes[i]
x <- plpResult$performanceEvaluation$thresholdSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(.data$falsePositiveRate, .data$sensitivity)
#x <- thresholdSummary[,c('falsePositiveRate','sensitivity')]
x <- x[order(x$falsePositiveRate, x$sensitivity),]
# add the bit to get the step
stepsExtra <- cbind(x[-1,1], x[-nrow(x),2])
colnames( stepsExtra) <- colnames(x)
x <- rbind(c(1,1), x, stepsExtra, c(0,0))
x <- x[order(x$falsePositiveRate, x$sensitivity),]
plots[[i]] <- ggplot2::ggplot(
x,
ggplot2::aes(
.data$falsePositiveRate,
.data$sensitivity
)
) +
ggplot2::geom_polygon(fill = "blue", alpha = 0.2) +
ggplot2::geom_line(size=1) +
ggplot2::geom_abline(intercept = 0, slope = 1,linetype = 2) +
ggplot2::scale_x_continuous("1 - specificity", limits=c(0,1)) +
ggplot2::scale_y_continuous("Sensitivity", limits=c(0,1)) +
ggplot2::ggtitle(evalType)
}
plot <- gridExtra::marrangeGrob(plots, nrow=length(plots), ncol=1)
if (!is.null(saveLocation)){
if(!dir.exists(saveLocation)){
dir.create(saveLocation, recursive = T)
}
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
}
return(plot)
}
#' Plot the Predicted probability density function, showing prediction overlap between true and false cases
#'
#' @details
#' Create a plot showing the predicted probability density function, showing prediction overlap between true and false cases
#'
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#'
#' @return
#' A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different
#' format.
#'
#' @export
plotPredictedPDF <- function(
plpResult,
typeColumn = 'evaluation',
saveLocation = NULL,
fileName = 'PredictedPDF.png'
)
{
evalTypes <- unique(plpResult$performanceEvaluation$thresholdSummary[,typeColumn])
plots <- list()
length(plots) <- length(evalTypes)
for(ind in 1:length(evalTypes)){
evalType <- evalTypes[ind]
x <- plpResult$performanceEvaluation$thresholdSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(
.data$predictionThreshold,
.data$truePositiveCount,
.data$trueNegativeCount,
.data$falsePositiveCount,
.data$falseNegativeCount
)
x<- x[order(x$predictionThreshold,-x$truePositiveCount, -x$falsePositiveCount),]
x$out <- c(x$truePositiveCount[-length(x$truePositiveCount)]-x$truePositiveCount[-1], x$truePositiveCount[length(x$truePositiveCount)])
x$nout <- c(x$falsePositiveCount[-length(x$falsePositiveCount)]-x$falsePositiveCount[-1], x$falsePositiveCount[length(x$falsePositiveCount)])
vals <- c()
for(i in 1:length(x$predictionThreshold)){
if(i!=length(x$predictionThreshold)){
upper <- x$predictionThreshold[i+1]} else {upper <- min(x$predictionThreshold[i]+0.01,1)}
val <- x$predictionThreshold[i]+stats::runif(x$out[i])*(upper-x$predictionThreshold[i])
vals <- c(val, vals)
}
vals <- vals[!is.na(vals)] #assigned
vals2 <- c()
for(i in 1:length(x$predictionThreshold)){
if(i!=length(x$predictionThreshold)){
upper <- x$predictionThreshold[i+1]} else {upper <- min(x$predictionThreshold[i]+0.01,1)}
val2 <- x$predictionThreshold[i]+stats::runif(x$nout[i])*(upper-x$predictionThreshold[i])
vals2 <- c(val2, vals2)
}
vals2 <- vals2[!is.na(vals2)] #assigned
x <- rbind(data.frame(variable=rep('outcome',length(vals)), value=vals),
data.frame(variable=rep('No outcome',length(vals2)), value=vals2)
)
plots[[ind]] <- ggplot2::ggplot(x, ggplot2::aes(x=.data$value,
group=.data$variable,
fill=.data$variable)) +
ggplot2::geom_density(ggplot2::aes(x=.data$value, fill=.data$variable), alpha=.3) +
ggplot2::scale_x_continuous("Prediction Threshold") + #, limits=c(0,1)) +
ggplot2::scale_y_continuous("Density") +
ggplot2::guides(fill=ggplot2::guide_legend(title="Class")) +
ggplot2::ggtitle(evalType)
}
plot <- gridExtra::marrangeGrob(plots, nrow=length(plots), ncol=1)
if (!is.null(saveLocation)){
if(!dir.exists(saveLocation)){
dir.create(saveLocation, recursive = T)
}
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
}
return(plot)
}
#' Plot the preference score probability density function, showing prediction overlap between true and false cases
#' #'
#' @details
#' Create a plot showing the preference score probability density function, showing prediction overlap between true and false cases
#' #'
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#'
#' @return
#' A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different
#' format.
#'
#' @export
plotPreferencePDF <- function(
plpResult,
typeColumn = 'evaluation',
saveLocation = NULL,
fileName = 'plotPreferencePDF.png'
){
evalTypes <- unique(plpResult$performanceEvaluation$thresholdSummary[,typeColumn])
plots <- list()
length(plots) <- length(evalTypes)
for(ind in 1:length(evalTypes)){
evalType <- evalTypes[ind]
x <- plpResult$performanceEvaluation$thresholdSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(
.data$preferenceThreshold,
.data$truePositiveCount,
.data$trueNegativeCount,
.data$falsePositiveCount,
.data$falseNegativeCount
)
x<- x[order(x$preferenceThreshold,-x$truePositiveCount, x$trueNegativeCount),]
x$out <- c(x$truePositiveCount[-length(x$truePositiveCount)]-x$truePositiveCount[-1], x$truePositiveCount[length(x$truePositiveCount)])
x$nout <- c(x$falsePositiveCount[-length(x$falsePositiveCount)]-x$falsePositiveCount[-1], x$falsePositiveCount[length(x$falsePositiveCount)])
vals <- c()
for(i in 1:length(x$preferenceThreshold)){
if(i!=length(x$preferenceThreshold)){
upper <- x$preferenceThreshold[i+1]} else {upper <- 1}
val <- x$preferenceThreshold[i]+stats::runif(x$out[i])*(upper-x$preferenceThreshold[i])
vals <- c(val, vals)
}
vals <- vals[!is.na(vals)]
vals2 <- c()
for(i in 1:length(x$preferenceThreshold)){
if(i!=length(x$preferenceThreshold)){
upper <- x$preferenceThreshold[i+1]} else {upper <- 1}
val2 <- x$preferenceThreshold[i]+stats::runif(x$nout[i])*(upper-x$preferenceThreshold[i])
vals2 <- c(val2, vals2)
}
vals2 <- vals2[!is.na(vals2)]
x <- rbind(data.frame(variable=rep('outcome',length(vals)), value=vals),
data.frame(variable=rep('No outcome',length(vals2)), value=vals2)
)
plots[[ind]] <- ggplot2::ggplot(x, ggplot2::aes(x=.data$value,
group=.data$variable,
fill=.data$variable)) +
ggplot2::geom_density(ggplot2::aes(x=.data$value, fill=.data$variable), alpha=.3) +
ggplot2::scale_x_continuous("Preference Threshold")+#, limits=c(0,1)) +
ggplot2::scale_y_continuous("Density") +
ggplot2::guides(fill=ggplot2::guide_legend(title="Class")) +
ggplot2::ggtitle(evalType)
}
plot <- gridExtra::marrangeGrob(plots, nrow=length(plots), ncol=1)
if (!is.null(saveLocation)){
if(!dir.exists(saveLocation)){
dir.create(saveLocation, recursive = T)
}
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
}
return(plot)
}
#' Plot the precision-recall curve using the sparse thresholdSummary data frame
#'
#' @details
#' Create a plot showing the precision-recall curve using the sparse thresholdSummary data frame
#'
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#'
#' @return
#' A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different
#' format.
#'
#' @export
plotPrecisionRecall <- function(
plpResult,
typeColumn = 'evaluation',
saveLocation = NULL,
fileName = 'roc.png'
){
evalTypes <- unique(plpResult$performanceEvaluation$thresholdSummary[,typeColumn])
plots <- list()
length(plots) <- length(evalTypes)
for(i in 1:length(evalTypes)){
evalType <- evalTypes[i]
N <- max(plpResult$performanceEvaluation$thresholdSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(.data$falseCount) %>%
dplyr::pull(), na.rm = T)
O <- max(plpResult$performanceEvaluation$thresholdSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(.data$trueCount) %>%
dplyr::pull(), na.rm = T)
inc <- O/(O + N)
x <- plpResult$performanceEvaluation$thresholdSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(.data$positivePredictiveValue, .data$sensitivity)
plots[[i]] <- ggplot2::ggplot(x, ggplot2::aes(.data$sensitivity, .data$positivePredictiveValue)) +
ggplot2::geom_line(size=1) +
ggplot2::scale_x_continuous("Recall")+#, limits=c(0,1)) +
ggplot2::scale_y_continuous("Precision") + #, limits=c(0,1))
ggplot2::geom_hline(yintercept = inc, linetype="dashed",
color = "red", size=1) +
ggplot2::ggtitle(evalType)
}
plot <- gridExtra::marrangeGrob(plots, nrow=length(plots), ncol=1)
if (!is.null(saveLocation)){
if(!dir.exists(saveLocation)){
dir.create(saveLocation, recursive = T)
}
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
}
return(plot)
}
#' Plot the F1 measure efficiency frontier using the sparse thresholdSummary data frame
#'
#' @details
#' Create a plot showing the F1 measure efficiency frontier using the sparse thresholdSummary data frame
#'
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#'
#' @return
#' A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different
#' format.
#'
#' @export
plotF1Measure <- function(
plpResult,
typeColumn = 'evaluation',
saveLocation = NULL,
fileName = 'roc.png'
){
evalTypes <- unique(plpResult$performanceEvaluation$thresholdSummary[,typeColumn])
plots <- list()
length(plots) <- length(evalTypes)
for(i in 1:length(evalTypes)){
evalType <- evalTypes[i]
x <- plpResult$performanceEvaluation$thresholdSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(.data$predictionThreshold, .data$f1Score)
if(sum(is.nan(x$f1Score))>0){
x <- x[!is.nan(x$f1Score),]
if(nrow(x)==0){return(NULL)}
}
plots[[i]] <- ggplot2::ggplot(x, ggplot2::aes(.data$predictionThreshold, .data$f1Score)) +
ggplot2::geom_line(size=1) +
ggplot2::geom_point(size=1) +
ggplot2::scale_x_continuous("predictionThreshold")+#, limits=c(0,1)) +
ggplot2::scale_y_continuous("F1Score") +#, limits=c(0,1))
ggplot2::ggtitle(evalType)
}
plot <- gridExtra::marrangeGrob(plots, nrow=length(plots), ncol=1)
if (!is.null(saveLocation)){
if(!dir.exists(saveLocation)){
dir.create(saveLocation, recursive = T)
}
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
}
return(plot)
}
#=============
# DEMOGRAPHICSUMMARY PLOTS
#=============
#' Plot the Observed vs. expected incidence, by age and gender
#'
#' @details
#' Create a plot showing the Observed vs. expected incidence, by age and gender
#' #'
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#'
#' @return
#' A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different
#' format.
#'
#' @export
plotDemographicSummary <- function(
plpResult,
typeColumn = 'evaluation',
saveLocation = NULL,
fileName = 'roc.png'
)
{
evalTypes <- unique(plpResult$performanceEvaluation$demographicSummary[,typeColumn])
plots <- list()
length(plots) <- length(evalTypes)
for(i in 1:length(evalTypes)){
evalType <- evalTypes[i]
x <- plpResult$performanceEvaluation$demographicSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(
.data$ageGroup,
.data$genGroup,
.data$averagePredictedProbability,
.data$PersonCountAtRisk,
.data$PersonCountWithOutcome
)
# remove -1 values:
x$averagePredictedProbability[is.na(x$averagePredictedProbability)] <- 0
x <- x[x$PersonCountWithOutcome != -1,]
if(nrow(x)==0){
return(NULL)
}
x$observed <- x$PersonCountWithOutcome/x$PersonCountAtRisk
x <- x[,colnames(x)%in%c('ageGroup','genGroup','averagePredictedProbability','observed')]
# if age or gender missing add
if(sum(colnames(x)=='ageGroup')==1 && sum(colnames(x)=='genGroup')==0 ){
x$genGroup = rep('Non', nrow(x))
evaluation$demographicSummary$genGroup = rep('Non', nrow(evaluation$demographicSummary))
}
if(sum(colnames(x)=='ageGroup')==0 && sum(colnames(x)=='genGroup')==1 ){
x$ageGroup = rep('-1', nrow(x))
evaluation$demographicSummary$ageGroup = rep('-1', nrow(evaluation$demographicSummary))
}
x <- tidyr::pivot_longer(
data = x,
cols = colnames(x)[!colnames(x) %in% c('ageGroup','genGroup')],
names_to = 'variable',
values_to = "value"
)
#x <- reshape2::melt(x, id.vars=c('ageGroup','genGroup'))
# 1.96*StDevPredictedProbability
ci <- plpResult$performanceEvaluation$demographicSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(
.data$ageGroup,
.data$genGroup,
.data$averagePredictedProbability,
.data$StDevPredictedProbability
)
ci$StDevPredictedProbability[is.na(ci$StDevPredictedProbability)] <- 1
ci$lower <- ci$averagePredictedProbability-1.96*ci$StDevPredictedProbability
ci$lower[ci$lower <0] <- 0
ci$upper <- ci$averagePredictedProbability+1.96*ci$StDevPredictedProbability
ci$upper[ci$upper >1] <- max(ci$upper[ci$upper <1])
x$age <- gsub('Age group:','', x$ageGroup)
x$age <- factor(x$age,levels=c(" 0-4"," 5-9"," 10-14",
" 15-19"," 20-24"," 25-29"," 30-34"," 35-39"," 40-44",
" 45-49"," 50-54"," 55-59"," 60-64"," 65-69"," 70-74",
" 75-79"," 80-84"," 85-89"," 90-94"," 95-99","-1"),ordered=TRUE)
x <- merge(x, ci[,c('ageGroup','genGroup','lower','upper')], by=c('ageGroup','genGroup'))
x <- x[!is.na(x$value),]
plots[[i]] <- ggplot2::ggplot(data=x,
ggplot2::aes(x=.data$age,
group=interaction(.data$variable,.data$genGroup))) +
ggplot2::geom_line(ggplot2::aes(y=.data$value, group=.data$variable,
color=.data$variable,
linetype = .data$variable))+
ggplot2::geom_ribbon(data=x[x$variable!='observed',],
ggplot2::aes(ymin=.data$lower, ymax=.data$upper
, group=.data$genGroup),
fill="blue", alpha=0.2) +
ggplot2::facet_grid(.~ .data$genGroup, scales = "free") +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 1)) +
ggplot2::scale_y_continuous("Fraction") +
ggplot2::scale_x_discrete("Age") +
ggplot2::scale_color_manual(values = c("royalblue4","red"),
guide = ggplot2::guide_legend(title = NULL),
labels = c("Expected", "Observed")) +
ggplot2::guides(linetype=FALSE) +
ggplot2::ggtitle(evalType)
}
plot <- gridExtra::marrangeGrob(plots, nrow=length(plots), ncol=1)
if (!is.null(saveLocation)){
if(!dir.exists(saveLocation)){
dir.create(saveLocation, recursive = T)
}
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
}
return(plot)
}
#=============
# CALIBRATIONSUMMARY PLOTS
#=============
#' Plot the calibration
#'
#' @details
#' Create a plot showing the calibration
#' #'
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#'
#' @return
#' A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different
#' format.
#'
#' @export
plotSparseCalibration <- function(
plpResult,
typeColumn = 'evaluation',
saveLocation = NULL,
fileName = 'roc.png'
){
evalTypes <- unique(plpResult$performanceEvaluation$calibrationSummary[,typeColumn])
plots <- list()
length(plots) <- length(evalTypes)
for(i in 1:length(evalTypes)){
evalType <- evalTypes[i]
x <- plpResult$performanceEvaluation$calibrationSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(.data$averagePredictedProbability, .data$observedIncidence)
maxVal <- max(x$averagePredictedProbability,x$observedIncidence)
model <- stats::lm(observedIncidence~averagePredictedProbability, data=x)
res <- model$coefficients
names(res) <- c('Intercept','Gradient')
# confidence int
interceptConf <- stats::confint(model)[1,]
gradientConf <- stats::confint(model)[2,]
cis <- data.frame(lci = interceptConf[1]+seq(0,1,length.out = nrow(x))*gradientConf[1],
uci = interceptConf[2]+seq(0,1,length.out = nrow(x))*gradientConf[2],
x=seq(0,1,length.out = nrow(x)))
x <- cbind(x, cis)
# TODO: CHECK INPUT
plots[[i]] <- ggplot2::ggplot(data=x,
ggplot2::aes(x=.data$averagePredictedProbability,
y=.data$observedIncidence
)) +
ggplot2::geom_ribbon(ggplot2::aes(ymin=.data$lci,ymax=.data$uci, x=x),
fill="blue", alpha=0.2) +
ggplot2::geom_point(size=1, color='darkblue') +
ggplot2::coord_cartesian(ylim = c(0, maxVal), xlim =c(0,maxVal)) +
ggplot2::geom_abline(intercept = 0, slope = 1, linetype = 2, size=1,
show.legend = TRUE) +
ggplot2::geom_abline(intercept = res['Intercept'], slope = res['Gradient'],
linetype = 1,show.legend = TRUE,
color='darkblue') +
ggplot2::scale_x_continuous("Average Predicted Probability") +
ggplot2::scale_y_continuous("Observed Fraction With Outcome") +
ggplot2::ggtitle(evalType)
}
plot <- gridExtra::marrangeGrob(plots, nrow=length(plots), ncol=1)
if (!is.null(saveLocation)){
if(!dir.exists(saveLocation)){
dir.create(saveLocation, recursive = T)
}
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
}
return(plot)
}
#=============
# CALIBRATIONSUMMARY PLOTS 2
#=============
#' Plot the conventional calibration
#'
#' @details
#' Create a plot showing the calibration
#' #'
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#'
#' @return
#' A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different
#' format.
#'
#' @export
plotSparseCalibration2 <- function(
plpResult,
typeColumn = 'evaluation',
saveLocation = NULL,
fileName = 'roc.png'
){
evalTypes <- unique(plpResult$performanceEvaluation$calibrationSummary[,typeColumn])
plots <- list()
length(plots) <- length(evalTypes)
for(i in 1:length(evalTypes)){
evalType <- evalTypes[i]
x <- plpResult$performanceEvaluation$calibrationSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(.data$averagePredictedProbability, .data$observedIncidence, .data$PersonCountAtRisk)
cis <- apply(x, 1, function(x) stats::binom.test(round(x[2]*x[3]), x[3], alternative = c("two.sided"), conf.level = 0.95)$conf.int)
x$lci <- cis[1,]
x$uci <- cis[2,]
maxes <- max(max(x$averagePredictedProbability), max(x$observedIncidence))*1.1
limits <- ggplot2::aes(ymax = .data$uci, ymin= .data$lci)
plots[[i]] <- ggplot2::ggplot(data=x,
ggplot2::aes(x=.data$averagePredictedProbability, y=.data$observedIncidence
)) +
ggplot2::geom_point(size=2, color='black') +
ggplot2::geom_errorbar(limits) +
ggplot2::geom_line(colour='darkgrey') +
ggplot2::geom_abline(intercept = 0, slope = 1, linetype = 5, size=0.4,
show.legend = TRUE) +
ggplot2::scale_x_continuous("Average Predicted Probability") +
ggplot2::scale_y_continuous("Observed Fraction With Outcome") +
ggplot2::coord_cartesian(xlim = c(0, maxes), ylim=c(0,maxes)) +
ggplot2::ggtitle(evalType)
}
plot <- gridExtra::marrangeGrob(plots, nrow=length(plots), ncol=1)
if (!is.null(saveLocation)){
if(!dir.exists(saveLocation)){
dir.create(saveLocation, recursive = T)
}
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
}
return(plot)
}
#=============
# SMOOTHCALIBRATION plot
#=============
#' Plot the smooth calibration as detailed in Calster et al. "A calibration heirarchy for risk models
#' was defined: from utopia to empirical data" (2016)
#'
#' @details
#' Create a plot showing the smoothed calibration #'
#' @param plpResult The result of running \code{\link{runPlp}} function. An object containing the
#' model or location where the model is save, the data selection settings, the
#' preprocessing and training settings as well as various performance measures
#' obtained by the model.
#' @param smooth options: 'loess' or 'rcs'
#' @param span This specifies the width of span used for loess. This will allow for faster
#' computing and lower memory usage.
#' @param nKnots The number of knots to be used by the rcs evaluation. Default is 5
#' @param scatter plot the decile calibrations as points on the graph. Default is False
#' @param bins The number of bins for the histogram. Default is 20.
#' @param sample If using loess then by default 20,000 patients will be sampled to save time
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#' @return
#' A ggplot object.
#'
#' @export
plotSmoothCalibration <- function(plpResult,
smooth = "loess",
span = 0.75,
nKnots = 5,
scatter = FALSE,
bins = 20,
sample = TRUE,
typeColumn = 'evaluation',
saveLocation = NULL,
fileName = "smoothCalibration.pdf") {
if (!smooth %in% c("loess", "rcs")) stop(ParallelLogger::logError("Smooth type must be either 'loess' or 'rcs"))
if (nKnots < 3) stop(ParallelLogger::logError("Number of knots must be larger than 3"))
evalTypes <- unique(plpResult$performanceEvaluation$calibrationSummary[,typeColumn])
plots <- list()
length(plots) <- length(evalTypes)
failedEvalType <- rep(FALSE, length(evalTypes))
names(failedEvalType) <- evalTypes
for (i in seq_along(evalTypes)) {
evalType <- evalTypes[i]
ParallelLogger::logInfo(paste("Smooth calibration plot for "), evalType)
if('prediction'%in%names(plpResult)) {
x <- plpResult$performanceEvaluation$calibrationSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::select(.data$averagePredictedProbability, .data$observedIncidence)
prediction <- plpResult$prediction %>% dplyr::filter(.data$evaluationType == evalType)
maxVal <- max(x$averagePredictedProbability, x$observedIncidence)
maxes <- max(max(x$averagePredictedProbability), max(x$observedIncidence))
if (smooth == "rcs") {
if (missing(nKnots)) {
ParallelLogger::logInfo("Number of knots for the restricted cubic spline smoothing was automatically set to 5")
}
}
y <- prediction$outcomeCount
p <- prediction$value
nma <- !is.na(p + y) # select only non-missing cases
sumNA <- sum(!nma)
if (sumNA > 0)
warning(ParallelLogger::logWarn(paste(sumNA, "observations deleted due to NA probabilities or outcomes")))
y <- y[nma]
p <- p[nma]
logit <- log(p/(1 - p)) # delete cases with 0 and 1 probs
nonInf <- !is.infinite(logit)
sumNonInf <- sum(!nonInf)
if (sumNonInf > 0)
warning(ParallelLogger::logWarn(paste(sumNonInf, "observations deleted due to probabilities of 0 or 1")))
y <- y[nonInf]
p <- p[nonInf]
y <- y[order(p)]
p <- p[order(p)]
if (smooth == "loess") {
if(sample){
if(length(p)>40000){
inds <- unique(c(0,seq(0, length(p), by = floor(length(p)/20000)),
length(p)))
p <- p[inds]
y <- y[inds]
} else if(length(p)>20000){
inds <- sample(length(p), 20000)
p <- p[inds]
y <- y[inds]
}
}
# loess
smoothData <- data.frame(y, p)
fit <- stats::loess(y ~ p, degree = 2)
predictedFit <- stats::predict(fit, se = TRUE)
smoothData <- smoothData %>%
dplyr::mutate(
calibration = predictedFit$fit,
se = predictedFit$se,
lci = .data$calibration - stats::qt(.975, predictedFit$df) * .data$se,
uci = .data$calibration + stats::qt(.975, predictedFit$df) * .data$se
)
xlim <- ylim <- c(0, 1)
smoothPlot <- plotSmoothCalibrationLoess(data = smoothData, span = span) +
ggplot2::coord_cartesian(
xlim = xlim,
ylim = ylim
)
} else {
# Restricted cubic splines
smoothData <- data.frame(y, p)
smoothPlot <- plotSmoothCalibrationRcs(data = smoothData, nKnots = nKnots)
if (is.character(smoothPlot)) {
plots[[i]] <- smoothPlot
failedEvalType[evalTypes[i]] <- TRUE
next
}
xlim <- ylim <- c(0, 1)
smoothPlot <- smoothPlot +
ggplot2::coord_cartesian(
xlim = xlim,
ylim = ylim
)
}
# construct the plot grid
if (scatter) {
smoothPlot <- smoothPlot +
ggplot2::geom_point(
data = x,
ggplot2::aes(
x = .data$averagePredictedProbability,
y = .data$observedIncidence),
color = "black",
size = 2
)
}
# Histogram object detailing the distibution of event/noevent for each probability interval
count <- NULL
histPlot <- ggplot2::ggplot() +
ggplot2::geom_histogram(
data = prediction,
ggplot2::aes(
x = .data$value,
y = ggplot2::after_stat(count),
fill = as.character(.data$outcomeCount) #MAYBE ISSUE
),
bins = bins,
position = "stack",
alpha = 0.5,
boundary = 0,
closed = "left"
) +
ggplot2::facet_grid(.data$outcomeCount ~ ., scales = "free_y") +
ggplot2::scale_fill_discrete(name = "Outcome") +
ggplot2::theme(
strip.background = ggplot2::element_blank(),
strip.text = ggplot2::element_blank()
) +
ggplot2::labs(x = "Predicted Probability") +
ggplot2::coord_cartesian(xlim = xlim)
} else {
# use calibrationSummary
sparsePred <- plpResult$performanceEvaluation$calibrationSummary %>%
dplyr::filter(.data[[typeColumn]] == evalType) %>%
dplyr::mutate(
y = .data$observedIncidence,
p = .data$averagePredictedProbability
)
smoothPlot <- plotSmoothCalibrationLoess(data = sparsePred, span = span)
# construct the plot grid
if (scatter) {
smoothPlot <- smoothPlot +
ggplot2::geom_point(
data = sparsePred,
ggplot2::aes(
x = .data$p,
y = .data$y
),
size = 1,
color = "black",
show.legend = FALSE
)
}
# Histogram object detailing the distibution of event/noevent for each probability interval
popData1 <- sparsePred[, c('averagePredictedProbability', 'PersonCountWithOutcome')]
popData1$Label <- "Outcome"
colnames(popData1) <- c('averagePredictedProbability', 'PersonCount', "Label")
popData2 <- sparsePred[,c('averagePredictedProbability', 'PersonCountAtRisk')]
popData2$Label <- "No Outcome"
popData2$PersonCountAtRisk <- -1 * (popData2$PersonCountAtRisk - popData1$PersonCount)
colnames(popData2) <- c('averagePredictedProbability', 'PersonCount', "Label")
popData <- rbind(popData1, popData2)
popData$averagePredictedProbability <- factor(popData$averagePredictedProbability)
histPlot <- ggplot2::ggplot(
data = popData,
ggplot2::aes(
y = .data$averagePredictedProbability,
x = .data$PersonCount,
fill = .data$Label
)
) +
ggplot2::geom_bar(
data = popData[popData$Label == "Outcome",],
stat = "identity"
) +
ggplot2::geom_bar(
data = popData[popData$Label == "No Outcome",],
stat = "identity"
) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::scale_x_continuous(labels = abs) +
ggplot2::coord_flip( ) +
ggplot2::theme(
axis.title.x=ggplot2::element_blank(),
axis.text.x=ggplot2::element_blank(),
axis.ticks.x=ggplot2::element_blank()
)
}
if (!failedEvalType[i]) {
plots[[i]]$smoothPlot <- smoothPlot
plots[[i]]$histPlot <- histPlot
}
}
names(plots) <- tolower(evalTypes)
if (!is.null(saveLocation)) {
if(!dir.exists(saveLocation)) {
dir.create(saveLocation, recursive = T)
}
for (i in seq_along(evalTypes)) {
if (!failedEvalType[i]) {
gridPlot <- gridExtra::grid.arrange(
plots[[i]]$smoothPlot,
plots[[i]]$histPlot,
ncol = 1,
nrow = 2,
heights=c(2,1)
)
fileNameComponents <- unlist(strsplit(fileName, split = "\\."))
n <- length(fileNameComponents)
if (n > 2) {
actualFileName <- paste(
fileNameComponents[1:(n - 1)],
collapse = "."
)
} else {
actualFileName = fileNameComponents[1]
}
saveFileName <- paste0(
actualFileName,
evalTypes[i],
".",
fileNameComponents[n]
)
ggplot2::ggsave(
file.path(saveLocation, saveFileName),
gridPlot,
width = 5,
height = 4.5,
dpi = 400
)
}
}
}
return(plots)
}
plotSmoothCalibrationLoess <- function(data, span = 0.75) {
fit <- stats::loess(y ~ p, data = data, degree = 2, span = span)
predictedFit <- stats::predict(fit, se = TRUE)
data <- data %>%
dplyr::mutate(
calibration = predictedFit$fit,
se = predictedFit$se,
lci = .data$calibration - stats::qt(.975, predictedFit$df) * .data$se,
uci = .data$calibration + stats::qt(.975, predictedFit$df) * .data$se
)
plot <- ggplot2::ggplot(
data = data,
ggplot2::aes(
x = .data$p,
y = .data$calibration
)
) +
ggplot2::geom_line(
ggplot2::aes(
color = "Loess",
linetype = "Loess"
)
) +
ggplot2::geom_ribbon(
ggplot2::aes(
ymin = .data$lci,
ymax = .data$uci
),
fill = "blue",
alpha = 0.2
) +
ggplot2::geom_segment(
ggplot2::aes(
x = 0,
xend = 1,
y = 0,
yend = 1,
color = "Ideal",
linetype = "Ideal"
)
) +
ggplot2::scale_linetype_manual(
name = "Models",
values = c(
Loess = "solid",
Ideal = "dashed"
)
) +
ggplot2::scale_color_manual(
name = "Models",
values = c(
Loess = "blue",
Ideal = "red"
)
) +
ggplot2::labs(
x = "Predicted Probability",
y = "Observed Probability"
)
return(plot)
}
plotSmoothCalibrationRcs <- function(data, nKnots) {
p <- data$p
for (k in nKnots:3) {
formSmooth <- paste0('y ~ rms::rcs(p, ', k, ')')
smoothFit <- suppressWarnings(rms::lrm(stats::as.formula(formSmooth), data = data, x = TRUE, y = TRUE))
smoothFitFail <- smoothFit$fail
if (smoothFitFail) {
if (k > 3) {
ParallelLogger::logInfo(paste0("Setting number of Knots to ", k, " led to estimation problems. Switching to nKnots = ", k-1))
} else {
ParallelLogger::logInfo(paste0('Unable to fit model'))
plot <- "Failed"
}
}
}
# If the fit failed for all nKnots return "Failed"
if (smoothFitFail) {
return(plot)
}
xRange <- seq(0, p[length(p)], length.out = 1000)
pred <- stats::predict(smoothFit, xRange, se.fit = T, type = "lp")
predXRange <- stats::plogis(pred$linear.predictors)
ciSmooth <- data.frame(
lci = stats::plogis(pred$linear.predictors - 1.96 * pred$se.fit),
uci = stats::plogis(pred$linear.predictors + 1.96 * pred$se.fit)
)
smoothData <- cbind(xRange, predXRange, ciSmooth)
plot <- ggplot2::ggplot(
data = smoothData,
ggplot2::aes(
x = .data$xRange,
y = .data$predXRange
)
) +
ggplot2::geom_line(
ggplot2::aes(
color = "rcs",
linetype = "rcs"
)
) +
ggplot2::geom_ribbon(
ggplot2::aes(
ymin = .data$lci,
ymax = .data$uci
),
fill = "blue",
alpha = 0.2
) +
ggplot2::geom_segment(
ggplot2::aes(
x = 0,
xend = 1,
y = 0,
yend = 1,
color = "Ideal",
linetype = "Ideal"
)
) +
ggplot2::scale_color_manual(
name = "Models",
values = c(rcs = "blue", Ideal = "red")
) +
ggplot2::scale_linetype_manual(
name = "Models",
values = c(rcs = "solid", Ideal = "dashed")
) +
ggplot2::labs(x = "", y = "Observed Probability")
return(plot)
}
#=============
# PREDICTIONSUMMARY PLOTS
#=============
#' Plot the side-by-side boxplots of prediction distribution, by class#'
#' @details
#' Create a plot showing the side-by-side boxplots of prediction distribution, by class
#' #'
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#'
#' @return
#' A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different
#' format.
#'
#' @export
plotPredictionDistribution <- function(
plpResult,
typeColumn = 'evaluation',
saveLocation = NULL,
fileName = 'PredictionDistribution.png'
){
evalTypes <- unique(plpResult$performanceEvaluation$predictionDistribution[,typeColumn])
plots <- list()
length(plots) <- length(evalTypes)
for(i in 1:length(evalTypes)){
evalType <- evalTypes[i]
x <- plpResult$performanceEvaluation$predictionDistribution %>%
dplyr::filter(.data[[typeColumn]] == evalType)
non05 <- x$P05PredictedProbability[x$class==0]
non95 <- x$P95PredictedProbability[x$class==0]
one05 <- x$P05PredictedProbability[x$class==1]
one95 <- x$P95PredictedProbability[x$class==1]
plots[[i]] <- ggplot2::ggplot(x, ggplot2::aes(x=as.factor(.data$class),
ymin=.data$MinPredictedProbability,
lower=.data$P25PredictedProbability,
middle=.data$MedianPredictedProbability,
upper=.data$P75PredictedProbability,
ymax=.data$MaxPredictedProbability,
color=as.factor(.data$class))) +
ggplot2::coord_flip() +
ggplot2::geom_boxplot(stat="identity") +
ggplot2::scale_x_discrete("Class") +
ggplot2::scale_y_continuous("Predicted Probability") +
ggplot2::theme(legend.position="none") +
ggplot2::geom_segment(ggplot2::aes(x = 0.9, y = non05,
xend = 1.1, yend = non05), color='red') +
ggplot2::geom_segment(ggplot2::aes(x = 0.9, y = non95,
xend = 1.1, yend = non95), color='red') +
ggplot2::geom_segment(ggplot2::aes(x = 1.9, y = one05,
xend = 2.1, yend = one05)) +
ggplot2::geom_segment(ggplot2::aes(x = 1.9, y = one95,
xend = 2.1, yend = one95)) +
ggplot2::ggtitle(evalType)
}
plot <- gridExtra::marrangeGrob(plots, nrow=length(plots), ncol=1)
if (!is.null(saveLocation)){
if(!dir.exists(saveLocation)){
dir.create(saveLocation, recursive = T)
}
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
}
return(plot)
}
#=============
# COVARIATESUMMARY PLOTS
#=============
#' Plot the variable importance scatterplot
#'
#' @details
#' Create a plot showing the variable importance scatterplot
#' #'
#' @param covariateSummary A prediction object as generated using the
#' \code{\link{runPlp}} function.
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#'
#' @return
#' A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different
#' format.
#'
#' @export
plotVariableScatterplot <- function(
covariateSummary,
saveLocation = NULL,
fileName = 'VariableScatterplot.png'
)
{
# remove the non-incidence variables from the plot
covariateSummary <- covariateSummary[covariateSummary$WithNoOutcome_CovariateMean <= 1,]
covariateSummary$size <- rep(0.1, nrow(covariateSummary))
covariateSummary$size[covariateSummary$covariateValue!=0] <- 0.5
plot <- ggplot2::ggplot(covariateSummary, ggplot2::aes(y=.data$WithOutcome_CovariateMean,
x=.data$WithNoOutcome_CovariateMean,
#size=abs(covariateValue)+0.1
size=.data$size
)) +
ggplot2::geom_point(ggplot2::aes(color = .data$size)) +
ggplot2::scale_size(range = c(0, 1)) +
ggplot2::scale_colour_gradient2(low = "red",mid = "blue", high="green") +
ggplot2::scale_y_continuous("Outcome Covariate Mean") +
ggplot2::scale_x_continuous("Non-outcome Covariate Mean") +
ggplot2::geom_abline(intercept = 0, slope = 1,linetype = 2) +
ggplot2::theme(legend.position="none")
if (!is.null(saveLocation)){
suppressWarnings(ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 3.5, dpi = 400))
}
return(plot)
}
#' Plot the train/test generalizability diagnostic
#'
#' @details
#' Create a plot showing the train/test generalizability diagnostic
#' #'
#' @param covariateSummary A prediction object as generated using the
#' \code{\link{runPlp}} function.
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example
#' 'plot.png'. See the function \code{ggsave} in the ggplot2 package for
#' supported file formats.
#'
#' @return
#' A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different
#' format.
#'
#' @export
plotGeneralizability <- function(
covariateSummary,
saveLocation = NULL,
fileName = 'Generalizability.png'){
covariateSummary$TrainWithOutcome_CovariateMean[is.na(covariateSummary$TrainWithOutcome_CovariateMean)] <- 0
covariateSummary$TestWithOutcome_CovariateMean[is.na(covariateSummary$TestWithOutcome_CovariateMean)] <- 0
covariateSummary <- covariateSummary[covariateSummary$TrainWithOutcome_CovariateMean <= 1,]
plot1 <- ggplot2::ggplot(covariateSummary,
ggplot2::aes(.data$TrainWithOutcome_CovariateMean,
.data$TestWithOutcome_CovariateMean)) +
ggplot2::geom_point() +
ggplot2::scale_x_continuous("Train set Mean") +
ggplot2::scale_y_continuous("Test Set Mean") +
ggplot2::theme(legend.position="none") +
ggplot2::geom_abline(intercept = 0, slope = 1,linetype = 2)+
ggplot2::ggtitle("Outcome")
covariateSummary$TrainWithNoOutcome_CovariateMean[is.na(covariateSummary$TrainWithNoOutcome_CovariateMean)] <- 0
covariateSummary$TestWithNoOutcome_CovariateMean[is.na(covariateSummary$TestWithNoOutcome_CovariateMean)] <- 0
plot2 <- ggplot2::ggplot(covariateSummary,
ggplot2::aes(.data$TrainWithNoOutcome_CovariateMean,
.data$TestWithNoOutcome_CovariateMean)) +
ggplot2::geom_point() +
ggplot2::scale_x_continuous("Train set Mean") +
ggplot2::scale_y_continuous("Test Set Mean") +
ggplot2::theme(legend.position="none") +
ggplot2::geom_abline(intercept = 0, slope = 1,linetype = 2) +
ggplot2::ggtitle("No Outcome")
plot <- gridExtra::grid.arrange(plot1, plot2, ncol=2)
if (!is.null(saveLocation))
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 3.5, dpi = 400)
return(plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.