#' Plot impact fractions corresponding to risk-quantiles over several risk factors
#'
#' @param x A PAF_q object. This is a dataframe that is created by running the function PAF_calc_continuous.
#' @param ... Other global arguments inherited by that might be passed to the ggplot routine
#' @return A ggplot2 plotting object for PAF_q over the differing risk factors in x
#' @export
#'
#' @examples
#' library(splines)
#' library(survival)
#' library(parallel)
#' options(boot.parallel="snow")
#' options(boot.ncpus=2)
#' # The above could be set to the number of available cores on the machine
#' model_continuous <- glm(formula = case ~ region * ns(age, df = 5) +
#' sex * ns(age, df = 5) + education +exercise + ns(diet, df = 3) +
#' alcohol + stress + ns(lipids,df = 3) + ns(waist_hip_ratio, df = 3) +
#' high_blood_pressure, family = "binomial", data = stroke_reduced)
#' out <- PAF_calc_continuous(model_continuous,riskfactor_vec=
#' c("diet","lipids","waist_hip_ratio"),q_vec=c(0.1,0.9),
#' ci=FALSE,calculation_method="B",data=stroke_reduced)
#' plot(out)
#' \donttest{
#' # example with more quantile points and including confidence intervals
#' # (more useful - but a bit slower to run)
#' out <- PAF_calc_continuous(model_continuous,riskfactor_vec=
#' c("diet","lipids","waist_hip_ratio"),q_vec=c(0.01, 0.1,0.3,0.5,0.7,0.9),
#' ci=TRUE,calculation_method="B",data=stroke_reduced)
#' plot(out)
#' }
plot.PAF_q <- function(x, ...){
#options(warn = -1)
if(!"riskfactor" %in% names(x)){
stop("data_frame should have a column named 'riskfactor' with the names of the risk factors or exposures being compared")
}
if(!"q_val" %in% names(x)){
stop("data_frame should have a column named 'q_val' specifying the intervention on the risk factor that is considered")
}
data_frame <- data.frame(riskfactor=x$riskfactor,q = x$q_val, paf_q=signif(x$paf_q,3))
riskfactor <- x$riskfactor
q_val <- x$q_val
pd <- ggplot2::position_dodge(0.01)
J <- ncol(data_frame)
if(J > 3){
colnames(data_frame)[(J-2):J] <- c("PAF","lower_CI", "upper_CI")
p <- ggplot2::ggplot(data_frame, ggplot2::aes(x=-q_val, y=PAF, colour=riskfactor)) + ggplot2::theme_grey(base_size = 20) +
ggplot2::geom_errorbar(ggplot2::aes(ymin=lower_CI, ymax=upper_CI),width=0.1, position=pd,size=1) + ggplot2::geom_line(ggplot2::aes(group=riskfactor,width=1),position=pd,size=1) +
ggplot2::labs(col = "Risk Factor")+ggplot2::ylab(expression(paste(hat(PAF[q]))))+ggplot2::xlab("q") + ggplot2::scale_x_continuous(breaks=c(-1,-.8,-0.6,-0.4,-0.2,0),labels=c(1,0.8,0.6,0.4,0.2,0))
}
if(J==3){
colnames(data_frame)[J] <- "PAF"
p <- ggplot2::ggplot(data_frame, ggplot2::aes(x=-q_val, y=PAF, colour=riskfactor)) + ggplot2::theme_grey(base_size = 20) + ggplot2::geom_line(ggplot2::aes(group=riskfactor,width=1),position=pd,size=1) +
ggplot2::labs(col = "Risk Factor")+ggplot2::ylab(expression(paste(hat(PAF[q]))))+ggplot2::xlab("q") + ggplot2::scale_x_continuous(breaks=c(-1,-.8,-0.6,-0.4,-0.2,0),labels=c(1,0.8,0.6,0.4,0.2,0))
}
p
}
plot_continuous_quick <- function(model,riskfactor,data,S = 10,ref_val=NA, ci_level=0.95,min_risk_q=.1,n_x=1000,plot_region=TRUE, plot_density=TRUE,theylab="OR",qlist){
data_orig <- data
if(n_x<=nrow(data)){
data <- data[sample(1:nrow(data),n_x,replace=FALSE),]
}
xmat <- model.matrix(model, data=data) # design matrix
col_indexes <- grep(paste('^.*',riskfactor,'.*$',sep=''),
colnames(xmat),perl=TRUE)
# calculate submatrix necessary for plot
submat <- xmat[,col_indexes]
# values of x in data
riskfactor_col <- grep(paste("^",riskfactor,"$",sep=''),colnames(data),perl=TRUE)
xvals <- data[,riskfactor_col]
model_frame <- model.matrix(model,data=data)
model_frame <- as.matrix(model_frame)
M <- median(xvals)
index <- which.min((xvals-M)^2)
if(!is.na(ref_val)){
index <- which.min((xvals-ref_val)^2)
}
median_mat <- matrix(rep(model_frame[index,],nrow(data)),nrow=nrow(data),byrow=TRUE)
model_frame <- model_frame-median_mat
pred <- model_frame[,col_indexes,drop=FALSE]%*%model$coefficients[col_indexes,drop=FALSE]
var <- diag(model_frame[,col_indexes,drop=FALSE]%*%vcov(model)[col_indexes,col_indexes,drop=FALSE]%*%t(model_frame[,col_indexes,drop=FALSE]))
z_val <- qnorm(1-(1-ci_level)/2)
lower <- exp(pred - z_val*sqrt(var))
upper <- exp(pred + z_val*sqrt(var))
middle <- exp(pred)
d <- data.frame(x=xvals,est=middle,lower=lower,upper=upper)
d <- d[order(d$x,decreasing=FALSE),]
gg <- ggplot2::ggplot(d, ggplot2::aes(x=x, y=middle)) + ggplot2::geom_line(ggplot2::aes(x=x, y=est),lwd=2) +
ggplot2::geom_ribbon(ggplot2::aes(ymin=lower, ymax=upper),alpha=0.5,fill='red')+ ggplot2::theme_grey(base_size = 20) +ggplot2::xlab(riskfactor)+ggplot2::ylab(theylab)
if(plot_region){
risk_quantiles <- risk_quantiles(riskfactor = riskfactor,data=data_orig,model=model,S=S,q=qlist)
d$min=0
d$max=0
for(j in 1:nrow(d)){
d$max[j] <- d$est[j]
index <- which.min((d$x[j]-risk_quantiles)^2)
if(as.numeric(names(risk_quantiles)[index])>min_risk_q) d$max[j] <- 0
}
#polygon(themat, col=adjustcolor( "blue", alpha.f = .2))
gg <- gg + ggplot2::geom_ribbon(ggplot2::aes(ymin=min, ymax=max),alpha=0.5,fill='blue',data=d)
}
if(plot_density){
d1 <- density(xvals)
scaling <- max(d$est)*0.7/max(pmax(d1$y))
xdensity=d1$x
ydensity=scaling*d1$y
newd <- data.frame(xdensity=xdensity,ydensity=ydensity)
newd <- newd[xdensity>=quantile(xvals,min(qlist))&xdensity<=quantile(xvals,max(qlist)),]
gg <- gg + ggplot2::geom_line(ggplot2::aes(x=xdensity,y=ydensity),lwd=2,col='black',data=newd)
}
gg
}
#' Plot hazard ratios, odds ratios or risk ratios comparing differing values of a continuous exposure to a reference level
#' @param model A fitted model (either glm, clogit or coxph)
#' @param riskfactor The string name of a continuous exposure or risk factor represented in the data and model
#' @param data Data frame used to fit the model
#' @param ref_val The reference value used in plotting. If left at NA, the median value of the risk factor is used
#' @param ci_level Numeric. A number between 0 and 1 specifying the confidence level
#' @param min_risk_q Default .1. A number between 0 and 1 representing the desired risk quantile for the continuous risk factor
#' @param plot_region Default TRUE. Logical specifying whether the targeted region corresponding to an intervention setting the continuous risk factor at a quantile min_risk_q or lower is to be plotted
#' @param plot_density Default TRUE. Logical specifying whether density of distribution of risk factor is to be added to the plot
#' @param n_x Default 10000. How many values of riskfactor will be used to plot spline (when interact=FALSE)
#' @param theylab Default "OR". Y-axis label of the plot
#' @param qlist Vector of quantile values for q, corresponding to the plotted values of PAF_q for each risk factor/exposure
#' @param interact Default "FALSE". Set to TRUE spline models enter as interactions.
#' @param S Default 10. The integer number of random samples used to calculate average differences in linear predictors. Only relevant to set when interact=TRUE
#' @return A ggplot2 plotting object
#' @export
#' @references Ferguson, J., Maturo, F., Yusuf, S. and O’Donnell, M., 2020. Population attributable fractions for continuously distributed exposures. Epidemiologic Methods, 9(1)
#'
#' @examples
#' library(survival)
#' library(splines)
#' model_continuous <- glm(formula = case ~ region * ns(age, df = 5) +
#' sex * ns(age, df = 5) + education +exercise + ns(diet, df = 3) +
#' alcohol + stress + ns(lipids,df = 3) + ns(waist_hip_ratio, df = 3) +
#' high_blood_pressure, family = "binomial", data = stroke_reduced)
#' plot_continuous(model_continuous,riskfactor="diet",data=stroke_reduced)
plot_continuous <- function(model,riskfactor,data,S = 10,ref_val=NA, ci_level=0.95,min_risk_q=.1,plot_region=TRUE, plot_density=TRUE,n_x=10000,theylab="OR", qlist=seq(from=0.001,to=0.999,by=0.001), interact=FALSE){
if(!interact){
return(plot_continuous_quick(model=model,riskfactor=riskfactor,data=data,ci_level=ci_level,min_risk_q=min_risk_q,n_x=n_x,plot_region=plot_region,plot_density=plot_density,theylab=theylab,qlist=qlist))
}
#options(warn = defaultW)
#options(warn = -1)
xmat <- model.matrix(model, data=data) # design matrix
col_indexes <- grep(paste('^.*',riskfactor,'.*$',sep=''),
colnames(xmat),perl=TRUE)
# calculate submatrix necessary for plot
submat <- xmat[,col_indexes]
# values of x in data
riskfactor_col <- grep(paste("^",riskfactor,"$",sep=''),colnames(data),perl=TRUE)
xvals <- data[,riskfactor_col]
# plot will be made at .001,.002,...,.999 quantiles of risk factor
plot_x_v <- quantile(xvals,qlist,na.rm=T)
# if sampling make dataframe same size as original by default
newS <- S
if(S<2) newS <- 2 # model.frame won't work on a dataset of size 1
data_s <- data[sample(1:nrow(data),newS),,drop=FALSE]
ind <- rep(1,S)
for(j in 1:ncol(data_s)){
if(is.factor(data[,j])) data_s[,j] <- factor(data_s[,j],levels=levels(data[,j]))
if(is.numeric(data[,j])) data_s[,j] <- as.numeric(data_s[,j])
}
newd <- data_s
newd[,riskfactor_col] <- plot_x_v[1]
a <- try(model_frame <-model.matrix(model,data=newd),silent=TRUE)
if(!substring(a[1],1,5)=="Error"){
model_frame <- model.matrix(model,data=newd)
for(i in 2:length(qlist)){
newd <- data_s
newd[,riskfactor_col] <- plot_x_v[i]
model_frame <- rbind(model_frame,model.matrix(model,data=newd))
ind <- c(ind, rep(i,S))
}
}else
{
N <- nrow(data)
model_frame <- model.matrix(model,data=rbind(newd,data[1:N,]))
model_frame <- model_frame[(1:(nrow(model_frame)-N)),]
for(i in 2:length(qlist)){
newd <- data_s
newd[,riskfactor_col] <- plot_x_v[i]
model_frame <- rbind(model_frame,model.matrix(model,data=rbind(newd,data[1:N,])))
model_frame <- model_frame[(1:(nrow(model_frame)-N)),]
ind <- c(ind, rep(i,S))
}
}
# collapse model_frame now over ind
model_frame <- as.data.frame(model_frame)
model_frame$ind <- ind
model_frame <- dplyr::group_by(model_frame,ind)
stuff <- dplyr::funs(mean)
model_frame <- dplyr::summarize_all(model_frame,stuff)
model_frame <- model_frame[,colnames(model_frame)!="ind"]
model_frame <- as.matrix(model_frame)
# new predictions (pred- pred at median)
i1 <- length(unique(ind))/2
if(i1%%1 == 0) i2 <- i1+1
if(i1%%1 == 0.5){
i1 <- i1+0.5
i2 <- i1-11
}
median <- (0.5*model_frame[i1,,drop=FALSE] + 0.5*model_frame[i1,,drop=FALSE])
if(!is.na(ref_val)){
if(sum(plot_x_v<ref_val)==0 | sum(plot_x_v<ref_val)>=length(plot_x_v)){
stop(paste0("specify reference riskfactor between ", signif(plot_x_v[1],3)," and ", signif(plot_x_v[length(plot_x_v)],3) ))
}
l <- length(plot_x_v[plot_x_v<ref_val])
w <- (ref_val-plot_x_v[l])/(plot_x_v[l+1]-plot_x_v[l])
median <- w*model_frame[l+1,,drop=FALSE]+(1-w)*model_frame[l,,drop=FALSE]
}
median_mat <- matrix(rep(median,length(qlist)),nrow=length(qlist),byrow=TRUE)
model_frame <- model_frame-median_mat
pred <- model_frame[,col_indexes,drop=FALSE]%*%model$coefficients[col_indexes,drop=FALSE]
var <- diag(model_frame[,col_indexes,drop=FALSE]%*%vcov(model)[col_indexes,col_indexes,drop=FALSE]%*%t(model_frame[,col_indexes,drop=FALSE]))
z_val <- qnorm(1-(1-ci_level)/2)
lower <- exp(pred - z_val*sqrt(var))
upper <- exp(pred + z_val*sqrt(var))
middle <- exp(pred)
d <- data.frame(x=plot_x_v,est=middle,lower=lower,upper=upper)
gg <- ggplot2::ggplot(d, ggplot2::aes(x=plot_x_v, y=middle)) + ggplot2::geom_line(ggplot2::aes(x=plot_x_v, y=middle),lwd=2) +
ggplot2::geom_ribbon(ggplot2::aes(ymin=lower, ymax=upper),alpha=0.5,fill='red')+ ggplot2::theme_grey(base_size = 20) +ggplot2::xlab(riskfactor)+ggplot2::ylab(theylab)
if(plot_region){
risk_quantiles <- risk_quantiles(riskfactor = riskfactor,data=data,model=model,S=S,q=qlist)
d$min=0
d$max=0
for(j in 1:nrow(d)){
d$max[j] <- d$est[j]
index <- which.min((d$x[j]-risk_quantiles)^2)
if(as.numeric(names(risk_quantiles)[index])>min_risk_q) d$max[j] <- 0
}
#polygon(themat, col=adjustcolor( "blue", alpha.f = .2))
gg <- gg + ggplot2::geom_ribbon(ggplot2::aes(ymin=min, ymax=max),alpha=0.5,fill='blue',data=d)
}
if(plot_density){
d1 <- density(xvals)
scaling <- max(d$est)*0.7/max(pmax(d1$y))
xdensity=d1$x
ydensity=scaling*d1$y
newd <- data.frame(xdensity=xdensity,ydensity=ydensity)
newd <- newd[xdensity>=quantile(xvals,min(qlist))&xdensity<=quantile(xvals,max(qlist)),]
gg <- gg + ggplot2::geom_line(ggplot2::aes(x=xdensity,y=ydensity),lwd=2,col='black',data=newd)
}
#options(warn = defaultW)
gg
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.