#' @title Piecewise linear Mendelian randomization
#'
#' @description `piecewise_mr` performs a Mendelian randomization (MR) analysis
#' by fitting a piecewise linear function to localised average causal effects.
#'
#' @name piecewise_mr
#'
#' @import metafor
#'
#' @param y vector of outcome values
#'
#' @param x vector of exposure values
#'
#' @param g the instrumental variable
#'
#' @param covar `data.frame` of covariates
#'
#' @param family a description of the error distribution and link function
#' to be used in the model and is a `character` string naming either the
#' gaussian (i.e. `"gaussian"` for continuous data) or binomial
#' (i.e. `"binomial"` for binary data) family function (default: `"gaussian"`)
#'
#' @param q the number of quantiles the exposure distribution is to be split
#' into within which a causal effect will be fitted, known as localised
#' average causal effects (LACE) (default: `10`)
#'
#' @param nboot the number of bootstrap replications (default: `100`)
#'
#' @param fig a `logical` statement as to whether the user wants the results
#' displayed in a figure (default: `TRUE`)
#'
#' @param ref the reference point for the figure, this is the value of the
#' function that represents the expected difference in the outcome compared
#' with this reference value when the exposure is set to different values
#' (default: `mean(x)`)
#'
#' @param pref_x the prefix/label for the x-axis (default: `"x"`)
#'
#' @param pref_x_ref the prefix for the reference value displayed on the y-axis
#' (default: `"x"`)
#'
#' @param pref_y the prefix/label for the y-axis (default: `"y"`)
#'
#' @param ci_quantiles the number of quantiles at which confidence intervals
#' are to be displayed (default: `10`)
#'
#' @param breaks breaks on the y-axis of the figure
#'
#' @return `piecewise_mr` returns a `list` of non-linear MR results from the
#' piecewise linear function MR approach:
#'
#' @return \item{n}{number of individuals}
#'
#' @return \item{model}{
#' the model specifications:
#' number of quantiles (`q`),
#' number of bootstrap replications performed (`nboot`)
#' }
#'
#' @return \item{coefficients}{
#' the regression estimates:
#' regression coefficients (`beta`),
#' standard errors of regression coefficients (`se`),
#' lower 95% confidence interval (`lci`),
#' upper 95% confidence interval (`uci`),
#' p-value (`pval`)
#' }
#'
#' @return \item{lace}{
#' the localised average causal effect estimate in each quantile:
#' regression coefficients (`beta`),
#' standard errors of regression coefficients (`se`),
#' lower 95% confidence interval (`lci`),
#' upper 95% confidence interval (`uci`),
#' p-value (`pval`)
#' }
#'
#' @return \item{xcoef}{
#' the association between the instrument and the exposure in each quantile:
#' regression coefficients (`beta`),
#' standard errors of regression coefficients (`se`)
#' }
#'
#' @return \item{p_tests}{
#' the p-value of the non-linearity tests:
#' p-value from the quadratic test (`quad`),
#' p-value from the Cochran Q test (`Q`)
#' }
#'
#' @return \item{p_heterogeneity}{
#' the p-value of heterogeneity:
#' p-value of the Cochran Q heterogeneity test (`Q`),
#' p-value from the trend test (`trend`).
#' }
#'
#' @examples
#' # IV (g), exposure (x) & outcome (y)
#' epsx <- rexp(10000)
#' u <- runif(10000, 0, 1)
#' g <- rbinom(10000, 2, 0.3)
#' epsy <- rnorm(10000)
#' ag <- 0.25
#' x <- 1 + ag * g + u + epsx
#' y <- 0.15 * x^2 + 0.8 * u + epsy
#'
#' # Covariates (covar)
#' c1 <- rnorm(10000)
#' c2 <- rnorm(10000)
#' c3 <- rbinom(10000, 2, 0.33)
#' covar <- data.frame(c1 = c1, c2 = c2, c3 = as.factor(c3))
#'
#' # Analyses
#' fp <- fracpoly_mr(
#' y = y, x = x, g = g, covar = covar,
#' family = "gaussian", q = 10, d = 1, ci = "model_se",
#' fig = TRUE
#' )
#' summary(fp)
#' plm <- piecewise_mr(
#' y = y, x = x, g = g, covar = covar,
#' family = "gaussian", q = 10, nboot = 100,
#' fig = TRUE
#' )
#' summary(plm)
#'
#' @author James Staley <jrstaley95@gmail.com>
#'
#' @export
#' @md
piecewise_mr <- function(y, x, g, covar = NULL,
family = "gaussian", q = 10, xpos = "mean", nboot = 100,
fig = TRUE, ref = mean(x), pref_x = "x", pref_x_ref = "x", pref_y = "y",
ci_quantiles = 10, breaks = NULL) {
# Errors
if (!(is.vector(y) && is.vector(x) && is.vector(g))) {
stop("either the outcome, exposure or instrument is not a vector")
}
if (!is.null(covar) && !is.data.frame(covar)) {
stop("covar has to be a data.frame")
}
if (
!(
(is.numeric(y) || is.integer(y)) &&
(is.numeric(x) || is.integer(x)) &&
(is.numeric(g) || is.integer(g))
)
) {
stop("either the outcome, exposure or instrument is not numeric")
}
if (length(y) <= 1) {
stop("the outcome is less than or equal to a single value")
}
if (length(y) != length(x) || length(y) != length(g)) {
stop(
"the number of observations for the outcome, exposure and instrument ",
"are not all the same"
)
}
if (!is.null(covar) && nrow(covar) != length(y)) {
stop(
"the number of observations for the outcome and covariates are not ",
"the same"
)
}
if (any(is.na(y)) || any(is.na(x)) || any(is.na(g))) {
stop(
"there are missing values in either the outcome, exposure or ",
"instrument"
)
}
if (!is.null(covar) && any(is.na(covar))) {
stop("there are missing values in the covariates")
}
if (!(family == "gaussian" || family == "binomial")) {
stop("family has to be equal to either \"gaussian\" or \"binomial\"")
}
if (family == "binomial" && any(!(y == 1 | y == 0))) {
stop("y has to be 0 or 1 if family is equal to \"binomial\"")
}
if ((length(y) / 10) < q) {
stop("the quantiles should contain at least 10 observations")
}
# Covariates
if (!is.null(covar)) {
covar <- model.matrix(as.formula(~.), data = covar)[, -1, drop = FALSE]
if (any(is.na(covar))) {
stop("there are missing values in the covariates")
}
}
# x0 (IV-Free)
ivf <- iv_free(y = y, x = x, g = g, covar = covar, q = q, family = family)
x0 <- ivf$x0
xcoef <- ivf$xcoef
x0q <- ivf$x0q
# LACE
loc <- lace(
y = y, x = x, g = g, covar = covar,
q = q, x0q = x0q, xc_sub = TRUE,
family = family, xpos = xpos
)
coef <- loc$coef / xcoef
coef_se <- loc$coef_se / xcoef
xmean <- loc$xmean
xcoef_sub <- loc$xcoef_sub
xcoef_sub_se <- loc$xcoef_sub_se
# Test of IV-exposure assumption
p_het <- 1 - pchisq(rma(xcoef_sub, vi = (xcoef_sub_se)^2)$QE, df = (q - 1))
p_het_trend <- rma.uni(
xcoef_sub ~ xmean,
vi = xcoef_sub_se^2,
method = "DL"
)$pval[2]
# Non-linearity tests
p_quadratic <- rma(coef ~ xmean, (coef_se)^2, method = "FE")$pval[2]
p_Q <- 1 - pchisq(rma(coef, vi = (coef_se)^2)$QE, df = (q - 1))
# Results
lci <- coef - 1.96 * coef_se
uci <- coef + 1.96 * coef_se
pval <- 2 * pnorm(-abs(coef / coef_se))
# Figure
if (fig == TRUE) {
figure <- piecewise_figure(
y = y, x = x, g = g, covar = covar,
q = q, xcoef = xcoef, coef = coef, x0q = x0q,
family = family, nboot = nboot, ref = ref,
pref_x = pref_x, pref_x_ref = pref_x_ref, pref_y = pref_y,
ci_quantiles = ci_quantiles, breaks = breaks
)
}
# Output
model <- as.matrix(data.frame(q = q, nboot = nboot))
lace <- as.matrix(
data.frame(beta = coef, se = coef_se, lci = lci, uci = uci, pval = pval)
)
rownames(lace) <- seq_len(nrow(lace))
xcoef_quant <- as.matrix(data.frame(beta = xcoef_sub, se = xcoef_sub_se))
rownames(xcoef_quant) <- seq_len(nrow(xcoef_quant))
p_tests <- as.matrix(data.frame(quad = p_quadratic, Q = p_Q))
p_heterogeneity <- as.matrix(data.frame(Q = p_het, trend = p_het_trend))
if (fig == FALSE) {
output <- list(
n = length(y), model = model,
lace = lace, xcoef = xcoef_quant,
p_tests = p_tests, p_heterogeneity = p_heterogeneity
)
} else {
output <- list(
n = length(y), model = model,
lace = lace, xcoef = xcoef_quant,
p_tests = p_tests, p_heterogeneity = p_heterogeneity,
figure = figure
)
}
class(output) <- "piecewise_mr"
# Return
return(output)
}
#' @title Print piecewise_mr
#'
#' @description print method for class `"piecewise_mr"`.
#'
#' @param x an object of class `"piecewise_mr"`
#'
#' @author James Staley <jrstaley95@gmail.com>
#'
#' @export
#' @md
print.piecewise_mr <- function(x, ...) {
cat("\nCall: \npiecewise_mr")
cat("\n\nCoefficients:\n")
cat(x$lace[, 1])
cat("\n\n")
if (!is.null(x$figure)) {
plot(x$figure)
}
}
#' @title Summary of piecewise_mr
#'
#' @description summary method for class `"piecewise_mr"`.
#'
#' @param x an object of class `"piecewise_mr"`
#'
#' @author James Staley <jrstaley95@gmail.com>
#'
#' @export
#' @md
summary.piecewise_mr <- function(x, ...) {
model <- as.data.frame(x$model)
n <- x$n
coefficients <- as.data.frame(x$lace)
p_tests <- as.data.frame(x$p_tests)
p_heterogeneity <- as.data.frame(x$p_heterogeneity)
if (is.null(x$figure)) {
output <- list(
model = model, n = n,
coefficients = coefficients,
p_tests = p_tests, p_heterogeneity = p_heterogeneity
)
}
if (!is.null(x$figure)) {
output <- list(
model = model, n = n,
coefficients = coefficients,
p_tests = p_tests, p_heterogeneity = p_heterogeneity,
figure = x$figure
)
}
class(output) <- "summary.piecewise_mr"
return(output)
}
#' @title Print summary of piecewise_mr
#'
#' @description print.summary method for class `"piecewise_mr"`.
#'
#' @param x an object of class `"piecewise_mr"`.
#'
#' @author James Staley <jrstaley95@gmail.com>
#'
#' @export
#' @md
print.summary.piecewise_mr <- function(x, ...) {
cat("Call: piecewise_mr")
cat(
"\n\nNumber of individuals: ", x$n, "; ",
"Quantiles: ", as.character(x$model$q), "; ",
"Number of bootstrap replications: ", as.character(x$model$nboot),
sep = ""
)
cat("\n\nLACE:\n")
names(x$coefficients) <- c(
"Estimate", "Std. Error", "95%CI Lower", "95%CI Upper", "p.value"
)
printCoefmat(x$coefficients, P.value = TRUE, has.Pvalue = TRUE)
cat("\nNon-linearity tests")
cat("\nQuadratic p-value:", signif(x$p_tests$quad, digits = 3))
cat("\nCochran Q p-value:", signif(x$p_tests$Q, digits = 3))
cat("\n\nHeterogeneity tests")
cat("\nCochran Q p-value:", signif(x$p_heterogeneity$Q, digits = 3))
cat("\nTrend p-value:", signif(x$p_heterogeneity$trend, digits = 3))
cat("\n")
if (!is.null(x$figure)) {
plot(x$figure)
}
}
#' @title Piecewise linear figure
#'
#' @description `piecewise_figure` plots the piecewise linear function.
#'
#' @name piecewise_figure
#'
#' @import matrixStats
#'
#' @import ggplot2
#'
#' @param y vector of outcome values
#'
#' @param x vector of exposure values
#'
#' @param g the instrumental variable
#'
#' @param covar `data.frame` of covariates
#'
#' @param q the number of quantiles the exposure distribution is to be split
#' into within which a causal effect will be fitted, known as localised
#' average causal effects (LACE) (default: `10`)
#'
#' @param xcoef the association between the exposure and the instrument
#'
#' @param coef the coefficients of the localised causal effects
#'
#' @param x0q quantiles of x0 (the IV-free exposure)
#'
#' @param family a description of the error distribution and link function
#' to be used in the model and is a `character` string naming either the
#' gaussian (i.e. `"gaussian"` for continuous data) or binomial
#' (i.e. `"binomial"` for binary data) family function (default: `"gaussian"`)
#'
#' @param nboot the number of bootstrap replications (default: `100`)
#'
#' @param ref the reference point for the figure, this is the value of the
#' function that represents the expected difference in the outcome compared
#' with this reference value when the exposure is set to different values
#' (default: `mean(x)`)
#'
#' @param pref_x the prefix/label for the x-axis (default: `"x"`)
#'
#' @param pref_x_ref the prefix for the reference value displayed on the y-axis
#' (default: `"x"`)
#'
#' @param pref_y the prefix/label for the y-axis (default: `"y"`)
#'
#' @param ci_quantiles the number of quantiles at which confidence intervals
#' are to be displayed (default: `10`)
#'
#' @param breaks breaks on the y-axis of the figure
#'
#' @return `piecewise_figure` returns a plot of the piecewise linear function.
#'
#' @author James Staley <jrstaley95@gmail.com>
#'
#' @noRd
#' @md
piecewise_figure <- function(y, x, g, covar = NULL,
q = 10, xcoef, coef, x0q,
family = "gaussian", nboot = 100, ref = mean(x),
pref_x = "x", pref_x_ref = "x", pref_y = "y",
ci_quantiles = 10, breaks = NULL) {
# Piecewise function
m <- NULL
l <- q + 1
prob <- 1 / q
quantiles_x <- quantile(x, probs = seq(0, 1, prob))
x_quantiles <- cut(x, quantiles_x, include.lowest = TRUE)
x_quantiles <- as.numeric(x_quantiles)
for (i in 1:(l - 1)) {
if (quantiles_x[i] <= ref && quantiles_x[(i + 1)] >= ref) {
ref_pos <- i + 1
}
}
for (k in 1:l) {
if (k == 1) {
m[k] <- quantile(x, probs = 0.0000000001)
}
if (k == l) {
m[k] <- quantile(x, probs = 0.9999999999)
}
if (k > 1 && k < l) {
m[k] <- max(x[x_quantiles == k - 1])
}
}
y_mm <- NULL
for (k in 1:l) {
if (k == 1) {
y_mm[k] <- 0
}
if (k >= 2) {
y_mm[k] <- (coef[k - 1] * m[k] - coef[k - 1] * m[k - 1]) + y_mm[k - 1]
}
if (k == ref_pos) {
y_ref <- (coef[k - 1] * ref - coef[k - 1] * m[k - 1]) + y_mm[k - 1]
}
}
y_mm_ref <- y_mm - y_ref
prob <- 1 / ci_quantiles
ci_quant <- as.numeric(quantile(x, probs = seq(0, 1, prob)))
x_quantiles_ci <- cut(x, ci_quant, include.lowest = TRUE)
x_quantiles_ci <- as.numeric(x_quantiles_ci)
y_mm_quant <- NULL
xmean_quant <- NULL
for (j in 1:ci_quantiles) {
x_ci <- mean(x[x_quantiles_ci == j])
xmean_quant[j] <- mean(x[x_quantiles_ci == j])
for (i in 1:(l - 1)) {
if (quantiles_x[i] <= x_ci && quantiles_x[(i + 1)] >= x_ci) {
ci_pos <- i + 1
}
}
for (k in 1:l) {
if (k == ci_pos) {
y_mm_quant[j] <- (coef[k - 1] * x_ci - coef[k - 1] * m[k - 1]) +
y_mm[k - 1]
}
}
}
y_mm_quant_ref <- y_mm_quant - y_ref
# Bootstrap CIs
non_p_boot <- matrix(NA, nrow = nboot, ncol = q)
for (i in 1:nboot) {
indices <- sample.int(length(y), size = length(y), replace = TRUE)
y_boot <- y[indices]
x_boot <- x[indices]
g_boot <- g[indices]
if (!is.null(covar)) {
covar_boot <- as.matrix(covar[indices, , drop = FALSE])
} else {
covar_boot <- NULL
}
x0qb <- x0q[indices]
for (j in 1:q) {
if (!is.null(covar_boot)) {
if (family == "gaussian") {
non_p_boot[i, j] <- (
lm(y_boot[x0qb == j] ~ g_boot[x0qb == j] + covar_boot[x0qb == j, , drop = FALSE])$coef[2]
) / xcoef
}
if (family == "binomial") {
non_p_boot[i, j] <- (
glm(y_boot[x0qb == j] ~ g_boot[x0qb == j] + covar_boot[x0qb == j, , drop = FALSE], family = binomial)$coef[2]
) / xcoef
}
} else {
if (family == "gaussian") {
non_p_boot[i, j] <- (
lm(y_boot[x0qb == j] ~ g_boot[x0qb == j])$coef[2]
) / xcoef
}
if (family == "binomial") {
non_p_boot[i, j] <- (
glm(y_boot[x0qb == j] ~ g_boot[x0qb == j], family = binomial)$coef[2]
) / xcoef
}
}
}
}
y_boot <- matrix(0, nrow = nrow(non_p_boot), ncol = l)
y_mm_boot_ref <- matrix(0, nrow = nrow(non_p_boot), ncol = l)
for (i in seq_len(nrow(non_p_boot))) {
for (k in 1:l) {
if (k == ref_pos) {
y_boot_ref <- (non_p_boot[i, k - 1] * ref - non_p_boot[i, k - 1] * m[k - 1]) +
y_boot[i, k - 1]
}
}
for (k in 1:l) {
if (k == 1) {
y_boot[i, k] <- 0
}
if (k >= 2) {
y_boot[i, k] <- (non_p_boot[i, k - 1] * m[k] - non_p_boot[i, k - 1] * m[k - 1]) +
y_boot[i, k - 1]
}
if (k == ref_pos) {
y_boot_ref <- (non_p_boot[i, k - 1] * ref - non_p_boot[i, k - 1] * m[k - 1]) +
y_boot[i, k - 1]
}
}
y_mm_boot_ref[i, ] <- y_boot[i, ] - y_boot_ref
}
y_quant_boot_ref <- matrix(NA, nrow = nrow(non_p_boot), ncol = ci_quantiles)
for (j in 1:ci_quantiles) {
x_ci <- mean(x[x_quantiles_ci == j])
for (w in 1:(l - 1)) {
if (quantiles_x[w] <= x_ci && quantiles_x[(w + 1)] >= x_ci) {
ci_pos <- w + 1
}
}
for (i in seq_len(nrow(non_p_boot))) {
for (k in 1:l) {
if (k == ref_pos) {
y_boot_ref <- (non_p_boot[i, k - 1] * ref - non_p_boot[i, k - 1] * m[k - 1]) +
y_boot[i, k - 1]
}
}
for (k in 1:l) {
if (k == ci_pos) {
y_quant_boot <- (non_p_boot[i, k - 1] * x_ci - non_p_boot[i, k - 1] * m[k - 1]) +
y_boot[i, k - 1]
}
}
y_quant_boot_ref[i, j] <- y_quant_boot - y_boot_ref
}
}
y_lci <- colQuantiles(y_quant_boot_ref, probs = 0.025)
y_uci <- colQuantiles(y_quant_boot_ref, probs = 0.975)
# Figure
plot.data <- data.frame(x = m, y = y_mm_ref)
plot.data.1 <- data.frame(
x = xmean_quant, y = y_mm_quant_ref, y_lci = y_lci, y_uci = y_uci
)
plot.data.2 <- data.frame(x = ref, y = 0)
if (family != "binomial") {
figure <- ggplot(data = plot.data, mapping = aes(x)) +
geom_hline(aes(yintercept = 0), colour = "grey") +
geom_line(aes(x = x, y = y), colour = "black") +
geom_errorbar(
mapping = aes(x = x, ymin = y_lci, ymax = y_uci), data = plot.data.1,
color = "grey", width = 0.025
) +
geom_point(
mapping = aes(x = x, y = y), data = plot.data.1,
colour = "black", size = 3
) +
geom_point(
mapping = aes(x = x, y = y), data = plot.data.2,
colour = "red", size = 3
) +
theme_bw() +
labs(
x = pref_x,
y = bquote(
.(pref_y) ~
" [" ~ .(pref_x_ref)["ref"] ~ "=" ~ .(round(ref, 2)) ~ "]"
)
) +
theme(
axis.title = element_text(vjust = 0.5, size = 16),
axis.text = element_text(size = 14),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
if (!is.null(breaks)) {
figure <- figure +
scale_y_continuous(breaks = breaks)
}
}
if (family == "binomial") {
pref_y <- paste0("Odds ratio of ", pref_y)
plot.data$y <- exp(plot.data$y)
plot.data.1$y <- exp(plot.data.1$y)
plot.data.1$y_lci <- exp(plot.data.1$y_lci)
plot.data.1$y_uci <- exp(plot.data.1$y_uci)
plot.data.2$y <- exp(plot.data.2$y)
figure <- ggplot(data = plot.data, mapping = aes(x)) +
geom_hline(aes(yintercept = 1), colour = "grey") +
geom_line(aes(x = x, y = y), colour = "black") +
geom_errorbar(
mapping = aes(x = x, ymin = y_lci, ymax = y_uci),
data = plot.data.1,
color = "grey", width = 0.025
) +
geom_point(
mapping = aes(x = x, y = y), data = plot.data.1,
colour = "black", size = 3
) +
geom_point(
mapping = aes(x = x, y = y), data = plot.data.2,
colour = "red", size = 3
) +
theme_bw() +
labs(
x = pref_x,
y = bquote(
.(pref_y) ~
" [" ~ .(pref_x_ref)["ref"] ~ "=" ~ .(round(ref, 2)) ~ "]"
)
) +
theme(
axis.title = element_text(vjust = 0.5, size = 16),
axis.text = element_text(size = 14),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
if (!is.null(breaks)) {
figure <- figure +
scale_y_continuous(trans = "log", breaks = breaks)
} else {
ybreaks <- ggplot_build(figure)$layout$panel_params[[1]]$y$breaks
figure <- figure +
scale_y_continuous(trans = "log", breaks = ybreaks)
}
}
# Return
return(figure)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.