system_metrics <- function(dat)
{
# Number of SNPs
# Sample size outcome
# Sample size exposure
metrics <- list()
metrics$nsnp <- nrow(dat)
metrics$nout <- mean(dat$samplesize.outcome, na.rm=TRUE)
metrics$nexp <- mean(dat$samplesize.exposure, na.rm=TRUE)
# F stats
# Fstat <- qf(dat$pval.exposure, 1, dat$samplesize.exposure, lower.tail=FALSE)
Fstat <- dat$beta.exposure^2 / dat$se.exposure^2
Fstat[is.infinite(Fstat)] <- 300
metrics$meanF <- mean(Fstat, na.rm=TRUE)
metrics$varF <- stats::var(Fstat, na.rm=TRUE)
metrics$medianF <- stats::median(Fstat, na.rm=TRUE)
# IF more than 1 SNP
if(nrow(dat) > 1)
{
# Egger-Isq
metrics$egger_isq <- Isq(abs(dat$beta.exposure), dat$se.exposure)
}
if(nrow(dat) > 2)
{
sct <- mr_sign(dat$beta.exposure, dat$beta.outcome, dat$se.exposure, dat$se.outcome)
metrics$sct <- -log10(sct$pval) * sign(sct$b)
# IF more than 2 SNP
ruck <- mr_rucker(dat)[[1]]
# Q_ivw
# Q_egger
# Q_diff
metrics$Isq <- (ruck$Q$Q[1] - (ruck$Q$df[1]-1))/ruck$Q$Q[1]
metrics$Isqe <- (ruck$Q$Q[2] - (ruck$Q$df[2]-1))/ruck$Q$Q[2]
metrics$Qdiff <- ruck$Q$Q[3]
# Intercept / se
metrics$intercept <- abs(ruck$intercept$Estimate[1]) / ruck$intercept$SE[1]
# Influential outliers
dfbeta_thresh <- 2 * nrow(dat)^-0.5
cooksthresh1 <- 4 / (nrow(dat) - 2)
cooksthresh2 <- 4 / (nrow(dat) - 3)
inf1 <- stats::influence.measures(ruck$lmod_ivw)$infmat
inf2 <- stats::influence.measures(ruck$lmod_egger)$infmat
metrics$dfb1_ivw <- sum(inf1[,1] > dfbeta_thresh) / nrow(dat)
metrics$dfb2_ivw <- sum(inf1[,2] > dfbeta_thresh) / nrow(dat)
metrics$dfb3_ivw <- sum(inf1[,3] > dfbeta_thresh) / nrow(dat)
metrics$cooks_ivw <- sum(inf1[,4] > cooksthresh1) / nrow(dat)
metrics$dfb1_egger <- sum(inf2[,1] > dfbeta_thresh) / nrow(dat)
metrics$dfb2_egger <- sum(inf2[,2] > dfbeta_thresh) / nrow(dat)
metrics$dfb3_egger <- sum(inf2[,3] > dfbeta_thresh) / nrow(dat)
metrics$cooks_egger <- sum(inf2[,4] > cooksthresh2) / nrow(dat)
# Homoscedasticity
if (!requireNamespace("car", quietly = TRUE)) {
stop(
"Package \"car\" must be installed to use this function.",
call. = FALSE
)
}
metrics$homosc_ivw <- car::ncvTest(ruck$lmod_ivw)$ChiSquare
metrics$homosc_egg <- car::ncvTest(ruck$lmod_egger)$ChiSquare
# Normality of residuals
metrics$shap_ivw <- stats::shapiro.test(stats::residuals(ruck$lmod_ivw))$statistic
metrics$shap_egger <- stats::shapiro.test(stats::residuals(ruck$lmod_egger))$statistic
metrics$ks_ivw <- stats::ks.test(stats::residuals(ruck$lmod_ivw), "pnorm")$statistic
metrics$ks_egger <- stats::ks.test(stats::residuals(ruck$lmod_egger), "pnorm")$statistic
}
return(metrics)
}
get_rsq <- function(dat)
{
stopifnot(length(unique(dat$exposure)) == 1)
stopifnot(length(unique(dat$outcome)) == 1)
stopifnot(length(unique(dat$units.exposure)) == 1)
stopifnot(length(unique(dat$units.outcome)) == 1)
dat$pval.exposure[dat$pval.exposure < 1e-300] <- 1e-300
if(dat$units.exposure[1] == "log odds")
{
ind1 <- !is.na(dat$beta.exposure) &
!is.na(dat$eaf.exposure) &
!is.na(dat$ncase.exposure) &
!is.na(dat$ncontrol.exposure)
dat$rsq.exposure <- NA
if(sum(ind1) > 0)
{
dat$rsq.exposure[ind1] <- get_r_from_lor(
dat$beta.exposure[ind1],
dat$eaf.exposure[ind1],
dat$ncase.exposure[ind1],
dat$ncontrol.exposure[ind1],
0.1
)^2
}
} else {
ind1 <- !is.na(dat$pval.exposure) & !is.na(dat$samplesize.exposure)
dat$rsq.exposure <- NA
if(sum(ind1) > 0)
{
dat$rsq.exposure[ind1] <- get_r_from_pn(
dat$pval.exposure[ind1],
dat$samplesize.exposure[ind1]
)
}
}
dat$pval.outcome[dat$pval.outcome < 1e-300] <- 1e-300
if(dat$units.outcome[1] == "log odds")
{
ind1 <- !is.na(dat$beta.outcome) &
!is.na(dat$eaf.outcome) &
!is.na(dat$ncase.outcome) &
!is.na(dat$ncontrol.outcome)
dat$rsq.outcome <- NA
if(sum(ind1) > 0)
{
dat$rsq.outcome[ind1] <- get_r_from_lor(
dat$beta.outcome[ind1],
dat$eaf.outcome[ind1],
dat$ncase.outcome[ind1],
dat$ncontrol.outcome[ind1],
0.1
)^2
}
} else {
ind1 <- !is.na(dat$pval.outcome) & !is.na(dat$samplesize.outcome)
dat$rsq.outcome <- NA
if(sum(ind1) > 0)
{
dat$rsq.outcome[ind1] <- get_r_from_pn(
dat$pval.outcome[ind1],
dat$samplesize.outcome[ind1]
)
}
}
return(dat)
}
#' Mixture of experts
#'
#' Based on the method described here \url{https://www.biorxiv.org/content/10.1101/173682v2}.
#' Once all MR methods have been applied to a summary set, you can then use the mixture of experts to predict the method most likely to be the most accurate.
#'
#' @param res Output from [mr_wrapper()].
#' @param rf The trained random forest for the methods. This is available to download at <https://www.dropbox.com/s/5la7y38od95swcf/rf.rdata?dl=0>.
#'
#' @details
#' The `mr_moe()` function modifies the `estimates` item in the list of results from the [mr_wrapper()] function. It does three things:
#' 1. Adds the MOE column, which is a predictor for each method for how well it performs in terms of high power and low type 1 error (scaled 0-1, where 1 is best performance).
#' 2. It renames the methods to be the estimating method + the instrument selection method. There are 4 instrument selection methods: Tophits (i.e. no filtering), directional filtering (DF, an unthresholded version of Steiger filtering), heterogeneity filtering (HF, removing instruments that make substantial (p < 0.05) contributions to Cochran's Q statistic), and DF + HF which is where DF is applied and the HF applied on top of that.
#' 3. It orders the table to be in order of best performing method.
#'
#' Note that the mixture of experts has only been trained on datasets with at least 5 SNPs. If your dataset has fewer than 5 SNPs this function might return errors.
#'
#' @export
#' @return List
#' @examples
#' \dontrun{
#' # Example of body mass index on coronary heart disease
#' # Extract and harmonise data
#' a <- extract_instruments("ieu-a-2")
#' b <- extract_outcome_data(a$SNP, 7)
#' dat <- harmonise_data(a, b)
#'
#' # Apply all MR methods
#' r <- mr_wrapper(dat)
#'
#' # Load the rf object containing the trained models
#' load("rf.rdata")
#' # Update the results with mixture of experts
#' r <- mr_moe(r, rf)
#'
#' # Now you can view the estimates, and see that they have
#' # been sorted in order from most likely to least likely to
#' # be accurate, based on MOE prediction
#' r[[1]]$estimates
#'}
mr_moe <- function(res, rf)
{
if (!requireNamespace("randomForest", quietly = TRUE)) {
stop(
"Package \"randomForest\" must be installed to use this function.",
call. = FALSE
)
}
lapply(res, function(x)
{
o <- try(mr_moe_single(x, rf))
if(inherits(o, "try-error"))
{
return(x)
} else {
return(o)
}
})
}
mr_moe_single <- function(res, rf)
{
if (!requireNamespace("randomForest", quietly = TRUE)) {
stop(
"Package \"randomForest\" must be installed to use this function.",
call. = FALSE
)
}
metric <- res$info[1,] %>% dplyr::select(-c(id.exposure, id.outcome, steiger_filtered, outlier_filtered, nsnp_removed))
methodlist <- names(rf)
pred <- lapply(methodlist, function(m)
{
d <- dplyr::tibble(
method = m,
MOE = predict(rf[[m]], metric, type="prob")[,2]
)
return(d)
}) %>%
bind_rows %>%
arrange(desc(MOE))
if("MOE" %in% names(res$estimates))
{
message("Overwriting previous MOE estimate")
res$estimates <- subset(res$estimates, select=-c(MOE, method2))
}
res$estimates$selection <- "DF + HF"
res$estimates$selection[!res$estimates$outlier_filtered & res$estimates$steiger_filtered] <- "DF"
res$estimates$selection[res$estimates$outlier_filtered & !res$estimates$steiger_filtered] <- "HF"
res$estimates$selection[!res$estimates$outlier_filtered & !res$estimates$steiger_filtered] <- "Tophits"
res$estimates$method2 <- paste(res$estimates$method, "-", res$estimates$selection)
res$estimates <- dplyr::left_join(res$estimates, pred, by=c("method2"="method")) %>% dplyr::arrange(dplyr::desc(MOE))
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.