#' A function to perform a one sample MR
#'
#' This function performs a one sample MR, including the observational (oe: outcome on exposure) analysis, the exposure on instrument (ei) analysis, and the Mendelian Randomization (MR) or instrumental variable analysis.
#' @param wdata a data frame of data with appropriate column names
#' @param outcome a string defining the outcome (column) name
#' @param exposure a string defining the exposure (column) name
#' @param instrument a string defining the instrument (column) name
#' @param covariates a vector of covariate (column) names
#' @param outcome_model_family gaussian or binomial- should the outcome be modeled as a gaussian or binomial
#' @param exposure_model_family gaussian or binomial - should the exposure be modeled as a gaussian or binomial
#' @param weights if needed, a string defining the weights (column) name
#' @param rnt_outcome TRUE or FALSE should the outcome trait be rank normal transformed
#' @keywords MR, one-sample MR, Wu Hausman test
#' @importFrom car Anova
#' @importFrom lmtest bptest waldtest
#' @return a vector of summary statistics
#' @export
#' @examples
#' osmr()
osmr = function( wdata,
outcome,
exposure,
instrument,
covariates = NA,
outcome_model_family = "gaussian",
exposure_model_family = "gaussian",
weights = NA,
rnt_outcome = FALSE){
### Define the model data
model_variables = na.omit( c(outcome, exposure, instrument, covariates, weights) )
mod_data = na.omit( wdata[, c(model_variables)] )
############################################
## I. rank normalize the dependent|outcome ?
############################################
if(outcome_model_family == "gaussian" & rnt_outcome == TRUE){
set.seed(2022)
mod_data[, outcome] = rntransform( mod_data[, outcome] )
}
################################################
## II. IF OUTCOME binomial make sure outcome is a factor
################################################
if(outcome_model_family == "binomial"){
mod_data[, outcome] = as.factor(mod_data[, outcome])
}
########################
### III. Build STAGE ONE
## model
########################
if( length(na.omit(covariates))>0 ){
S1_form = formula(paste0( exposure, " ~ ", paste0( covariates, collapse = " + ") , "+", instrument ) )
} else {
S1_form = formula( paste0( exposure, " ~ ", instrument ) )
}
#########################
## IV. RUN STAGE ONE
#########################
## STAGE ONE
if( is.na( weights) ){
fitX.LZ <- glm(formula = S1_form , data = mod_data, family = exposure_model_family)
} else {
w = grep( weights, colnames(mod_data) ); colnames(mod_data)[w] = "study_weights"
fitX.LZ <- glm(formula = S1_form , data = mod_data, weights = study_weights, family = exposure_model_family)
}
#########################
## IVa.
## Extract Exposure on Instrument (ei)
## Summary Statistics
#########################
res = residuals(fitX.LZ)
ei_n = length(res)
names(ei_n) = "ei_n"
ei_summary = summary(fitX.LZ)
ei_coef = ei_summary$coefficients[instrument, ]
ei_stat_n = paste0("ei_", gsub(" ","",names(ei_coef)[3] ) )
names( ei_coef ) = c("ei_beta","ei_se",ei_stat_n,"ei_P")
## R2
a = car::Anova(fitX.LZ, type = "II", test = "F")
ei_Fstat = a[instrument,3]
names(ei_Fstat) = "ei_Fstat"
ss = a[,1]
names(ss) = rownames(a)
ei_eta_sq = ss[instrument] / sum(ss)
names(ei_eta_sq) = "ei_eta_sq"
ei_stats = c(ei_n, ei_coef, ei_Fstat, ei_eta_sq)
########################
### V Build STAGE TWO
## model
## (!! OLS !!)
########################
if( length(na.omit(covariates))>0 ){
S2_form = formula(paste0( outcome, " ~ ", paste0( covariates, collapse = " + ") ,"+" , exposure ) )
} else {
S2_form = formula( paste0( outcome, " ~ ", exposure ) )
}
#########################
## VI. RUN STAGE TWO
#########################
## STAGE TWO
if( is.na( weights) ){
fitY.LX <- glm(formula = S2_form , data = mod_data, family = outcome_model_family)
} else {
w = grep( weights, colnames(mod_data) ); colnames(mod_data)[w] = "study_weights"
fitY.LX <- glm(formula = S2_form , data = mod_data, weights = study_weights, family = outcome_model_family)
}
#########################
## VIa.
## ---
## Extract Observational
## Summary Statistics
## ---
## regress outcome on exposure (oe)
#########################
## Sample Size
res = residuals(fitY.LX)
oe_n = length(res)
names(oe_n) = "oe_n"
## normality of residuals
if(outcome_model_family == "gaussian"){
oe_Wstat = normW(res)
names(oe_Wstat) = "oe_W"
}
if(outcome_model_family == "binomial"){
oe_Wstat = NA
names(oe_Wstat) = "oe_W"
}
## Breusch-Pagan test of homoskedasticity
oe_Breusch_Pagan_P = lmtest::bptest(fitY.LX)$p.value
names(oe_Breusch_Pagan_P) = "oe_Breusch_Pagan_P"
## Model summary Stats
oe_summary = summary(fitY.LX)
## Model coefficients
oe_coef = oe_summary$coefficients[exposure, ]
oe_stat_n = paste0("oe_", gsub(" ","",names(oe_coef)[3] ) )
names( oe_coef ) = c("oe_beta","oe_se",oe_stat_n,"oe_P")
## Type II ANOVA
a = car::Anova(fitY.LX, type = "II", test = "F")
## Exposure Fstat
oe_Fstat = a[exposure,3]
names(oe_Fstat) = "oe_Fstat"
## WALD TEST
# car::linearHypothesis(ivmod, paste0(exposure, "=0"), test = "F" )
oe_Wald_F_test = lmtest::waldtest(fitY.LX, exposure, test = "F" )
oe_Wald_F_test = c(oe_Wald_F_test$F[2], oe_Wald_F_test$`Pr(>F)`[2])
names(oe_Wald_F_test) = c("oe_Wald_F", "oe_Wald_P")
## Variance Explained (eta-sq or R2)
ss = a[,1]
names(ss) = rownames(a)
## Model Eta-sq
w = which(names(ss) == "Residuals")
oe_model_eta_sq = sum( ss[-w] ) / sum( ss )
names(oe_model_eta_sq) = "oe_model_eta_sq"
## Exposure Eta-sq
oe_exposure_eta_sq = ss[exposure] / sum(ss)
names(oe_exposure_eta_sq) = "oe_exposure_eta_sq"
oe_stats = c(oe_n, oe_coef, oe_Fstat, oe_Wald_F_test, oe_model_eta_sq, oe_exposure_eta_sq, oe_Breusch_Pagan_P)
#########################
## VII. RUN MR model
#########################
fitIV_ts <- ivtools::ivglm( estmethod = "ts", fitX.LZ = fitX.LZ, fitY.LX = fitY.LX, data = mod_data, ctrl = FALSE )
######################
### VIII. Summary Stats
######################
## IV model summary
s = summary(fitIV_ts)
## model coefficients
coef = s$coefficients
## Report beta, se, t-value, and P-value
MR_coef = coef[exposure,]
cn = gsub(" ","", names(MR_coef)[3] )
names(MR_coef) = c("beta","se", cn ,"P")
names(MR_coef) = paste0("MR_", names(MR_coef) )
## The re-fitted IV model (Y.LX)
ivmod = fitIV_ts$fitY.LX
## sample size in model
res = residuals( ivmod )
iv_n = length(res); names(iv_n) = "MR_n"
## normality of residuals
if(outcome_model_family == "gaussian"){
Wstat = normW(res)
names(Wstat) = "MR_res_Wstat"
} else {
Wstat = NA
names(Wstat) = "MR_res_Wstat"
}
## Breusch-Pagan test of homoskedasticity
Breusch_Pagan_P = lmtest::bptest(ivmod)$p.value
names(Breusch_Pagan_P) = "MR_Breusch_Pagan_P"
## TYPE II ANOVA
a = car::Anova(fitIV_ts$fitY.LX, type = "II", test.statistic = "F")
## Exposure Fstat
MR_Fstat = a[exposure,3]
names(MR_Fstat) = "MR_Fstat"
## WALD TEST
# car::linearHypothesis(ivmod, paste0(exposure, "=0"), test = "F" )
MR_Wald_F_test = lmtest::waldtest(ivmod, exposure, test = "F" )
MR_Wald_F_test = c(MR_Wald_F_test$F[2], MR_Wald_F_test$`Pr(>F)`[2])
names(MR_Wald_F_test) = c("MR_Wald_F", "MR_Wald_P")
## Variance Explained (eta-sq or R2)
sumsq = a[,1]; names(sumsq) = rownames(a)
## Model Eta-sq
w = which(names(sumsq) == "Residuals")
model_r2 = sum( sumsq[-w] ) / sum( sumsq )
names(model_r2) = "MR_model_Rsq"
## Genotype Predicted Exposure eta-sq
dhat_r2 = sumsq[ exposure ] / sum( sumsq )
names(dhat_r2) = "MR_dhat_r2"
MR_stats = c(iv_n, Wstat, MR_coef, model_r2, dhat_r2, MR_Fstat, MR_Wald_F_test, Breusch_Pagan_P )
############################
## IX. Wu-Hausman Endogeneity
## Test
############################
## Observational (OLS) residuals (outcome on exposure and covariates)
mod_data$ols_res = residuals( fitY.LX )
## Stage 1 residuals (exposure on instrument + covariates)
mod_data$ei_res = residuals( fitX.LZ )
## Define the linear regression formula
if( length(na.omit(covariates))>0 ){
WHE_form = formula(paste0( "ols_res ~ ", paste0( covariates, collapse = " + ") ,"+" , exposure, " + ei_res" ) )
} else {
WHE_form = formula( paste0( "ols_res ~ ", exposure , " + ei_res") )
}
## Wu Hausman linear model of residuals
if( is.na( weights) ){
fit.WHE <- glm(formula = WHE_form , data = mod_data, family = "gaussian" )
} else {
w = grep( weights, colnames(mod_data) ); colnames(mod_data)[w] = "study_weights"
fit.WHE <- glm(formula = WHE_form , data = mod_data, weights = study_weights, family = "gaussian" )
}
## Lagrange Multiplier Statistic:
s = summary(fit.WHE)
## R^2 from Deviances
r2 = 1 - ( s$deviance / s$null.deviance )
## *** Lagrange Multiplier Statistic ***
## ----- Wu_Hausman_stat ------
LM = length( residuals(fit.WHE) ) * r2
names(LM) = "Wu_Hausman_stat"
## P-value
WuH_P = pchisq(q = LM, df = 1, lower.tail = F)
names(WuH_P) = "Wu_Hausman_P"
## REDEFINE MR_stats output
MR_stats = c(MR_stats, LM, WuH_P)
############################
## X. Data OUT
############################
names(exposure) = "MR_exposure"
names(outcome) = "MR_outcome"
names(instrument) = "MR_instrument"
###
out = c(outcome, exposure, instrument,
ei_stats,
oe_stats,
MR_stats )
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.