#' GAM and linear-strata Mendelian randomization
#'
#' This function derives strata defined observational and TSLS (MR) estimates using linear modeling.
#' The full data set will be fit to a linear model and a generalized linear model (GAM).
#' Then subsequently the exposure data will be stratified and a linear model will be used to derive
#' effect estimates for each strata.
#'
#' @keywords GAM strata stratified MR
#' @param wdata a data frame passed to function containing necessary data for analysis
#' @param outcome a single string character of the column name for the outcome or dependent or response variable
#' @param exposure a single string character of the column name for the exposure or independent or explanatory variable
#' @param instrument a data frame passed to function containing necessary data for analysis
#' @param linear_covariates a vector of string(s) that are also column names used to define variables that will be set as parametric (linear) covariates.
#' @param smooth_covariates a vector of string(s) that are also column names used to define variables that will be set as non-linear (smooth, s()) covariates.
#' @param strata a single integer or vector to define how strata should be defined.
#' * `strata = 4`: will define quartiles or 4 evenly sized bins
#' * `strata = 10`: will define deciles or 10 evenly sized bins
#' * `strata = c(1,10,20,30)`: a user defined numeric vector to define boundaries for each strata.
#' - The numeric vector example provided will define 4 strata. Lower bound values are inclusive, upper bounds are exclusive, to the exception of the last bounding value.
#' @param rnt_outcome binary TRUE or FALSE if the dependent or response variable should be rank normal transformed.
#' @param weights_variable a single string character of the column name for a weights variable
#' @param outlier_cutoff a single numeric value to define a cutoff value for how many iqr or sd units outlier values
#' @param outlier_method a single string character of "iqr" or "sd" to determine if outlier should be determined by means and sd or medians and iqr.
#' @param messages should a progress message be printed to screen - binary TRUE or FALSE
#' @param return_models should the model data data frame and each gam and linear model be returned (TRUE or FALSE). Default is TRUE, as they are needed for the plot function.
#' @return returns a glsmr object containing the complete linear and GAM models for the full data set, summary statistics for the data, a strata observational table, and a strata TSLS (MR) table.
#' @importFrom stats na.omit anova quantile sd formula lm fitted.values quantile
#' @export
#' @examples
#' glsmr()
glsmr = function( wdata,
outcome = NA,
exposure = NA,
instrument = NA,
linear_covariates = NA,
smooth_covariates = NA,
strata = 4,
rnt_outcome = FALSE,
weights_variable = NA,
outlier_method = "iqr",
outlier_cutoff = 5,
messages = FALSE,
return_models = TRUE){
############################################
### 0. look for any errors in parameters
############################################
# if (!strata[1] %in% c("quartiles","deciles") & length(strata) < 3 & class(strata) != "numeric" ){
# stop("strata parameter can either be defined as (1) quartiles, (2) deciles, or (3) a numeric vector of at least length 3 defining strata boundries")
# }
## PART I: SETTING UP THE DATA
if(messages == TRUE){ message("Part I. Setting up the data.") }
############################################
### PART I: 1. Define Model Data
############################################
if(messages == TRUE){ message("Part I.1. Defining model data frame.") }
model_variables = na.omit( c(outcome, exposure, instrument, linear_covariates, smooth_covariates, weights_variable) )
# mod_data = na.omit( wdata[, c(model_variables)] )
mod_data = wdata[, c(model_variables)]
############################################
## PART I: 2. define covariates
############################################
if(messages == TRUE){ message("Part I.2. Defining covariates.") }
covariates = na.omit(c(linear_covariates, smooth_covariates))
if(length(covariates) == 0){covariates = NULL}
############################################
## PART I: 3. Identify outcome outliers
############################################
if(messages == TRUE){ message("Part I.3. Identifying outcome outliers") }
outliers = id_outliers( y = mod_data[, outcome], outlier_method = outlier_method, outlier_cutoff = outlier_cutoff)
# How many outcome outliers
number_of_outcome_outliers = length(outliers); names(number_of_outcome_outliers) = "number_of_outcome_outliers"
## Turn outliers to NA
if(length(outliers) > 0){
mod_data[outliers, outcome] = NA
}
############################################
## PART I: 4. Identify exposure outliers
############################################
if(messages == TRUE){ message("Part I.4. Identifying exposure outliers") }
outliers = id_outliers( y = mod_data[, exposure], outlier_method = outlier_method, outlier_cutoff = outlier_cutoff)
# How many exposure outliers
number_of_exposure_outliers = length(outliers); names(number_of_exposure_outliers) = "number_of_exposure_outliers"
## Turn outliers to NA
if(length(outliers) > 0){
mod_data[outliers, exposure] = NA
}
############################################
## PART I: 5. normality of outcome
############################################
if(messages == TRUE){ message("Part I.5. Estimate Shapiro-Wilk normality W-stat for outcome") }
W_outcome = normW(mod_data[, outcome]); names(W_outcome) = "W_outcome"
############################################
## PART I: 6. normality of exposure
############################################
if(messages == TRUE){ message("Part I.6. Estimate Shapiro-Wilk normality W-stat for exposure") }
W_exposure = normW(mod_data[, exposure]); names(W_exposure) = "W_exposure"
############################################
## PART I: 7. rank normalize the outcome ?
############################################
if(rnt_outcome == TRUE){
if(messages == TRUE){ message("Part I.7. Performing rank normal transformation of outcome") }
mod_data[, outcome] = rntransform( mod_data[, outcome] )
} else {
if(messages == TRUE){ message("Part I.7. No rank normal transformation of outcome performed") }
}
############################################
## PART I: 8. Derive IV-Free Exposure
############################################
if(messages == TRUE){ message("Part I.8. derive instument free exposure") }
mod_data = iv_free_exposure( wdata = mod_data,
exposure = exposure,
instrument = instrument,
covariates = NA,
exposure_mean_normalize = TRUE)
####################################
## PART I: 9. Derive Instrument predicted exposure or d.hat
####################################
if(messages == TRUE){ message("Part I.9. derive instument predicted exposure") }
mod_data = iv_predicted_exposure( wdata = mod_data,
exposure = exposure,
instrument = instrument,
covariates = covariates)
############################################
### PART I: 10. null GAM model
### with NO exposure
############################################
if(messages == TRUE){ message("Part I.10. running full observational NULL GAM model with NO Exposure to extract outcome residuals modeled with covariate smooths") }
res_gam_mod0 = gamfit( wdata = mod_data,
dependent = outcome,
independent = NA,
linear_covariates = c(linear_covariates),
smooth_covariates = smooth_covariates,
exposure_trait = NA)
## place residuals into mod_data
m = match(rownames(mod_data), rownames(res_gam_mod0$fit$model))
mod_data$GAM_outcome_residuals = residuals(res_gam_mod0$fit)[m]
############################################
## PART I: 10. Summary Statistics for exposure
############################################
if(messages == TRUE){ message("Part I.10. summary statistic for exposure") }
temp = na.omit( mod_data[, c(outcome, exposure, covariates)] )
exp_ss = c( N = nrow(temp) ,
mean = mean(temp[, exposure], na.rm = TRUE),
min = min(temp[, exposure], na.rm = TRUE),
max = max(temp[, exposure], na.rm = TRUE),
sd = sd(temp[, exposure], na.rm = TRUE) )
rm(temp)
### Estimate again with filtering on Instrument NAs too
temp = na.omit( mod_data[, c(outcome, exposure, covariates, instrument)] )
iv_exp_ss = c( N = nrow(temp) ,
mean = mean(temp[, exposure], na.rm = TRUE),
min = min(temp[, exposure], na.rm = TRUE),
max = max(temp[, exposure], na.rm = TRUE),
sd = sd(temp[, exposure], na.rm = TRUE) )
rm(temp)
############################################
## PART I: 11. Stratify data by Exposure
############################################
if(messages == TRUE){ message("Part I.11. stratifying the data by the (A) exposure and (B) iv free exposure") }
strata_data = stratify_data( wdata = mod_data, stratify_on = exposure, strata = strata )
iv_free_strata_data = stratify_data( wdata = mod_data, stratify_on = "iv_free_exposure", strata = strata )
############################################
## PART I: 12. Summary Statistics for exposure
## by strata
############################################
if(messages == TRUE){ message("Part I.12. stratified summary statistic for exposure") }
strata_exp_ss = stratify_sumstats(wdata = strata_data, exposure = exposure, model_vars = c(outcome, exposure, covariates) )
iv_strata_exp_ss = stratify_sumstats(wdata = iv_free_strata_data, exposure = exposure, model_vars = c(outcome, exposure, covariates, instrument) )
## PART II: OBSERVATIONAL MODELING
if(messages == TRUE){ message("Part II. Observational modeling") }
############################################
### PART II: 1. null GAM model
### with no exposure smooth
############################################
if(messages == TRUE){ message("Part II.1. running full observational NULL GAM model") }
gam_mod0 = gamfit( wdata = mod_data,
dependent = outcome,
independent = NA,
linear_covariates = c(linear_covariates, exposure),
smooth_covariates = smooth_covariates,
exposure_trait = exposure)
############################################
### PART II: 2. full GAM model
############################################
if(messages == TRUE){ message("Part II.2. running full observational GAM model") }
gam_mod = gamfit( wdata = mod_data,
dependent = outcome,
independent = exposure,
linear_covariates = linear_covariates,
smooth_covariates = smooth_covariates,
exposure_trait = exposure)
############################################
## PART II: 3. Testing for non-linearity
## is the GAM with an exposure smooth
## better than a model without an exposure smooth ?
############################################
if(messages == TRUE){ message("Part II.3. testing non-linearity of observational data: GAM vs NULL GAM") }
## Ftest
a = anova(gam_mod0$fit, gam_mod$fit, test = "F")
obs_nonlinearity_Ftest = a[2,3:6]; names(obs_nonlinearity_Ftest) = paste0( "", c("obs_nonlinear_Ftest_df","obs_nonlinear_Ftest_deviance","obs_nonlinear_Ftest_F","obs_nonlinear_Ftest_P"))
### LRT test
obs_nonlinearity_lrt = lmtest::lrtest( gam_mod0$fit, gam_mod$fit )
obs_nonlinearity_lrt = unlist( obs_nonlinearity_lrt[2, 3:5] ); names(obs_nonlinearity_lrt) = c("obs_nonlinear_LRT_df","obs_nonlinear_LRT_ChisqStat","obs_nonlinear_LRT_P")
### Model Fit Stats
mod_stats = c(gam_mod0$summary[c("REML","AIC","loglik")], gam_mod$summary[c("REML","AIC","loglik")])
names(mod_stats) = paste0( c( rep("gam_mod0_", 3), rep("gam_mod_", 3) ), names(mod_stats) )
obs_nonlinearity_lrt = c( mod_stats, obs_nonlinearity_lrt )
### Pull the stats together
obs_nonlinearity_test = unlist( c(obs_nonlinearity_Ftest, obs_nonlinearity_lrt) )
############################################
### PART II: 4. Linear model
############################################
if(messages == TRUE){ message("Part II.4. running full observational linear model") }
lm_mod = lmfit( wdata = mod_data,
dependent = outcome,
independent = exposure,
covariates = covariates)
## Combine summary stats of full for data observational data
# obs_ss = c(lm_mod$summary, exp_ss)
############################################
## PART II: 5. Run observational linear model on each
## strata
############################################
if(messages == TRUE){ message("Part II.5. running observational linear models on each strata") }
strata_lm_mod = stratify_lmfit( wdata = strata_data,
outcome = outcome,
exposure = exposure,
covariates = covariates)
res_strata_lm_mod = stratify_lmfit( wdata = strata_data,
outcome = "GAM_outcome_residuals",
exposure = exposure,
covariates = NA)
## Combine strata sumstats and linear model estimates
# strata_obs_ss = cbind(strata_lm_mod, strata_exp_ss)
############################################
## PART II: 6. Observational Meta
## Analysis
############################################
if(messages == TRUE){ message("Part II.6. run meta analysis on observational strata betas and means") }
obs_meta_test = meta_test( strata_lm_mod, modulator_col_name = "exposure_mean" )
res_obs_meta_test = meta_test( res_strata_lm_mod, modulator_col_name = "exposure_mean" )
############################################
## PART II: 7. Combine strata summary stats
## & full model summary stats
############################################
obs_ss = rbind( strata_lm_mod , fulldata = lm_mod$summary )
# obs_ss = obs_ss[, -c(4:6,9) ]
## observational results for the residualized outcome.
res_obs_ss = rbind( res_strata_lm_mod , fulldata = lm_mod$summary )
## PART III: MR MODELING
if(messages == TRUE){ message("Part III. MR modeling") }
####################################
### PART III: 1. Null IV GAM model
### with NO exposure smooth
####################################
if(messages == TRUE){ message("Part III.1. running full TSLS NULL GAM model") }
iv_gam_mod0 = gamfit( wdata = mod_data,
dependent = outcome,
independent = NA,
linear_covariates = c(linear_covariates, "iv_predicted_exposure"),
smooth_covariates = smooth_covariates)
####################################
### PART III: 4. Full IV GAM model
### with exposure smooth
####################################
if(messages == TRUE){ message("Part III.2. running full TSLS GAM model") }
iv_gam_mod = gamfit( wdata = mod_data,
dependent = outcome,
independent = "iv_predicted_exposure",
linear_covariates = linear_covariates,
smooth_covariates = smooth_covariates)
####################################
## PART III: 3. ANOVA F-test of
## GAM_0 vs GAM
####################################
if(messages == TRUE){ message("Part III.3. testing non-linearity of TSLS model: GAM vs NULL GAM") }
## Ftest
a = anova(iv_gam_mod0$fit, iv_gam_mod$fit, test = "F")
iv_nonlinearity_Ftest = a[2,3:6]; names(iv_nonlinearity_Ftest) = paste0( "", c("iv_nonlinear_Ftest_df","iv_nonlinear_Ftest_deviance","iv_nonlinear_Ftest_F","iv_nonlinear_Ftest_P"))
### LRT
iv_nonlinearity_lrt = lmtest::lrtest( iv_gam_mod0$fit, iv_gam_mod$fit )
iv_nonlinearity_lrt = unlist( iv_nonlinearity_lrt[2, 3:5] ); names(iv_nonlinearity_lrt) = c("iv_nonlinear_LRT_df","iv_nonlinear_LRT_ChisqStat","iv_nonlinear_LRT_P")
## model fit sum stats
mod_stats = c(iv_gam_mod0$summary[c("REML","AIC","loglik")], iv_gam_mod$summary[c("REML","AIC","loglik")])
names(mod_stats) = paste0( c( rep("iv_gam_mod0_", 3), rep("iv_gam_mod_", 3) ), names(mod_stats) )
iv_nonlinearity_lrt = c( mod_stats, iv_nonlinearity_lrt )
### Pull stats together
iv_nonlinearity_test = unlist( c(iv_nonlinearity_Ftest, iv_nonlinearity_lrt) )
##########################
## Combine Obs and IV
## non-linearity test stats
## Data Out
##########################
nonlinearity_test = rbind(obs = obs_nonlinearity_test, tsls = iv_nonlinearity_test)
####################################
### PART III: 4. Estimate instrument on exposure
### FULL DATA Beta_ie
####################################
if(messages == TRUE){ message("Part III.4. estimating effect of instrument on exposure (beta_ie) for full data set") }
beta_ie = iv_estimates( wdata = mod_data,
dependent = exposure,
instrument = instrument,
covariates = covariates)
####################################
### PART III: 5. Estimate instrument on outcome
### FULL DATA Beta_io
####################################
if(messages == TRUE){ message("Part III.5. estimating effect of instrument on outcome (beta_io) for full data set") }
beta_io = iv_estimates( wdata = mod_data,
dependent = outcome,
instrument = instrument,
covariates = covariates)
####################################
### PART III: 6. Estimate causal estimate
### FULL DATA Beta_iv
####################################
if(messages == TRUE){ message("Part III.6. estimating causal effect estimate (beta_iv)") }
ratio_mr_ss = as.data.frame( rbind( beta_ie = beta_ie,
beta_io = beta_io,
beta_iv = rep(NA, length(beta_io) ) ) )
ratio_mr_ss$beta[3] = ratio_mr_ss$beta[2] / ratio_mr_ss$beta[1]
ratio_mr_ss$se[3] = ratio_mr_ss$se[2] / ratio_mr_ss$beta[1]
ratio_mr_ss$P[3] = ratio_mr_ss$P[2]
ratio_mr_ss$n[3] = ratio_mr_ss$n[2]
# ratio_mr_ss = cbind(ratio_mr_ss, rbind(iv_exp_ss, iv_exp_ss, iv_exp_ss))
####################################
### PART III: 7. ivreg()
### FULL DATA Beta_iv
####################################
if(messages == TRUE){ message("Part III.7. running a full TSLS model with ivreg") }
iv_fit = ivregfit( wdata = mod_data,
outcome = outcome,
exposure = exposure,
instrument = instrument,
covariates = covariates,
weights_variable = NA,
rnt_outcome = FALSE)
### Combine FULL DATA TSLS | MR summary statistics
cn = c("exposure_n", "exposure_mean", "exposure_min", "exposure_max", "exposure_sd",
"outcome_n", "outcome_mean", "outcome_min", "outcome_max", "outcome_sd")
ratio_mr_ss[3, cn] = iv_fit$summary[cn]
###
ivreg_ss = iv_fit$summary
m = match( names(ratio_mr_ss), names(ivreg_ss) )
tsls_ss = rbind( ratio_mr_ss, ivreg = ivreg_ss[m] )
####################################
### PART III: 8. Estimate instrument on exposure
### beta coefficients by strata
####################################
if(messages == TRUE){ message("Part III.8. estimating effect of instrument on exposure for each strata") }
iv_strata_beta_ie = stratify_lmfit( wdata = iv_free_strata_data,
outcome = exposure,
exposure = instrument,
covariates = covariates)
# iv_strata_beta_ie = cbind(iv_strata_beta_ie, iv_strata_exp_ss )
####################################
### PART III: 9. Testing assumption
### that IV-exposure effect is
### stable across strata
### i.e. relationship is linear
####################################
if(messages == TRUE){ message("Part III.9. IV-exposure assumption meta (beta_ie)") }
ie_meta_test = meta_test( iv_strata_beta_ie, modulator_col_name = "exposure_mean" )
####################################
### PART III: 10. Estimate instrument on outcome
### beta coefficients by strata
####################################
if(messages == TRUE){ message("Part III.10. estimating effect of instrument on outcome for each strata") }
iv_strata_beta_io = stratify_lmfit( wdata = iv_free_strata_data,
outcome = outcome,
exposure = instrument,
covariates = covariates)
# iv_strata_beta_io = cbind(iv_strata_beta_io, iv_strata_exp_ss )
####################################
### PART III: 11. Beta_iv causal effect
### estimates by strata
####################################
if(messages == TRUE){ message("Part III.11. estimating the iv ratio MR estimates for each strata") }
iv_strata_ratio = stratify_ivratio( strata_beta_ie = iv_strata_beta_ie,
strata_beta_io = iv_strata_beta_io,
beta_ie = beta_ie["beta"],
tsls_ss = tsls_ss)
####################################
## PART III: 12. Run a ivreg model
## on each strata
####################################
if(messages == TRUE){ message("Part III.12. running ivreg on each strata") }
iv_strata_ivreg = stratify_ivregfit( wdata = iv_free_strata_data,
outcome = outcome,
exposure = exposure,
instrument = instrument,
covariates = covariates,
weights_variable = NA,
rnt_outcome = FALSE)
# iv_strata_ivreg = cbind(iv_strata_ivreg, iv_strata_exp_ss )
####################################
### PART III: 13. ivreg meta
####################################
if(messages == TRUE){ message("Part III.13. ivreg meta analysis (beta_iv)") }
iv_meta_test = meta_test( iv_strata_ivreg, modulator_col_name = "exposure_mean" )
## add the full data ivreg results
iv_strata_ivreg = rbind(iv_strata_ivreg, fulldata = iv_fit$summary )
if(messages == TRUE){ message("Part IV. returning results to user") }
####################################
## RESULTS OUT
####################################
if(messages == TRUE){ message("Part IV.1. compiling data to report") }
names(rnt_outcome) = "outcome_RNTransformed"
## summary stats
ss_out = data.frame( ## return input variable
outcome = outcome,
exposure = exposure,
instrument = instrument,
## summary stats
number_of_outcome_outliers = number_of_outcome_outliers,
number_of_exposure_outliers = number_of_exposure_outliers,
W_outcome = W_outcome,
W_exposure = W_exposure,
rnt_outcome = rnt_outcome,
number_of_strata = length(strata_data),
stringsAsFactors = FALSE )
rownames(ss_out) = "sumstats"
## meta analysis stats.
meta_stats = rbind(obs_meta_test, res_obs_meta_test, ie_meta_test, iv_meta_test)
rownames(meta_stats) = c("obs", "res_obs", "ie","mr")
############################################
### Place all Summary Tables together
############################################
summary_tables = list(
## test of non-linear relationships
nonlinearity_test = nonlinearity_test,
observational_sumstats = obs_ss,
observational_GAM_res_sumstats = res_obs_ss,
### return the instrument exposure and instrument outcome stat estimates
strata_beta_ie = iv_strata_beta_ie,
strata_beta_io = iv_strata_beta_io,
## Complete data set TSLS estimates
fulldata_tsls_sumstats = tsls_ss,
## Stratified TSLS estimates
strata_tsls_ratio_sumstats = iv_strata_ratio,
strata_tsls_ivreg_sumstats = iv_strata_ivreg,
## Stratified meta sum stats
strata_meta_analysis = meta_stats,
## return the model data data frame
model_data = mod_data
)
############################################
### Place all complete data models in a single list
############################################
models_no_stratification = list(
## observational models
observational_gam_null = gam_mod0$fit,
observational_gam = gam_mod$fit,
observational_lm = lm_mod$fit,
## tsls models
tsls_gam_null = iv_gam_mod0$fit,
tsls_gam = iv_gam_mod$fit,
tsls_ivreg = iv_fit$fit
)
############################################
### Pull data together
############################################
if(return_models == TRUE){
out = list(summary_stats = ss_out,
summary_tables = summary_tables,
models_no_stratification = models_no_stratification
)
} else {
out = list(summary_stats = ss_out,
summary_tables = summary_tables
)
}
if(messages == TRUE){ message("Part IV.2. returning results to user") }
return(out)
} ## end of function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.