Description Usage Arguments Value Author(s) Examples
View source: R/discovery_regression.R
Fit multiple regression models in one go.
1 2 3 4 5 6 7 8 | discovery_regression(
df_long,
model = c("lm", "glm", "coxph"),
formula,
key = key,
predictor = predictor,
verbose = FALSE
)
|
df_long |
a data frame in a long format that contains a |
model |
a character, setting the type of model to fit on the input data
frame. Must be one of |
formula |
a formula object. For the case of models |
key |
the name of the |
predictor |
the name of the variable for which (adjusted)
univariate associations are estimated. This is stated here in order to
individuate the predictor of interest over many, possible cofactors that are
also present in |
verbose |
logical (default FALSE). If TRUE it prints a message with the names of the predictor and outcome. This may come in handy when, for example, fitting multiple outcomes. |
A data frame with the following columns: a character variable with the same
name as the key
parameter and numeric variables estimate
,
se
and pvalue
with the values of the respective variables
of the linear model. If predictor
is factor, additional column
term
is returned indicating factor level of predictor
Maria Kalimeri, Emmi Tikkanen, Juuso Parkkinen, Vilma Jagerroos
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | library(magrittr)
# Linear Regression Example
# We will use the simulated demo data that come with the package,
# ggforestplot::df_demo_metabolic_data
# Extract the names of the NMR biomarkers for discovery analysis
nmr_biomarkers <- dplyr::intersect(
ggforestplot::df_NG_biomarker_metadata$machine_readable_name,
colnames(df_demo_metabolic_data)
)
# Select only variables to be used for the model and collapse to a long
# format
df_long <-
df_demo_metabolic_data %>%
# Select only model variables
dplyr::select(tidyselect::all_of(nmr_biomarkers), gender, BMI) %>%
# Log-transform and scale biomarkers
dplyr::mutate_at(
.vars = dplyr::vars(tidyselect::all_of(nmr_biomarkers)),
.funs = ~ .x %>% log1p() %>% scale() %>% as.numeric()
) %>%
# Collapse to a long format
tidyr::gather(
key = machine_readable_name,
value = biomarkervalue,
tidyselect::all_of(nmr_biomarkers)
)
df_assoc_per_biomarker <-
discovery_regression(
df_long = df_long,
model = "lm",
formula =
formula(
biomarkervalue ~ BMI + factor(gender)
),
key = machine_readable_name,
predictor = BMI
)
# Filter Nightingale metadata data frame for biomarkers of interest
df_grouping <-
ggforestplot::df_NG_biomarker_metadata %>%
dplyr::filter(group %in% "Fatty acids")
# Join the association data frame with the group data above
df <-
df_assoc_per_biomarker %>%
# use right_join, with df_grouping on the right, to preserve the order of
# biomarkers it specifies.
dplyr::right_join(., df_grouping, by = "machine_readable_name")
# Draw a forestplot of the results
ggforestplot::forestplot(
df = df,
name = name,
estimate = estimate,
se = se,
pvalue = pvalue,
psignif = 0.001,
xlab = "1-SD increment in biomarker concentration
per 1-SD increment in BMI",
title = "Associations of fatty acids to BMI",
logodds = TRUE
)
# Logistic Regression Example
# Extract names of relevant NMR biomarkers
nmr_biomarkers <- dplyr::intersect(
ggforestplot::df_NG_biomarker_metadata$machine_readable_name,
colnames(df_demo_metabolic_data)
)
# Select only variables to be used for the model and
# collapse to a long format
df_long <-
df_demo_metabolic_data %>%
# Select only model variables (avoid memory overhead)
dplyr::select(
tidyselect::all_of(nmr_biomarkers),
gender,
incident_diabetes,
BMI,
baseline_age
) %>%
dplyr::mutate_at(
.vars = dplyr::vars(tidyselect::all_of(nmr_biomarkers)),
.funs = ~ .x %>% log1p() %>% scale() %>% as.numeric()
) %>%
# Collapse to a long format
tidyr::gather(
key = machine_readable_name,
value = biomarkervalue,
tidyselect::all_of(nmr_biomarkers)
)
df_assoc_per_biomarker_gender <-
discovery_regression(
df_long = df_long,
model = "glm",
formula =
formula(
incident_diabetes ~ biomarkervalue + factor(gender) + BMI + baseline_age
),
key = machine_readable_name,
predictor = biomarkervalue
)
# Filter Nightingale metadata data frame for biomarkers of interest
df_grouping <-
ggforestplot::df_NG_biomarker_metadata %>%
dplyr::filter(
group %in% "Cholesterol",
!(machine_readable_name %in% c("HDL2_C", "HDL3_C"))
)
# Join the association data frame with the group data above
df <-
df_assoc_per_biomarker_gender %>%
# use right_join, with df_grouping on the right, to preserve the order of
# biomarkers it specifies.
dplyr::right_join(., df_grouping, by = "machine_readable_name")
# Draw a forestplot of the results
ggforestplot::forestplot(
df = df,
name = name,
estimate = estimate,
se = se,
pvalue = pvalue,
psignif = 0.001,
xlab = "Odds ratio for incident type 2 diabetes (95% CI)
per 1-SD increment in biomarker concentration",
title = "Cholesterol and risk of future type 2 diabetes",
logodds = TRUE
)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.