#----------------------------------
# Functions `lm_related` for `CHMI`
#----------------------------------
### 'glm' odds--------------------------------------------------------------------------
# '@export
chmi.stat.glm_or <- function(
phen,
l_group,
l_varsy, l_varsx,
arg_multivar = c('univariate', 'multivariate'),
arg_info_model = c('null_deviance', 'df_null', 'log_lik', 'aic', 'bic', 'deviance', 'df_residual', 'NULL'),
arg_info_beta = c('beta', 'std_error', 't_value', 'NULL'),
arg_filter_vars,
format_pval = TRUE,
padj_method = c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none'),
stat = c('janitor', 'broom', 'tidymodels', 'tidyverse'))
{
# stat
stat <- match.arg(stat, c('janitor', 'broom', 'tidymodels', 'tidyverse'))
# list of formulas
l_formulas <- list()
# loop for `l_varsy` & `l_varsx`
if(arg_multivar == 'univariate'){
# univariate
for(j in l_varsy) {
for(i in l_varsx) {
forms <- paste0(j, ' ~ ', i)
l_formulas[[length(l_formulas) + 1]] = forms
}
}
} else if (arg_multivar == 'multivariate'){
# multivariate
for(j in l_varsy) {
for(i in l_varsx) {
forms <- paste0(j, ' ~ ', paste0(unlist(l_varsx, i), collapse = ' + '))
}
l_formulas[[length(l_formulas) + 1]] = forms
}
}
# extract whole summary of 'lm' models
phen_1 <- phen %>%
select(l_group, l_varsy, l_varsx) %>%
group_by_at(l_group) %>%
group_nest() %>%
slice(rep(1:n(), each = length(l_formulas))) %>%
arrange_at(l_group) %>%
mutate(
l_forms = rep(l_formulas),
l_mods = map2(data, l_forms,
~ try(glm(formula = as.formula(.y), data = .x, family = binomial(link = 'logit')))),
smry_model = map(l_mods, ~ janitor::clean_names(glance(.x), case = 'snake')),
smry_beta = map(l_mods, ~ janitor::clean_names(tidy(.x), case = 'snake')),
or = map(l_mods, ~ tibble::enframe(exp(coef(.x)), name = NULL, value = 'or')),
ci = map(l_mods, ~ tibble::as_tibble(exp(confint(.x, level = .95))))) %>%
select(-data, -l_mods) %>%
unnest(cols = c('l_forms', 'smry_model', 'smry_beta', 'or', 'ci'),
names_repair = ~ tidyr_legacy(., sep = '_')) %>%
filter(!str_detect(term, 'Intercept')) %>%
select(everything(),
beta = estimate, t_value = statistic, vars_x = term, raw_pval = p_value,
ci_lower = starts_with('2.5') , ci_upper = starts_with('97.5')) %>%
separate(l_forms, c('vars_y', 'rest'), sep = ' ~ ', remove = TRUE) %>%
mutate_at(vars(c('vars_y', 'vars_x')), as.factor) %>%
select(-rest)
### argument 'arg_filter_vars'
if(!missing(arg_filter_vars)) {
# update 'data'
phen_1 <- phen_1 %>%
filter(str_detect(vars_x, pattern = paste(arg_filter_vars, collapse = '|')))
}
# list of variables to choose
l_vars <- c(l_group, 'vars_y', 'vars_x', 'or', 'ci_lower', 'ci_upper', 'raw_pval')
l_vars_arg <- c(l_vars, arg_info_model, arg_info_beta)
### update 'data'
phen_1 <- phen_1 %>%
select(one_of(l_vars_arg))
### argument 'format_pval'
if(format_pval == TRUE & !missing(padj_method)) {
phen_1 <- chmi.stat.p_adjust(phen = phen_1, method = padj_method) %>%
mutate_at(vars(c('adjust_text', 'adjust_signif')), as.factor)
}
# return
return(phen_1)
}
### 'lm' betas----------------------------------------------------------------------------
# '@export
chmi.stat.lm_beta <- function(
phen,
l_group,
l_varsy, l_varsx,
custom_formls = FALSE, #if we want to customize the model formulas. By default we don't.
formls = NULL, #list of customized models (one per each 5 isotypes). By default, NULL.
arg_multivar = c('univariate', 'multivariate'),
arg_info_model = c('r_squared', 'adj_r_squared', 'sigma', 't_value', 'p_value',
'df', 'log_lik', 'aic', 'bic', 'deviance', 'df_residual', 'NULL'),
arg_info_beta = c('std_error' , 't_value_beta', 'NULL'),
arg_filter_vars,
format_pval = TRUE,
padj_method = c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none'),
stat = c('janitor', 'broom', 'tidymodels', 'tidyverse'))
{
### stat
stat <- match.arg(stat, c('janitor', 'broom', 'tidymodels', 'tidyverse'))
# list of formulas
l_formulas <- list()
if(custom_formls == FALSE){
### loop for `l_varsy` & `l_varsx`
if(arg_multivar == 'univariate'){
# univariate
for(j in l_varsy) {
for(i in l_varsx) {
forms <- paste0(j, ' ~ ', i)
l_formulas[[length(l_formulas) + 1]] = forms
}
}
} else if (arg_multivar == 'multivariate'){
# multivariate
for(j in l_varsy) {
for(i in l_varsx) {
forms <- paste0(j, ' ~ ', paste0(unlist(l_varsx, i), collapse = ' + '))
}
l_formulas[[length(l_formulas) + 1]] = forms
}
}
phen_1 <- phen %>%
select(l_group, l_varsy, l_varsx) %>%
group_by_at(l_group) %>%
group_nest() %>%
slice(rep(1:n(), each = length(l_formulas))) %>%
arrange_at(l_group) %>%
mutate(l_forms = rep(l_formulas))
}
else if(custom_formls == TRUE){
l_formulas <- formls
phen_1 <- phen %>%
select(l_group, l_varsy, l_varsx) %>%
arrange_at(l_group) %>%
group_by(isotype) %>%
group_nest() %>%
mutate(l_forms = rep(l_formulas)) %>% #first we put the corresponding formula per isotype
unnest(cols = data) %>%
group_by_at(l_group) %>% #and finally we arrange the data so that it looks like when we
#don't have customized formulas
group_nest() %>%
mutate(l_form = map(data, ~list(.x$l_forms)),
l_forms = map(l_form, ~ .x[[1]][[1]])) %>%
select(-l_form)
}
### extract whole summary of 'lm' models
phen_1 <- phen_1 %>%
mutate(l_mods = map2(data, l_forms,
~ try(lm(formula = as.formula(.y), data = .x, model = TRUE))),
smry_model = map(l_mods, ~ janitor::clean_names(glance(.x), case = 'snake')),
smry_beta = map(l_mods, ~ janitor::clean_names(tidy(.x), case = 'snake')),
ci_lower = map(l_mods, ~ tibble::as_tibble(confint(.x, level = .95))[, 1]),
ci_upper = map(l_mods, ~ tibble::as_tibble(confint(.x, level = .95))[, 2])) %>%
select(-data, -l_mods) %>%
unnest(cols = c('l_forms', 'smry_model', 'smry_beta', 'ci_lower', 'ci_upper'),
names_repair = ~ tidyr_legacy(., sep = '_')) %>% ## '_1' fix 'data_beta' info for 'statistic' & 'p_value'
filter(!str_detect(term, 'Intercept')) %>%
select(everything(),
t_value = statistic,
beta = estimate, vars_x = term, t_value_beta = statistic_1, raw_pval = p_value_1,
ci_lower = starts_with('2.5') , ci_upper = starts_with('97.5')) %>%
separate(l_forms, c('vars_y', 'rest'), sep = ' ~ ', remove = TRUE) %>%
mutate_at(vars(c('vars_y', 'vars_x')), as.factor) %>%
select(-rest)
### argument 'arg_filter_vars'
if(!missing(arg_filter_vars)) {
# update 'data'
phen_1 <- phen_1 %>%
filter(str_detect(vars_x, pattern = paste(arg_filter_vars, collapse = '|')))
}
# list of variables to choose
l_vars <- c(l_group, 'vars_y', 'vars_x', 'beta', 'ci_lower', 'ci_upper', 'raw_pval')
l_vars_arg <- c(l_vars, arg_info_model, arg_info_beta)
### update 'data'
phen_1 <- phen_1 %>%
select(one_of(l_vars_arg))
### argument 'format_pval'
if(format_pval == TRUE & !missing(padj_method)) {
phen_1 <- chmi.stat.p_adjust(phen = phen_1, method = padj_method) %>%
mutate_at(vars(c('adjust_text', 'adjust_signif')), as.factor)
}
# return
return(phen_1)
}
### AIC - BIC selection manual function-----------------------------------------
## https://www.methodology.psu.edu/resources/AIC-vs-BIC/
## Kruschke K., J. Doing Bayesian Data Analysis: A tutorial with R and BUGS.
## Metric Predicted Variable with Multiple Metric Predictors (pp. 371-400).
## https://www.users.csbsju.edu/~mgass/robert.pdf
# '@export
chmi.stat.all_possible_regressions <- function(
phen,
l_varsy,
l_varsx,
base_covlist,
l_group,
filter_signif_raw_pval = c(0.01, 0.05, 0.1),
#base_covlist, tested_covlist,
#n_comb, testing = FALSE,
stat = c('plyr'))
{
### arg
stat <- match.arg(stat, c('plyr'))
# check
stopifnot(all(c(l_varsx, l_varsy) %in% names(phen)))
### update base_covlist (depending on the l_varsx of each multivariable model)
base_covlist <- base_covlist[base_covlist %in% l_varsx]
### create a list of 'tested_covlist' combinations
l_combn <- list()
#loop
for(i in 1:length(l_varsx[l_varsx %nin% base_covlist])){
# combinations without repetition
combn_1 <- combn(x = l_varsx[l_varsx %nin% base_covlist], i, FUN = paste0, simplify = FALSE)
# listed combinations
l_combn <- rlist::list.append(l_combn, combn_1)
}
### join in unique list()
l_combn <- unlist(l_combn, recursive = FALSE)
### create l_formulas
l_formulas <- list()
# loop
for(i in l_varsy) {
for(j in l_combn) {
# create one formula
forms <- paste0(i, ' ~ ', paste0(base_covlist, collapse = ' + '), paste0(' + '), paste0(j, collapse = ' + '))
l_formulas[[length(l_formulas) + 1]] = forms
}
}
### operate 'linear models' (common code to both situations "if" below)
phen_1 <- phen %>%
select(l_group, l_varsy, l_varsx) %>%
group_by_at(l_group) %>%
nest() %>% #si em dóna error del nombre de files
#group_nest() %>%
slice(rep(1:n(), each = length(l_formulas))) %>%
arrange_at(l_group) %>%
mutate(
l_forms = rep(l_formulas),
complete_data = map(data, ~ .x %>% filter(complete.cases(.))),
l_mods = map2(complete_data, l_forms,
~ try(lm(formula = as.formula(.y), data = .x, model = TRUE))),
aic = map_dbl(l_mods, ~ janitor::clean_names(glance(.x), case = 'snake')[['aic']]),
bic = map_dbl(l_mods, ~ janitor::clean_names(glance(.x), case = 'snake')[['bic']]),
adj_r_squared = map_dbl(l_mods, ~ janitor::clean_names(glance(.x), case = 'snake')[['adj_r_squared']]))
###only if one of the 3 base variables are in the phen ( = base_covlist != 0), there will be a base model (entering this condition).
if(length(base_covlist) == 0){
### operate 'linear models' only in this "if" situation
phen_1 <- phen_1 %>%
select(-c(data, complete_data, l_mods)) %>%
unnest(cols = c(l_forms)) %>%
mutate_at(vars(aic, bic), ~ round(., 1)) %>%
group_by_at(l_group) %>%
group_nest() %>%
mutate(aicsort_data = map(data,
~ arrange_at(.x, vars(aic, bic, adj_r_squared)))) %>%
mutate(aicdiff_data = map(aicsort_data,
~ mutate(.x, aic_diff = aic - lag(aic, default = first(aic))))) %>%
unnest(cols = c(aicdiff_data)) %>%
select(-c(data, aicsort_data, aic_diff)) %>%
mutate_at(vars(l_forms), as.factor) %>%
mutate_at(vars(adj_r_squared), ~ round(., 3))
}
###only if one of the 3 base variables are in the phen ( = base_covlist != 0), there will be a base model (entering this condition).
else if(length(base_covlist) != 0){
### create l_formulas_base
l_formulas_base <- list()
# loop to create the formula of the base model
for(i in l_varsy) {
for(j in base_covlist) {
# create one formula
forml_base <- paste0(i, ' ~ ', paste0(unlist(base_covlist, j), collapse = ' + '))
}
l_formulas_base[[length(l_formulas_base) + 1]] = forml_base
}
### join in unique list()
l_formulas_base <- unlist(l_formulas_base, recursive = FALSE)
### add base model to list with all other models
#l_formulas <- rlist::list.append(l_formulas, l_formulas_base)
### operate 'linear models' only in this "if" situation
phen_1 <- phen_1 %>%
mutate(
l_form_mod_0 = l_formulas_base,
l_mod_0 = map2(complete_data, l_form_mod_0,
~ try(lm(formula = as.formula(.y), data = .x, model = TRUE))),
aic_mod_0 = map_dbl(l_mod_0, ~ janitor::clean_names(glance(.x), case = 'snake')[['aic']]),
bic_mod_0 = map_dbl(l_mod_0, ~ janitor::clean_names(glance(.x), case = 'snake')[['bic']]),
adj_r_squared_mod_0 = map_dbl(l_mod_0, ~ janitor::clean_names(glance(.x), case = 'snake')[['adj_r_squared']]),
anova_pval = map2_dbl(l_mod_0, l_mods, ~ anova(.x, .y)[['Pr(>F)']][2])) %>%
select(-c(data, complete_data, l_mods, l_mod_0)) %>%
unnest(cols = c(l_forms, l_form_mod_0)) %>%
mutate_at(vars(aic, bic, aic_mod_0, bic_mod_0), ~ round(., 1)) %>%
group_by_at(l_group) %>%
group_nest() %>%
mutate(aicsort_data = map(data,
~ arrange_at(.x, vars(aic, bic, adj_r_squared)))) %>%
mutate(aicdiff_data = map(aicsort_data,
~ mutate(.x, aic_diff = aic - lag(aic, default = first(aic))))) %>%
unnest(cols = c(aicdiff_data)) %>%
select(-c(data, aicsort_data, aic_diff)) %>%
mutate_at(vars(l_forms), as.factor) %>%
mutate_at(vars(adj_r_squared, adj_r_squared_mod_0, anova_pval), ~ round(., 3)) %>%
mutate(anova_pval_signif = factor(ifelse(anova_pval <= filter_signif_raw_pval, paste0(anova_pval),
ifelse(anova_pval > filter_signif_raw_pval, 'ns', 'NA'))))
}
# return
return(phen_1)
}
### 'lmm' models--------------------------------------------------------------------------
# '@export
chmi.stat.lmm_mods <- function(phen,
l_group,
n_levels = c('3', '2'),
format_pval = F,
stat = c('multcomp'))
{
# stat
stat <- match.arg(stat, c('multcomp'))
# 'lmm' models & 'ancova' adjustment
phen_1 <- phen %>%
group_by_at(l_group) %>%
nest() %>%
mutate(formls = map(isotype_igg,
~ paste0('log10_mfi ~ ',
paste0(c('t2_point_lm', 'gr_hbs'), collapse = ' * '), ' + ',
paste0('(t2_point_lm | original_id)')))) %>%
mutate(mods = map2(data, formls,
~ try(lmer(formula = as.formula(.y),
data = .x))))
# argument 'n_levels'
if(n_levels == '3') {
# argument for 3 factor variable
phen_2 <- phen_1 %>%
mutate(pval_int1 = map(mods,
~ summary(.x) %$% coefficients[5, 5] %>% as.numeric())) %>%
mutate(pval_int2 = map(mods,
~ summary(.x) %$% coefficients[6, 5] %>% as.numeric())) %>%
mutate(glht_t2point = map(mods,
~ multcomp::glht(.x, linfct = rbind(t2point = c(0, 1, 0, 0, 0, 0))))) %>%
mutate(dat_t2point = map(glht_t2point, ~ tidy(summary(.x)))) %>%
mutate(glht_t2point_gr1 = map(mods,
~ multcomp::glht(.x, linfct = rbind(t2point_gr1 = c(0, 1, 0, 0, 1, 0))))) %>%
mutate(dat_t2point_gr1 = map(glht_t2point_gr1, ~ tidy(summary(.x)))) %>%
mutate(glht_t2point_gr2 = map(mods,
~ multcomp::glht(.x, linfct = rbind(t2point_gr2 = c(0, 1, 0, 0, 0, 1))))) %>%
mutate(dat_t2point_gr2 = map(glht_t2point_gr2, ~ tidy(summary(.x))))
# unnest 'data'
phen_3 <- phen_2 %>%
select(l_group, dat_t2point, dat_t2point_gr1, dat_t2point_gr2, pval_int1, pval_int2) %>%
unnest() %>%
select(l_group, contains('val'), contains('est')) %>%
select(l_group, est_t2point = estimate, pval_t2point = p.value,
t2point_aa = estimate1, pval_t2p_aa = p.value1, pval_int_aa = pval_int1,
t2point_as = estimate2, pval_t2p_as = p.value2, pval_int_as = pval_int2)
} else{
# argument for 2 factor variable
phen_2 <- phen_1 %>%
mutate(pval_int2 = map(mods,
~ summary(.x) %$% coefficients[4, 5] %>% as.numeric())) %>%
mutate(glht_t2point = map(mods,
~ multcomp::glht(.x, linfct = rbind(t2point = c(0, 1, 0, 0))))) %>%
mutate(dat_t2point = map(glht_t2point, ~ tidy(summary(.x)))) %>%
mutate(glht_t2point_gr2 = map(mods,
~ multcomp::glht(.x, linfct = rbind(t2point_gr2 = c(0, 1, 0, 1))))) %>%
mutate(dat_t2point_gr2 = map(glht_t2point_gr2, ~ tidy(summary(.x))))
# unnest 'data'
phen_3 <- phen_2 %>%
select(l_group, dat_t2point, dat_t2point_gr2, pval_int2) %>%
unnest() %>%
select(l_group, contains('val'), contains('est')) %>%
select(l_group, est_t2point = estimate, pval_t2point = p.value,
t2point_as = estimate1, pval_t2p_as = p.value1, pval_int_as = pval_int2)
}
# argument for 'pval_format'
if (format_pval == T) {
# list of 'pvals'
l_pval <- phen_3 %>% select(contains('val')) %>% names()
# do 'p.adjust()'
phen_4 <- mutate_at(phen_3, vars(l_pval), funs(p.adjust(., method = 'BH')))
# round 'p.adjust()'
phen_4 <- mutate_at(phen_4, vars(l_pval), funs(
ifelse(is.na(.), 'nc',
ifelse(. <= 0.0001, '< 0.0001',
ifelse(. <= 0.001, '< 0.001',
ifelse(. <= 0.01, '< 0.01',
ifelse(. <= 0.05, paste0(round(., 3)),
'ns')))))))
# return
return(list(phen_4, phen_2))
}else {
# return
return(list(phen_3, phen_2))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.