R/chmi.stat_lm.R

Defines functions chmi.stat.all_possible_regressions chmi.stat.lm_beta chmi.stat.glm_or

#----------------------------------
# Functions `lm_related` for `CHMI`
#----------------------------------

### 'glm' odds--------------------------------------------------------------------------

# '@export
chmi.stat.glm_or <- function(
  phen,
  l_group,
  l_varsy, l_varsx,
  arg_multivar = c('univariate', 'multivariate'),
  arg_info_model = c('null_deviance', 'df_null', 'log_lik', 'aic', 'bic', 'deviance', 'df_residual', 'NULL'),
  arg_info_beta = c('beta', 'std_error', 't_value', 'NULL'),
  arg_filter_vars,
  format_pval = TRUE,
  padj_method = c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY',  'fdr', 'none'),
  stat = c('janitor', 'broom', 'tidymodels', 'tidyverse'))
{
# stat
  stat <- match.arg(stat, c('janitor', 'broom', 'tidymodels', 'tidyverse'))

# list of formulas
  l_formulas <- list()

# loop for `l_varsy` & `l_varsx`
  if(arg_multivar == 'univariate'){
  # univariate
  	for(j in l_varsy) {
  		for(i in l_varsx) {
  	    forms <- paste0(j, ' ~ ', i)

    	 l_formulas[[length(l_formulas) + 1]] = forms
    	}
    }

  } else if (arg_multivar == 'multivariate'){
  # multivariate
    for(j in l_varsy) {
    	for(i in l_varsx) {
    	   forms <- paste0(j, ' ~ ', paste0(unlist(l_varsx, i), collapse = ' + '))
      }
      l_formulas[[length(l_formulas) + 1]] = forms
    }
  }

# extract whole summary of 'lm' models
  phen_1 <- phen %>%
    select(l_group, l_varsy, l_varsx) %>%
    group_by_at(l_group) %>%
    group_nest() %>%
    slice(rep(1:n(), each = length(l_formulas))) %>%
    arrange_at(l_group) %>%
    mutate(
      l_forms = rep(l_formulas),
      l_mods = map2(data, l_forms,
        ~ try(glm(formula = as.formula(.y), data = .x, family = binomial(link = 'logit')))),
      smry_model = map(l_mods, ~ janitor::clean_names(glance(.x), case = 'snake')),
      smry_beta = map(l_mods, ~ janitor::clean_names(tidy(.x), case = 'snake')),
      or = map(l_mods, ~ tibble::enframe(exp(coef(.x)), name = NULL, value = 'or')),
      ci = map(l_mods, ~ tibble::as_tibble(exp(confint(.x, level = .95))))) %>%
    select(-data, -l_mods) %>%
    unnest(cols = c('l_forms', 'smry_model', 'smry_beta', 'or', 'ci'),
      names_repair = ~ tidyr_legacy(., sep = '_')) %>%
    filter(!str_detect(term, 'Intercept')) %>%
    select(everything(),
      beta = estimate, t_value = statistic, vars_x = term, raw_pval = p_value,
      ci_lower = starts_with('2.5') , ci_upper = starts_with('97.5')) %>%
    separate(l_forms, c('vars_y', 'rest'), sep = ' ~ ', remove = TRUE) %>%
    mutate_at(vars(c('vars_y', 'vars_x')), as.factor) %>%
    select(-rest)


### argument 'arg_filter_vars'
  if(!missing(arg_filter_vars)) {
  # update 'data'
    phen_1 <- phen_1 %>%
      filter(str_detect(vars_x, pattern = paste(arg_filter_vars, collapse = '|')))
    }

  # list of variables to choose
    l_vars <- c(l_group, 'vars_y', 'vars_x', 'or', 'ci_lower', 'ci_upper', 'raw_pval')
    l_vars_arg <- c(l_vars,	arg_info_model, arg_info_beta)

### update 'data'
  phen_1 <- phen_1 %>%
    select(one_of(l_vars_arg))

### argument 'format_pval'
  if(format_pval == TRUE & !missing(padj_method)) {
    phen_1 <- chmi.stat.p_adjust(phen = phen_1, method = padj_method) %>%
    	mutate_at(vars(c('adjust_text', 'adjust_signif')), as.factor)
  }

  # return
  return(phen_1)
}



### 'lm' betas----------------------------------------------------------------------------

# '@export
chmi.stat.lm_beta <- function(
		phen,
		l_group,
		l_varsy, l_varsx,
    custom_formls = FALSE, #if we want to customize the model formulas. By default we don't.
    formls = NULL, #list of customized models (one per each 5 isotypes). By default, NULL.
    arg_multivar = c('univariate', 'multivariate'),
    arg_info_model = c('r_squared', 'adj_r_squared', 'sigma', 't_value', 'p_value',
      'df', 'log_lik', 'aic', 'bic', 'deviance', 'df_residual', 'NULL'),
    arg_info_beta = c('std_error' , 't_value_beta', 'NULL'),
    arg_filter_vars,
		format_pval = TRUE,
    padj_method = c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY',  'fdr', 'none'),
    stat = c('janitor', 'broom', 'tidymodels',  'tidyverse'))
{
### stat
  stat <- match.arg(stat, c('janitor', 'broom', 'tidymodels', 'tidyverse'))

  # list of formulas
  l_formulas <- list()

  if(custom_formls == FALSE){

  ### loop for `l_varsy` & `l_varsx`
    if(arg_multivar == 'univariate'){
      # univariate
      	for(j in l_varsy) {
      		for(i in l_varsx) {
      	    forms <- paste0(j, ' ~ ', i)

        l_formulas[[length(l_formulas) + 1]] = forms
      	}

      }

    } else if (arg_multivar == 'multivariate'){
      # multivariate
        for(j in l_varsy) {
        	for(i in l_varsx) {
        	   forms <- paste0(j, ' ~ ', paste0(unlist(l_varsx, i), collapse = ' + '))
        }
       l_formulas[[length(l_formulas) + 1]] = forms
      }
    }

	phen_1 <- phen %>%
		      select(l_group, l_varsy, l_varsx) %>%
		      group_by_at(l_group) %>%
		      group_nest() %>%
		      slice(rep(1:n(), each = length(l_formulas))) %>%
		      arrange_at(l_group) %>%
		      mutate(l_forms = rep(l_formulas))

  }

    else if(custom_formls == TRUE){

        l_formulas <- formls

        phen_1 <- phen %>%
            	  select(l_group, l_varsy, l_varsx) %>%
        		  arrange_at(l_group) %>%
                  group_by(isotype) %>%
                  group_nest() %>%
                  mutate(l_forms = rep(l_formulas)) %>% #first we put the corresponding formula per isotype
                  unnest(cols = data) %>%
                  group_by_at(l_group) %>% #and finally we arrange the data so that it looks like when we
                  #don't have customized formulas
                  group_nest() %>%
                  mutate(l_form = map(data, ~list(.x$l_forms)),
                         l_forms = map(l_form, ~ .x[[1]][[1]])) %>%
                  select(-l_form)

    }

### extract whole summary of 'lm' models

      phen_1 <- phen_1 %>%
                mutate(l_mods = map2(data, l_forms,
                        ~ try(lm(formula = as.formula(.y), data = .x, model = TRUE))),
                       smry_model = map(l_mods, ~ janitor::clean_names(glance(.x), case = 'snake')),
                       smry_beta = map(l_mods, ~ janitor::clean_names(tidy(.x), case = 'snake')),
                       ci_lower = map(l_mods, ~ tibble::as_tibble(confint(.x, level = .95))[, 1]),
                       ci_upper = map(l_mods, ~ tibble::as_tibble(confint(.x, level = .95))[, 2])) %>%
                select(-data, -l_mods) %>%
                unnest(cols = c('l_forms', 'smry_model', 'smry_beta', 'ci_lower', 'ci_upper'),
                       names_repair = ~ tidyr_legacy(., sep = '_')) %>% ## '_1' fix 'data_beta' info for 'statistic' & 'p_value'
                filter(!str_detect(term, 'Intercept')) %>%
                select(everything(),
                       t_value = statistic,
                       beta = estimate, vars_x = term, t_value_beta = statistic_1, raw_pval = p_value_1,
                       ci_lower = starts_with('2.5') , ci_upper = starts_with('97.5')) %>%
                separate(l_forms, c('vars_y', 'rest'), sep = ' ~ ', remove = TRUE) %>%
                mutate_at(vars(c('vars_y', 'vars_x')), as.factor) %>%
                select(-rest)


### argument 'arg_filter_vars'
  if(!missing(arg_filter_vars)) {
  # update 'data'
    phen_1 <- phen_1 %>%
      filter(str_detect(vars_x, pattern = paste(arg_filter_vars, collapse = '|')))
  }

# list of variables to choose
l_vars <- c(l_group, 'vars_y', 'vars_x', 'beta', 'ci_lower', 'ci_upper', 'raw_pval')
l_vars_arg <- c(l_vars,	arg_info_model, arg_info_beta)

### update 'data'
  phen_1 <- phen_1 %>%
    select(one_of(l_vars_arg))


### argument 'format_pval'
	if(format_pval == TRUE & !missing(padj_method)) {
    phen_1 <- chmi.stat.p_adjust(phen = phen_1, method = padj_method) %>%
    	mutate_at(vars(c('adjust_text', 'adjust_signif')), as.factor)
  }

  # return
  return(phen_1)
}




### AIC - BIC selection manual function-----------------------------------------

## https://www.methodology.psu.edu/resources/AIC-vs-BIC/
## Kruschke K., J. Doing Bayesian Data Analysis: A tutorial with R and BUGS.
## Metric Predicted Variable with Multiple Metric Predictors (pp. 371-400).
## https://www.users.csbsju.edu/~mgass/robert.pdf

# '@export
chmi.stat.all_possible_regressions <- function(
  phen,
  l_varsy,
  l_varsx,
  base_covlist,
  l_group,
  filter_signif_raw_pval = c(0.01, 0.05, 0.1),
  #base_covlist, tested_covlist,
  #n_comb, testing = FALSE,
  stat = c('plyr'))
{

### arg
    stat <- match.arg(stat, c('plyr'))

  # check
  stopifnot(all(c(l_varsx, l_varsy) %in% names(phen)))

### update base_covlist (depending on the l_varsx of each multivariable model)
    base_covlist <- base_covlist[base_covlist %in% l_varsx]

### create a list of 'tested_covlist' combinations
    l_combn <- list()

    #loop
    for(i in 1:length(l_varsx[l_varsx %nin% base_covlist])){
        # combinations without repetition
        combn_1 <- combn(x = l_varsx[l_varsx %nin% base_covlist], i, FUN = paste0, simplify = FALSE)

        # listed combinations
        l_combn <- rlist::list.append(l_combn, combn_1)
    }

### join in unique list()
    l_combn <- unlist(l_combn, recursive = FALSE)


### create l_formulas
    l_formulas <- list()

    # loop
    for(i in l_varsy) {
        for(j in l_combn) {
        # create one formula
        forms <- paste0(i, ' ~ ', paste0(base_covlist, collapse = ' + '), paste0(' + '), paste0(j, collapse = ' + '))

        l_formulas[[length(l_formulas) + 1]] = forms
        }
     }

### operate 'linear models' (common code to both situations "if" below)
phen_1 <- phen %>%
    select(l_group, l_varsy, l_varsx) %>%
    group_by_at(l_group) %>%
    nest() %>%  #si em dóna error del nombre de files
    #group_nest() %>%
    slice(rep(1:n(), each = length(l_formulas))) %>%
    arrange_at(l_group) %>%
    mutate(
        l_forms = rep(l_formulas),
        complete_data = map(data, ~ .x %>% filter(complete.cases(.))),
        l_mods = map2(complete_data, l_forms,
            ~ try(lm(formula = as.formula(.y), data = .x, model = TRUE))),
        aic = map_dbl(l_mods, ~ janitor::clean_names(glance(.x), case = 'snake')[['aic']]),
        bic = map_dbl(l_mods, ~ janitor::clean_names(glance(.x), case = 'snake')[['bic']]),
        adj_r_squared = map_dbl(l_mods, ~ janitor::clean_names(glance(.x), case = 'snake')[['adj_r_squared']]))

###only if one of the 3 base variables are in the phen ( = base_covlist != 0), there will be a base model (entering this condition).
     if(length(base_covlist) == 0){

     ### operate 'linear models' only in this "if" situation
     phen_1 <- phen_1 %>%
               select(-c(data, complete_data, l_mods)) %>%
               unnest(cols = c(l_forms)) %>%
               mutate_at(vars(aic, bic), ~ round(., 1)) %>%
               group_by_at(l_group) %>%
               group_nest() %>%
               mutate(aicsort_data = map(data,
               ~ arrange_at(.x, vars(aic, bic, adj_r_squared)))) %>%
               mutate(aicdiff_data = map(aicsort_data,
               ~ mutate(.x, aic_diff = aic - lag(aic, default = first(aic))))) %>%
               unnest(cols = c(aicdiff_data)) %>%
               select(-c(data, aicsort_data, aic_diff)) %>%
               mutate_at(vars(l_forms), as.factor) %>%
               mutate_at(vars(adj_r_squared), ~ round(., 3))
     }


###only if one of the 3 base variables are in the phen ( = base_covlist != 0), there will be a base model (entering this condition).
     else if(length(base_covlist) != 0){

     ### create l_formulas_base
         l_formulas_base <- list()

     # loop to create the formula of the base model
   for(i in l_varsy) {
       for(j in base_covlist) {
         # create one formula
         forml_base <- paste0(i, ' ~ ', paste0(unlist(base_covlist, j), collapse = ' + '))
          }
         l_formulas_base[[length(l_formulas_base) + 1]] = forml_base
      }

    ### join in unique list()
    l_formulas_base <- unlist(l_formulas_base, recursive = FALSE)

    ### add base model to list with all other models
    #l_formulas <- rlist::list.append(l_formulas, l_formulas_base)

### operate 'linear models' only in this "if" situation
phen_1 <- phen_1 %>%
          mutate(
            l_form_mod_0 = l_formulas_base,
            l_mod_0 = map2(complete_data, l_form_mod_0,
            ~ try(lm(formula = as.formula(.y), data = .x, model = TRUE))),
            aic_mod_0 = map_dbl(l_mod_0, ~ janitor::clean_names(glance(.x), case = 'snake')[['aic']]),
            bic_mod_0 = map_dbl(l_mod_0, ~ janitor::clean_names(glance(.x), case = 'snake')[['bic']]),
            adj_r_squared_mod_0 = map_dbl(l_mod_0, ~ janitor::clean_names(glance(.x), case = 'snake')[['adj_r_squared']]),
            anova_pval = map2_dbl(l_mod_0, l_mods, ~ anova(.x, .y)[['Pr(>F)']][2])) %>%
          select(-c(data, complete_data, l_mods, l_mod_0)) %>%
          unnest(cols = c(l_forms, l_form_mod_0)) %>%
          mutate_at(vars(aic, bic, aic_mod_0, bic_mod_0), ~ round(., 1)) %>%
          group_by_at(l_group) %>%
          group_nest() %>%
          mutate(aicsort_data = map(data,
          ~ arrange_at(.x, vars(aic, bic, adj_r_squared)))) %>%
          mutate(aicdiff_data = map(aicsort_data,
          ~ mutate(.x, aic_diff = aic - lag(aic, default = first(aic))))) %>%
          unnest(cols = c(aicdiff_data)) %>%
          select(-c(data, aicsort_data, aic_diff)) %>%
          mutate_at(vars(l_forms), as.factor) %>%
          mutate_at(vars(adj_r_squared, adj_r_squared_mod_0, anova_pval), ~ round(., 3)) %>%
          mutate(anova_pval_signif = factor(ifelse(anova_pval <= filter_signif_raw_pval, paste0(anova_pval),
                                  ifelse(anova_pval > filter_signif_raw_pval, 'ns', 'NA'))))

        }

 # return
 return(phen_1)
}




### 'lmm' models--------------------------------------------------------------------------

# '@export
chmi.stat.lmm_mods <- function(phen,
    l_group,
    n_levels = c('3', '2'),
    format_pval = F,
    stat = c('multcomp'))
{
# stat
  stat <- match.arg(stat, c('multcomp'))

# 'lmm' models & 'ancova' adjustment
  phen_1 <- phen %>%
    group_by_at(l_group) %>%
    nest() %>%
    mutate(formls = map(isotype_igg,
    ~ paste0('log10_mfi ~ ',
      paste0(c('t2_point_lm', 'gr_hbs'), collapse = ' * '), ' + ',
      paste0('(t2_point_lm | original_id)')))) %>%
    mutate(mods = map2(data, formls,
    ~ try(lmer(formula = as.formula(.y),
          data = .x))))

# argument 'n_levels'
if(n_levels == '3') {
# argument for 3 factor variable
  phen_2 <- phen_1 %>%
    mutate(pval_int1 = map(mods,
    ~ summary(.x) %$% coefficients[5, 5] %>% as.numeric())) %>%
    mutate(pval_int2 = map(mods,
    ~ summary(.x) %$% coefficients[6, 5] %>% as.numeric())) %>%
    mutate(glht_t2point = map(mods,
    ~ multcomp::glht(.x, linfct = rbind(t2point = c(0, 1, 0, 0, 0, 0))))) %>%
    mutate(dat_t2point = map(glht_t2point, ~ tidy(summary(.x)))) %>%
    mutate(glht_t2point_gr1 = map(mods,
    ~ multcomp::glht(.x, linfct = rbind(t2point_gr1 = c(0, 1, 0, 0, 1, 0))))) %>%
    mutate(dat_t2point_gr1 = map(glht_t2point_gr1, ~ tidy(summary(.x)))) %>%
    mutate(glht_t2point_gr2 = map(mods,
    ~ multcomp::glht(.x, linfct = rbind(t2point_gr2 = c(0, 1, 0, 0, 0, 1))))) %>%
    mutate(dat_t2point_gr2 = map(glht_t2point_gr2, ~ tidy(summary(.x))))

 # unnest 'data'
   phen_3 <- phen_2 %>%
    select(l_group, dat_t2point, dat_t2point_gr1, dat_t2point_gr2, pval_int1, pval_int2) %>%
    unnest() %>%
    select(l_group, contains('val'), contains('est')) %>%
    select(l_group, est_t2point = estimate, pval_t2point = p.value,
      t2point_aa = estimate1, pval_t2p_aa = p.value1, pval_int_aa = pval_int1,
      t2point_as = estimate2, pval_t2p_as = p.value2, pval_int_as = pval_int2)

  } else{
  # argument for 2 factor variable
    phen_2 <- phen_1 %>%
      mutate(pval_int2 = map(mods,
      ~ summary(.x) %$% coefficients[4, 5] %>% as.numeric())) %>%
      mutate(glht_t2point = map(mods,
      ~ multcomp::glht(.x, linfct = rbind(t2point = c(0, 1, 0, 0))))) %>%
      mutate(dat_t2point = map(glht_t2point, ~ tidy(summary(.x)))) %>%
      mutate(glht_t2point_gr2 = map(mods,
      ~ multcomp::glht(.x, linfct = rbind(t2point_gr2 = c(0, 1, 0, 1))))) %>%
      mutate(dat_t2point_gr2 = map(glht_t2point_gr2, ~ tidy(summary(.x))))

  # unnest 'data'
    phen_3 <- phen_2 %>%
      select(l_group, dat_t2point, dat_t2point_gr2, pval_int2) %>%
      unnest() %>%
      select(l_group, contains('val'), contains('est')) %>%
      select(l_group, est_t2point = estimate, pval_t2point = p.value,
         t2point_as = estimate1, pval_t2p_as = p.value1, pval_int_as = pval_int2)
  }
  # argument for 'pval_format'
    if (format_pval == T) {
    # list of 'pvals'
      l_pval <- phen_3 %>% select(contains('val')) %>% names()

    # do 'p.adjust()'
      phen_4 <- mutate_at(phen_3, vars(l_pval), funs(p.adjust(., method = 'BH')))

    # round 'p.adjust()'
      phen_4 <- mutate_at(phen_4, vars(l_pval), funs(
        ifelse(is.na(.), 'nc',
        ifelse(. <= 0.0001, '< 0.0001',
        ifelse(. <= 0.001, '< 0.001',
        ifelse(. <= 0.01, '< 0.01',
        ifelse(. <= 0.05, paste0(round(., 3)),
        'ns')))))))

      # return
      return(list(phen_4, phen_2))

     }else {
     # return
     return(list(phen_3, phen_2))
     }
}
mvazquezs/chmitools documentation built on May 1, 2020, 2:06 a.m.