R/run_exwas_sdq_confounders.R

run_exwas_sdq_confounders <- function (input_data, sdq, covariates_list, file_name) {

  tidy_result <- list()

  # Change the 'mids' object imputed dataset to complete, stacked dataset
  all_imps <- mice::complete(input_data, "long") %>%

    # Select variables of interes plus covariates, SDQ cores and imp number
    dplyr::select(.imp, covariates_list, cohort, smoking:sex, h_sdq_external, h_sdq_internal)


  for (conf in covariates_list) {

    #If confounder is a cohort, fit the model with SDQ and cohort only
    if (conf == "cohort") {

      # change input dataset to dataframe
      pool_fit <- all_imps %>%

        # perform regression for each imputed dataset (group by
        # imputation number)
        dplyr::group_by(.imp) %>%

        # run simple regression for sdq ~ exposure + covariates
        dplyr::do(model = MASS::glm.nb(formula = get(sdq) ~ get(conf), data = .))

    #If confounder is not a cohort, fit the model with SDQ, cohort and confounder
    } else {

      pool_fit <- all_imps %>%
        dplyr::group_by(.imp) %>%
        dplyr::do(model = MASS::glm.nb(formula = get(sdq) ~ get(conf) + cohort, data = .))
    }

    # transform the regression output to a list
    tidy_fit <- pool_fit %>%
      as.list() %>%

      # pick the list item containing numerical regression output
      # (coefficient and other statistics for the intercept,
      # confounders and exposure), ignore other results provided by
      # the glm.nb function
      .[[-1]] %>%

      # pool the regression outocomes for each imputed dataset, create new columns containing estimates
      #and CIs, assign name to the row with the confounder
      mice::pool() %>%
      summary(conf.int = TRUE, exponentiate = TRUE) %>%
      tibble::rownames_to_column(var = "confounder")

    if (conf == "cohort") {
      tidy_fit <- tidy_fit[-1, ]

    } else {

      tidy_fit <- tidy_fit[2, ]
    }

    tidy_fit <- tidy_fit %>%
    dplyr::mutate(CI = stringr::str_c(round(`2.5 %`, 2), round(`97.5 %`, 2), sep = "; "),
                  estCI = stringr::str_c(round(estimate, 2), " (", CI, ")", sep = ""),
                  pvalue = round(p.value, 3),
                  coadj = conf) %>%
      dplyr::mutate(confounder = gsub("get[(]conf[)]", paste0(conf, "_"), confounder)) %>%
      dplyr::select(confounder, coadj, estCI, pvalue)

    tidy_result[[length(tidy_result) + 1]] <- tidy_fit
  }

  #Combine reusults into one dataset
  res_all <- as.data.frame(do.call(rbind, tidy_result))

  # Save result to a file
  write.csv(res_all, file_name)

  return(res_all)
}
groovearmada/exwas documentation built on May 29, 2019, 12:02 a.m.