library(learnr)
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)

Presenting cohort findings is tricky, be careful


Adding more details to each model item in a list {data-progressive=TRUE}

Imagine you've ran several models. You:

You have 8 models in total, stored as a list. We should add more details to each model to differentiate them from each other. Use map() from purrr to wrangle each model simultaneously. The new code here, term[2], selects the main predictor, which is the second element in the term column.

capture.output({
data("unadjusted_models_list", package = "acdcourse")
data("adjusted_models_list", package = "acdcourse")
library(dplyr)
library(purrr)
}, file = tempfile())

Exercise step

Instructions:

# Add predictor and model type to each list item
unadjusted_models_list <- ___(
    unadjusted_models_list,
    # .x is purrr for "model goes here"
    ~mutate(.x,
            # This selects predictor, not confounder
            predictor = term[2],
            # Indicate model "adjustment"
            model = ___
           ))
'Add the adjustment column using `model = "Unadjusted"`.'
# Add predictor and model type to each list item
unadjusted_models_list <- map(
    unadjusted_models_list,
    # .x is purrr for "model goes here"
    ~mutate(.x,
            # This selects predictor, not confounder
            predictor = term[2],
            # Indicate model "adjustment"
            model = "Unadjusted"
           ))
"Nice!"

Exercise step

Instructions:

# Add predictor and model type to each list item
adjusted_models_list <- map(
    adjusted_models_list,
    # .x is purrr for "model goes here"
    ~mutate(.x,
            # This selects predictor, not confounder
            predictor = term[2],
            # Indicate model "adjustment"
            model = ___
           ))
"Refer to each list object in `map` with `.x`."
"Include the `~` before `mutate`."
# Add predictor and model type to each list item
adjusted_models_list <- map(
    adjusted_models_list,
    # .x is purrr for "model goes here"
    ~mutate(.x,
            # This selects predictor, not confounder
            predictor = term[2],
            # Indicate model "adjustment"
            model = "Adjusted"
           ))
"Excellent! You made use of map to add more details to each model object."

Combining the list of models into one data frame

The most efficient approach to later plotting and creating tables is to have all models in a single data frame. You've already prepared them a bit, now it's time to combine them together so you can continue working with them.

Instructions:

capture.output({
data("unadjusted_models_list", package = "acdcourse")
data("adjusted_models_list", package = "acdcourse")
library(dplyr)
library(purrr)
unadjusted_models_list <- map(
    unadjusted_models_list,
    ~mutate(.x, predictor = term[2], model = "Unadjusted")
)
adjusted_models_list <- map(
    adjusted_models_list,
    ~mutate(.x, predictor = term[2], model = "Adjusted")
)
}, file = tempfile())
all_models <- bind_rows(
        # Combine the two lists of models 
        ___, 
        ___
    ) %>% 
    mutate(outcome = ___) %>% 
    # Keep only predictor rows and fixed effects
    filter(___ == , ___ == ___)
"Filter when predictor and term are the same (`predictor == term`)."
all_models <- bind_rows(
        # Combine the two lists of models 
        unadjusted_models_list, 
        adjusted_models_list
    ) %>% 
    mutate(outcome = "got_cvd") %>% 
      # Keep only predictor rows and fixed effects
    filter(predictor == term, effect == "fixed")
"Well done! You've now bound all models together and continued wrangling them."

Communicating cohort findings through graphs


Plotting model estimate and uncertainty {data-progressive=TRUE}

Statistical analysis used on cohort data usually output some time of regression estimate along with a measure of uncertainty (e.g. 95% confidence interval). Sometimes it makes sense to present these results in a table, but often the better approach is to create a figure instead. Figures show magnitude, direction, uncertainty, and comparison of results very effectively.

Create a plot of the unadjusted model results that highlights the estimate and uncertainty of the estimate.

capture.output({
data("all_models", package = "acdcourse")
library(dplyr)
library(ggplot2)
}, file = tempfile())

Exercise step

Instructions:

# Keep only unadjusted models
unadjusted_results <- all_models %>% 
    filter(___ == ___)
'Filter should be `model == "Unadjusted"`.'
# Keep only unadjusted models
unadjusted_results <- all_models %>% 
    filter(model == "Unadjusted")
"Great!"

Exercise step

Instructions:

unadjusted_results <- all_models %>% 
    filter(model == "Unadjusted")

# Create a dot and error bar plot
unadjusted_results %>% 
    ggplot(aes(y = ___, x = ___,
              xmin = ___, xmax = ___)) +
    ___() +
    ___()
"The `errorbarh` (horizontal) requires `xmin = conf.low, xmax = conf.high`."
unadjusted_results <- all_models %>% 
    filter(model == "Unadjusted")

# Create a dot and error bar plot
unadjusted_results %>% 
    ggplot(aes(y = predictor, x = estimate, 
               xmin = conf.low, xmax = conf.high)) +
    geom_point() +
    geom_errorbarh()
"Great!"

Exercise step

Instructions:

unadjusted_results <- all_models %>% 
    filter(model == "Unadjusted")

# Create a dot and error bar plot
unadjusted_results %>% 
    ggplot(aes(y = predictor, x = estimate, 
               xmin = conf.low, xmax = conf.high)) +
    geom_point() +
    geom_errorbarh() +
    # Add vertical line
    ___(xintercept = ___)
"Set `xintercept = 1` for the center line."
unadjusted_results <- all_models %>% 
    filter(model == "Unadjusted")

# Create a dot and error bar plot
unadjusted_results %>% 
    ggplot(aes(y = predictor, x = estimate, 
               xmin = conf.low, xmax = conf.high)) +
    geom_point() +
    geom_errorbarh() +
    # Add vertical line
    geom_vline(xintercept = 1)
"Excellent! See how this graph shows the uncertainty around individual model estimates? This is an effective way of presenting results from models."

Create a more polished plot

Now that we've created this plot, let's polish it up. We want it to be "publication quality", since we'll eventually present this figure to others.

As with the previous exercise, use the unadjusted_results data frame you created to plot the findings. This time, make the plot more polished and presentable.

Instructions:

capture.output({
data("all_models", package = "acdcourse")
library(dplyr)
library(ggplot2)
unadjusted_results <- all_models %>% 
    filter(model == "Unadjusted")
}, file = tempfile())
# Make the plot more polished
unadjusted_results %>% 
    ggplot(aes(y = predictor, x = estimate, xmin = conf.low, xmax = conf.high)) +
    geom_point(size = ___) +
    geom_errorbarh(height = ___) +
    geom_vline(xintercept = 1, linetype = ___) +
    labs(y = ___, x = ___) +
      # Set the theme
    ___()
'The labels should be of the form `x = "Axis Label"` (e.g. for the x-axis).'
# Make the plot more polished
unadjusted_results %>% 
    ggplot(aes(y = predictor, x = estimate, xmin = conf.low, xmax = conf.high)) +
    geom_point(size = 3) +
    geom_errorbarh(height = 0.1) +
    geom_vline(xintercept = 1, linetype = "dotted") +
    labs(y = "Predictors", x = "Odds ratio (95% CI)") +
    # Set the theme
    theme_classic()
"Amazing! You have a very nice figure now that is ready to be presented to others! There are other themes to use if you don't like this one. Check out the package ggthemes for more."

Visualize unadjusted and adjusted model results

The STROBE best practices indicate to show both "crude" (unadjusted) and adjusted model results. Showing both can be informative and insightful into the research question. Create a plot of your results showing both unadjusted and adjusted models. Do the same steps as in the previous exercise for creating the plot.

Instructions:

capture.output({
data("all_models", package = "acdcourse")
library(ggplot2)
library(magrittr)
}, file = tempfile())
# Show results of both adjusted and unadjusted
___ %>% 
    ggplot(aes(y = predictor, x = estimate, xmin = conf.low, xmax = conf.high)) +
    geom_point(size = 3) +
    geom_errorbarh(height = 0.1) +
    geom_vline(xintercept = 1, linetype = "dotted") +
    # Facet plot by model
    ___(rows = vars(___)) +
    labs(y = "Predictors", x = "Odds ratio (95% CI)") +
    theme_classic()
"The faceting variable should be called as `vars(model)`."
# Show results of both adjusted and unadjusted
all_models %>% 
    ggplot(aes(y = predictor, x = estimate, xmin = conf.low, xmax = conf.high)) +
    geom_point(size = 3) +
    geom_errorbarh(height = 0.1) +
    geom_vline(xintercept = 1, linetype = "dotted") +
    # Split plot by model
    facet_grid(rows = vars(model)) +
    labs(y = "Predictors", x = "Odds ratio (95% CI)") +
    theme_classic()
"Great job! You can see that there are large differences in some of the results between unadjusted and adjusted models."

Use tables effectively to show your results


Present the basic characteristics of the cohort {data-progressive=TRUE}

A classic use for tables is showing the basic characteristics of a cohort dataset, as there are diverse data types and summary statistics that need to be shown. Including a basic participant characteristics table is part of the STROBE best practices. This table can be quite informative for others when they interpret your analysis results.

Using the carpenter package, create a table showing summary statistics for each data collection visit.

capture.output({
data("tidied_framingham", package = "acdcourse")
library(carpenter)
library(dplyr)
}, file = tempfile())

Exercise step

Instructions:

# Create a table of summary statistics
tidied_framingham %>% 
    # These discrete variables are numeric, but must be factors
    mutate_at(vars(followup_visit_number, got_cvd), as.factor) %>% 
    # Set followup visit number as table column
    outline_table(header = ___) 
"Use quotes around `followup_visit_number`."
# Create a table of summary statistics
tidied_framingham %>% 
    # These discrete variables are numeric, but must be factors
    mutate_at(vars(followup_visit_number, got_cvd), as.factor) %>% 
      # Set followup visit number as table column
    outline_table(header = "followup_visit_number") 
"Nice!"

Exercise step

Instructions:

# Create a table of summary statistics
tidied_framingham %>% 
    mutate_at(vars(followup_visit_number, got_cvd), as.factor) %>% 
    outline_table(header = "followup_visit_number") %>% 
    # Show n (%) for discrete variables as rows
    add_rows(c(___, ___, ___), stat = ___)
"The variables should be quoted, e.g. `"sex"`."
# Create a table of summary statistics
tidied_framingham %>% 
    mutate_at(vars(followup_visit_number, got_cvd), as.factor) %>% 
    outline_table(header = "followup_visit_number") %>% 
    # Show n (%) for discrete variables as rows
    add_rows(c("got_cvd", "sex", "education_combined"), stat = stat_nPct)
"Great!"

Exercise step

Instructions:

# Create a table of summary statistics
tidied_framingham %>% 
    mutate_at(vars(followup_visit_number, got_cvd), as.factor) %>% 
    outline_table(header = "followup_visit_number") %>% 
    add_rows(c("got_cvd", "sex", "education_combined"), stat = stat_nPct) %>% 
    # Show median (range) for continuous variables
    add_rows(c(___, ___, ___), stat = ___)
"The variables need to be surrounded by quotes, just like the function above."
# Create a table of summary statistics
tidied_framingham %>% 
    mutate_at(vars(followup_visit_number, got_cvd), as.factor) %>% 
    outline_table(header = "followup_visit_number") %>% 
    add_rows(c("got_cvd", "sex", "education_combined"), stat = stat_nPct) %>% 
    # Show median (range) for continuous variables
    add_rows(c("total_cholesterol", "body_mass_index", "participant_age"), stat = stat_medianIQR)
"Great!"

Exercise step

Instructions:

tidied_framingham %>% 
    mutate_at(vars(followup_visit_number, got_cvd), as.factor) %>% 
    outline_table(header = "followup_visit_number") %>% 
    add_rows(c("got_cvd", "sex", "education_combined"), stat = stat_nPct) %>% 
    add_rows(c("participant_age", "body_mass_index", "total_cholesterol"), stat = stat_medianIQR) %>% 
    # Rename headers to better titles
    renaming("header", c(___, ___, ___, ___)) %>% 
    # Build the table and convert to markdown form
    ___()
"The new column headers should be given as a character vector."
tidied_framingham %>% 
    mutate_at(vars(followup_visit_number, got_cvd), as.factor) %>% 
    outline_table(header = "followup_visit_number") %>% 
    add_rows(c("got_cvd", "sex", "education_combined"), stat = stat_nPct) %>% 
    add_rows(c("participant_age", "body_mass_index", "total_cholesterol"), stat = stat_medianIQR) %>% 
    # Rename headers to better titles
    renaming("header", c("Measures", "Baseline", "Second followup", "Third followup")) %>% 
    # Build the table and convert to markdown form
    build_table()
"Nice job! You've gotten the data formatted as a table for easy inclusion in a document or report and have provided basic participant characteristics from each cohort visit."

Supplemental tables of raw numbers for results {data-progressive=TRUE}

While the main messaging and presentation of results should emphasize figures over tables, often it is useful to other researchers (especially those doing meta-analyses or aggregating results) that the raw model results be given as well. Here we can use tables to give this data, as a supplement to the figure.

Provide the estimates and confidence intervals of the unadjusted and adjusted model results in a table format that you could include in a document or report. The packages glue, stringr, and knitr have been loaded.

capture.output({
data("all_models", package = "acdcourse")
library(knitr)
library(glue)
library(dplyr)
library(tidyr)
library(stringr)
}, file = tempfile())

Exercise step

Instructions:

# Prepare the results for the table
table_model_results <- all_models %>% 
    # Round values of variables to 3
    mutate_at(vars(___, ___, ___), ___, digits = ___)
"`mutate_at()` takes variables (as the first argument) inside `vars()` and applies a function like `round` as the second argument."
# Prepare the results for the table
table_model_results <- all_models %>% 
    # Round values of variables to 3
    mutate_at(vars(estimate, conf.low, conf.high), round, digits = 2)
"Great!"

Exercise step

Instructions:

# Prepare the results for the table
table_model_results <- all_models %>% 
    mutate_at(vars(estimate, conf.low, conf.high), round, digits = 2) %>% 
    # Use glue function to combine variables
    mutate(estimate_ci = glue("{___} ({___}, {___})"),
           # Underscores to spaces in predictor
           predictor = str_replace_all(___, "_", " "))
"The estimate and CI variables should be placed inside the `{}` in `glue()`."
# Prepare the results for the table
table_model_results <- all_models %>% 
    mutate_at(vars(estimate, conf.low, conf.high), round, digits = 2) %>% 
    # Use glue function to combine variables
    mutate(estimate_ci = glue("{estimate} ({conf.low}, {conf.high})"),
           # Underscores to spaces in predictor
           predictor = str_replace_all(predictor, "_", " "))
"Amazing!"

Exercise step

Instructions:

# Prepare the results for the table
all_models %>% 
    mutate_at(vars(estimate, conf.low, conf.high), round, digits = 2) %>% 
    mutate(estimate_ci = glue("{estimate} ({conf.low}, {conf.high})"),
           predictor = predictor %>% 
               str_remove("_scaled") %>% 
               str_replace_all("_", " ")) %>%
    # Keep then spread variables for final table
    select(___, ___, ___) %>% 
    spread(___, ___) %>% 
    # Create a Markdown table
    kable(caption = "Estimates and 95% CI from all models.")
"`spread` takes two arguments: 1) the current discrete column (`model`) that will be the new columns names, and 2) the values (`estimate_ci`) that will be in the new columns."
# Prepare the results for the table
all_models %>% 
    mutate_at(vars(estimate, conf.low, conf.high), round, digits = 2) %>% 
    mutate(estimate_ci = glue("{estimate} ({conf.low}, {conf.high})"),
           predictor = predictor %>% 
               str_remove("_scaled") %>% 
               str_replace_all("_", " ")) %>%
    # Keep then spread variables for final table
    select(model, predictor, estimate_ci) %>% 
    spread(model, estimate_ci) %>% 
    # Create a Markdown table
    kable(caption = "Estimates and 95% CI from all models.")
"Amazing! You've wrangled the data and prepared it to be presented as a table! You can now easily add this to your manuscript (especially easy if you use R Markdown)."


lwjohnst86/acdcourse documentation built on June 18, 2019, 8:26 p.m.