designit: a flexible engine to generate experiment layouts

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(designit)
library(tidyverse)

Introduction

Examples in this vignette are used were used in our presentation.

It uses a subset of the longitudinal_subject_samples dataset.

data("longitudinal_subject_samples")

dat <- longitudinal_subject_samples |> 
  filter(Group %in% 1:5, Week %in% c(1, 4)) |>
  select(SampleID, SubjectID, Group, Sex, Week)

# for simplicity: remove two subjects that don't have both visits
dat <- dat |>
  filter(SubjectID %in%
    (dat |> count(SubjectID) |> filter(n == 2) |> pull(SubjectID)))


subject_data <- dat |>
  select(SubjectID, Group, Sex) |>
  unique()

Batch effects matter

Here's an example of plate effect. Here both top and bottom rows of the plate are used as controls.

This is the experiment design:

data("plate_effect_example")
plate_effect_example |>
  ggplot() +
  aes(x = column, y = row, fill = treatment, alpha = log_conc) +
  geom_tile() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  scale_y_discrete(limits = rev) +
  scale_fill_brewer(palette = "Set1") +
  # make transparency more visible
  scale_alpha_continuous(range = c(0.2, 1)) +
  ggtitle("Design")

These are the readouts:

p1 <- plate_effect_example |>
  ggplot() +
  aes(x = column, y = row, fill = readout) +
  geom_tile() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  scale_y_discrete(limits = rev) +
  scale_fill_viridis_c() +
  ggtitle("Readout")

p2 <- plate_effect_example |>
  filter(treatment == "control") |>
  mutate(column = as.numeric(column)) |>
  ggplot() +
  aes(x = column, y = readout, color = row) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  ggtitle("Control")

cowplot::plot_grid(p1, p2, nrow = 2)

Due to the plate effect, the control rows are affected differently. It is virtually impossible to normalize readouts in a meaningful way.

Go fully random?

set.seed(17) # gives `bad` random assignment

bc <- BatchContainer$new(
  dimensions = list("batch" = 3, "location" = 11)
) |>
  assign_random(subject_data)

Gone wrong: Random distribution of 31 grouped subjects into 3 batches turns out unbalanced:

bc$get_samples() |>
  ggplot(aes(x = batch, fill = Group)) +
  geom_bar() +
  labs(y = "subject count")

Block what you can and randomize what you cannot.” (G. Box, 1978)

designit

::: {#hello .greeting .message style="color: darkgreen;"} To avoid batch or gradient effects in complex experiments, designit is an R package that offers flexible ways to allocate a given set of samples to experiment layouts. It's strength is that it implements a very general framework that can easily be customized and extended to fit specific constrained layouts. :::

Sample Batching

Setup

set.seed(17) # gives `bad` random assignment
bc <- BatchContainer$new(
  dimensions = list("batch" = 3, "location" = 11)
) |>
  assign_random(subject_data)

Batch composition before optimization

cowplot::plot_grid(
  plotlist = list(
    bc$get_samples() |>
      ggplot(aes(x = batch, fill = Group)) +
      geom_bar() +
      labs(y = "subject count"),
    bc$get_samples() |>
      ggplot(aes(x = batch, fill = Sex)) +
      geom_bar() +
      labs(y = "subject count")
  ),
  nrow = 1
)
bc$get_samples()
bind_rows(
  head(bc$get_samples(), 3) |>
    mutate(across(everything(), as.character)),
  tibble(
    batch = "...",
    location = " ...",
    SubjectID = "...",
    Group = "...", Sex = "..."
  ),
  tail(bc$get_samples(), 3) |>
    mutate(across(everything(), as.character))
) |>
  gt::gt() |>
  gt::tab_options(
    table.font.size = 11,
    data_row.padding = 0.1
  )

Optimization

bc <- optimize_design(
  bc,
  scoring = list(
    group = osat_score_generator(
      batch_vars = "batch",
      feature_vars = "Group"
    ),
    sex = osat_score_generator(
      batch_vars = "batch",
      feature_vars = "Sex"
    )
  ),
  n_shuffle = 1,
  acceptance_func =
    ~ accept_leftmost_improvement(..., tolerance = 0.01),
  max_iter = 150,
  quiet = TRUE
)

Batch composition after optimization

cowplot::plot_grid(
  plotlist = list(
    bc$get_samples() |>
      ggplot(aes(x = batch, fill = Group)) +
      geom_bar() +
      labs(y = "subject count"),
    bc$get_samples() |>
      ggplot(aes(x = batch, fill = Sex)) +
      geom_bar() +
      labs(y = "subject count"),
    bc$plot_trace(include_aggregated = TRUE)
  ),
  ncol = 3
)
bind_rows(
  head(bc$get_samples(), 3) |>
    mutate(across(everything(), as.character)),
  tibble(
    batch = "...",
    location = " ...",
    SubjectID = "...",
    Group = "...", Sex = "..."
  ),
  tail(bc$get_samples(), 3) |>
    mutate(across(everything(), as.character))
) |>
  gt::gt() |>
  gt::tab_options(
    table.font.size = 11,
    data_row.padding = 0.1
  )

Plate layouts

Continuous confounding

Assays are often performed in well plates (24, 96, 384)

Observed effects

Since plate effects often cannot be avoided, we aim to distribute sample groups of interest evenly across the plate and adjust for the effect computationally.

Setup

set.seed(4)

bc <- BatchContainer$new(
  dimensions = list("plate" = 3, "row" = 4, "col" = 6)
) |>
  assign_in_order(dat)
plot_plate(bc,
  plate = plate, row = row, column = col,
  .color = Group, title = "Initial layout by Group"
)
plot_plate(bc,
  plate = plate, row = row, column = col,
  .color = Sex, title = "Initial layout by Sex"
)
cowplot::plot_grid(
  plotlist = list(
    plot_plate(bc,
      plate = plate, row = row, column = col,
      .color = Group, title = "Initial layout by Group"
    ),
    plot_plate(bc,
      plate = plate, row = row, column = col,
      .color = Sex, title = "Initial layout by Sex"
    )
  ),
  nrow = 2
)

2-step optimization

Across plate optimization using osat score as before

bc1 <- optimize_design(
  bc,
  scoring = list(
    group = osat_score_generator(
      batch_vars = "plate",
      feature_vars = "Group"
    ),
    sex = osat_score_generator(
      batch_vars = "plate",
      feature_vars = "Sex"
    )
  ),
  n_shuffle = 1,
  acceptance_func =
    ~ accept_leftmost_improvement(..., tolerance = 0.01),
  max_iter = 150,
  quiet = TRUE
)
cowplot::plot_grid(
  plotlist = list(
    plot_plate(bc1,
      plate = plate, row = row, column = col,
      .color = Group, title = "Layout after the first step, Group"
    ),
    plot_plate(bc1,
      plate = plate, row = row, column = col,
      .color = Sex, title = "Layout after the first step, Sex"
    )
  ),
  nrow = 2
)

Within plate optimization using distance based sample scoring function

bc2 <- optimize_design(
  bc1,
  scoring = mk_plate_scoring_functions(
    bc1,
    plate = "plate", row = "row", column = "col",
    group = "Group"
  ),
  shuffle_proposal_func = shuffle_with_constraints(dst = plate == .src$plate),
  max_iter = 150,
  quiet = TRUE
)
cowplot::plot_grid(
  plotlist = list(
    plot_plate(bc2,
      plate = plate, row = row, column = col,
      .color = Group, title = "Layout after the second step, Group"
    ),
    plot_plate(bc2,
      plate = plate, row = row, column = col,
      .color = Sex, title = "Layout after the second step, Sex"
    )
  ),
  nrow = 2
)

2-step optimization multi_plate_layout()

We are performing the same optimization as before, but using the multi_plate_layout() function to combine the two steps.

bc <- optimize_multi_plate_design(
  bc,
  across_plates_variables = c("Group", "Sex"),
  within_plate_variables = c("Group"),
  plate = "plate", row = "row", column = "col",
  n_shuffle = 2,
  max_iter = 500 # 2000
)
cowplot::plot_grid(
  plotlist = list(
    plot_plate(bc,
      plate = plate, row = row, column = col,
      .color = Group, title = "After optimization, Group"
    ),
    plot_plate(bc,
      plate = plate, row = row, column = col,
      .color = Sex, title = "After optimization, Sex"
    )
  ),
  nrow = 2
)
bc$plot_trace()

Glimpse on more complex application

Goal:

Constraints:

see vignette invivo_study_design for the full story.

Conclusion

Acknowledgements

layout <- crossing(row = 1:9, column = 1:12) |>
  mutate(Questions = "no")
layout$Questions[c(
  16, 17, 18, 19, 20, 21,
  27, 28, 33, 34,
  45, 46,
  55, 56, 66, 67, 90, 91
)] <- "yes"

plot_plate(layout, .color = Questions, title = "Thank you")


Try the designit package in your browser

Any scripts or data that you put into this service are public.

designit documentation built on May 29, 2024, 12:04 p.m.