knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(designit) library(tidyverse)
Examples in this vignette are used were used in our presentation.
It uses a subset of the longitudinal_subject_samples
dataset.
data("longitudinal_subject_samples") dat <- longitudinal_subject_samples |> filter(Group %in% 1:5, Week %in% c(1, 4)) |> select(SampleID, SubjectID, Group, Sex, Week) # for simplicity: remove two subjects that don't have both visits dat <- dat |> filter(SubjectID %in% (dat |> count(SubjectID) |> filter(n == 2) |> pull(SubjectID))) subject_data <- dat |> select(SubjectID, Group, Sex) |> unique()
Here's an example of plate effect. Here both top and bottom rows of the plate are used as controls.
This is the experiment design:
data("plate_effect_example") plate_effect_example |> ggplot() + aes(x = column, y = row, fill = treatment, alpha = log_conc) + geom_tile() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) + scale_y_discrete(limits = rev) + scale_fill_brewer(palette = "Set1") + # make transparency more visible scale_alpha_continuous(range = c(0.2, 1)) + ggtitle("Design")
These are the readouts:
p1 <- plate_effect_example |> ggplot() + aes(x = column, y = row, fill = readout) + geom_tile() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) + scale_y_discrete(limits = rev) + scale_fill_viridis_c() + ggtitle("Readout") p2 <- plate_effect_example |> filter(treatment == "control") |> mutate(column = as.numeric(column)) |> ggplot() + aes(x = column, y = readout, color = row) + geom_point() + geom_line() + scale_color_brewer(palette = "Set1") + ggtitle("Control") cowplot::plot_grid(p1, p2, nrow = 2)
Due to the plate effect, the control rows are affected differently. It is virtually impossible to normalize readouts in a meaningful way.
set.seed(17) # gives `bad` random assignment bc <- BatchContainer$new( dimensions = list("batch" = 3, "location" = 11) ) |> assign_random(subject_data)
Gone wrong: Random distribution of 31 grouped subjects into 3 batches turns out unbalanced:
bc$get_samples() |> ggplot(aes(x = batch, fill = Group)) + geom_bar() + labs(y = "subject count")
“Block what you can and randomize what you cannot.” (G. Box, 1978)
::: {#hello .greeting .message style="color: darkgreen;"}
To avoid batch or gradient effects in complex experiments,
designit
is an R package that offers flexible ways to allocate a given set of
samples to experiment layouts.
It's strength is that it implements a very general framework that can
easily be customized and extended to fit specific constrained layouts.
:::
BatchContainer
classoptimize_design()
set.seed(17) # gives `bad` random assignment
bc <- BatchContainer$new( dimensions = list("batch" = 3, "location" = 11) ) |> assign_random(subject_data)
Batch composition before optimization
cowplot::plot_grid( plotlist = list( bc$get_samples() |> ggplot(aes(x = batch, fill = Group)) + geom_bar() + labs(y = "subject count"), bc$get_samples() |> ggplot(aes(x = batch, fill = Sex)) + geom_bar() + labs(y = "subject count") ), nrow = 1 )
bc$get_samples()
bind_rows( head(bc$get_samples(), 3) |> mutate(across(everything(), as.character)), tibble( batch = "...", location = " ...", SubjectID = "...", Group = "...", Sex = "..." ), tail(bc$get_samples(), 3) |> mutate(across(everything(), as.character)) ) |> gt::gt() |> gt::tab_options( table.font.size = 11, data_row.padding = 0.1 )
bc <- optimize_design( bc, scoring = list( group = osat_score_generator( batch_vars = "batch", feature_vars = "Group" ), sex = osat_score_generator( batch_vars = "batch", feature_vars = "Sex" ) ), n_shuffle = 1, acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.01), max_iter = 150, quiet = TRUE )
Batch composition after optimization
cowplot::plot_grid( plotlist = list( bc$get_samples() |> ggplot(aes(x = batch, fill = Group)) + geom_bar() + labs(y = "subject count"), bc$get_samples() |> ggplot(aes(x = batch, fill = Sex)) + geom_bar() + labs(y = "subject count"), bc$plot_trace(include_aggregated = TRUE) ), ncol = 3 )
bind_rows( head(bc$get_samples(), 3) |> mutate(across(everything(), as.character)), tibble( batch = "...", location = " ...", SubjectID = "...", Group = "...", Sex = "..." ), tail(bc$get_samples(), 3) |> mutate(across(everything(), as.character)) ) |> gt::gt() |> gt::tab_options( table.font.size = 11, data_row.padding = 0.1 )
Assays are often performed in well plates (24, 96, 384)
Observed effects
Since plate effects often cannot be avoided, we aim to distribute sample groups of interest evenly across the plate and adjust for the effect computationally.
set.seed(4) bc <- BatchContainer$new( dimensions = list("plate" = 3, "row" = 4, "col" = 6) ) |> assign_in_order(dat)
plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Initial layout by Group" ) plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Initial layout by Sex" )
cowplot::plot_grid( plotlist = list( plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Initial layout by Group" ), plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Initial layout by Sex" ) ), nrow = 2 )
bc1 <- optimize_design( bc, scoring = list( group = osat_score_generator( batch_vars = "plate", feature_vars = "Group" ), sex = osat_score_generator( batch_vars = "plate", feature_vars = "Sex" ) ), n_shuffle = 1, acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.01), max_iter = 150, quiet = TRUE )
cowplot::plot_grid( plotlist = list( plot_plate(bc1, plate = plate, row = row, column = col, .color = Group, title = "Layout after the first step, Group" ), plot_plate(bc1, plate = plate, row = row, column = col, .color = Sex, title = "Layout after the first step, Sex" ) ), nrow = 2 )
bc2 <- optimize_design( bc1, scoring = mk_plate_scoring_functions( bc1, plate = "plate", row = "row", column = "col", group = "Group" ), shuffle_proposal_func = shuffle_with_constraints(dst = plate == .src$plate), max_iter = 150, quiet = TRUE )
cowplot::plot_grid( plotlist = list( plot_plate(bc2, plate = plate, row = row, column = col, .color = Group, title = "Layout after the second step, Group" ), plot_plate(bc2, plate = plate, row = row, column = col, .color = Sex, title = "Layout after the second step, Sex" ) ), nrow = 2 )
multi_plate_layout()
We are performing the same optimization as before, but using the
multi_plate_layout()
function to combine the two steps.
bc <- optimize_multi_plate_design( bc, across_plates_variables = c("Group", "Sex"), within_plate_variables = c("Group"), plate = "plate", row = "row", column = "col", n_shuffle = 2, max_iter = 500 # 2000 )
cowplot::plot_grid( plotlist = list( plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "After optimization, Group" ), plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "After optimization, Sex" ) ), nrow = 2 )
bc$plot_trace()
Goal:
Constraints:
see vignette invivo_study_design
for the full story.
Acknowledgements
layout <- crossing(row = 1:9, column = 1:12) |> mutate(Questions = "no") layout$Questions[c( 16, 17, 18, 19, 20, 21, 27, 28, 33, 34, 45, 46, 55, 56, 66, 67, 90, 91 )] <- "yes" plot_plate(layout, .color = Questions, title = "Thank you")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.