#' universal theme for intactr plots
theme_intactr <- function() {
theme_minimal() +
theme(text = element_text(size = 8),
strip.text = element_text(size = 8),
aspect.ratio = 1)
}
#' plot box plots with all constructs for a given combination
#'
#' @param geneA first gene
#' @param geneB second gene
#' @param lfcs log-fold change matrix
#' @param center_lfcs center log-fold changes using controls
#' @param scale_lfcs divide log-fold changes by controls
#' @import ggplot2
#' @export
plot_combo <- function(geneA, geneB, lfcs, n_guides = F, center_lfcs = F,
scale_lfcs = F) {
control_string <- 'ctl'
filtered_lfcs <- lfcs %>%
filter((gene1 %in% c(geneA, geneB) ) | (gene2 %in% c(geneA, geneB)) |
(control1 & control2))
tidy_lfcs <- preprocess_lfcs(filtered_lfcs, n_guides)
control_controls <- tidy_lfcs %>%
filter(control1,
control2) %>%
mutate(type = paste0(control_string, ':', control_string))
if (center_lfcs) {
ctl_mean <- control_controls %>%
group_by(context) %>%
summarise(mean_lfc = mean(avg_lfc))
}
if (scale_lfcs) {
ctl_sd <- control_controls %>%
group_by(context) %>%
summarise(sd_lfc = sd(avg_lfc))
}
geneA_controls <- tidy_lfcs %>%
filter((control1 & gene2 == geneA) |
(control2 & gene1 == geneA)) %>%
mutate(type = paste0(geneA, ':', control_string))
geneB_controls <- tidy_lfcs %>%
filter((control1 & gene2 == geneB) |
(control2 & gene1 == geneB)) %>%
mutate(type = paste0(geneB, ':', control_string))
geneA_geneB <- tidy_lfcs %>%
filter((gene1 == geneA & gene2 == geneB) |
(gene2 == geneA & gene1 == geneB)) %>%
mutate(type = paste0(geneA, ':', geneB))
bound_lfcs <- bind_rows(control_controls,
geneA_controls,
geneB_controls,
geneA_geneB) %>%
distinct() %>%
mutate(type = factor(type, levels = unique(c(paste0(control_string, ':', control_string),
paste0(geneA, ':', control_string),
paste0(geneB, ':', control_string),
paste0(geneA, ':', geneB)))))
subtitle <- 'Avg. LFC'
if (center_lfcs) {
bound_lfcs <- bound_lfcs %>%
inner_join(ctl_mean) %>%
mutate(avg_lfc = avg_lfc - mean_lfc)
subtitle <- paste0(subtitle, ', mean centered')
}
if (scale_lfcs) {
bound_lfcs <- bound_lfcs %>%
inner_join(ctl_sd) %>%
mutate(avg_lfc = avg_lfc/sd_lfc)
subtitle <- paste0(subtitle, ', scaled')
}
palette <- c('black','#b88637','#b84637', '#377eb8')
p <- ggplot(bound_lfcs) +
aes(x = type, y = avg_lfc) +
geom_boxplot(outlier.size = 0.2, aes(color = type)) +
facet_wrap('context', scales = 'free') +
scale_color_manual(values = palette) +
theme_intactr() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = 'top') +
xlab('') +
ylab('') +
labs(title = paste(geneA, geneB, sep = ':'),
subtitle = subtitle)
return(list(p = p, data = bound_lfcs))
}
#' return order tibble by hierarchical clustering
#'
#' @param scores_df tibble with scores and objects to be clustered
#' in tidy format.
#' @param score score to cluster on
cluster_order <- function(scores_df, score) {
rev_scores_df <- scores_df %>%
mutate(temp_geneA = geneB, temp_geneB = geneA) %>%
select(-geneA, -geneB) %>%
rename(geneA = temp_geneA, geneB = temp_geneB) %>%
filter(geneA != geneB)
bound_scores <- bind_rows(scores_df, rev_scores_df)
spread_scores <- bound_scores %>%
select(-genes) %>%
tidyr::pivot_wider(names_from = 'geneB', values_from = score) %>%
tibble::column_to_rownames('geneA')
gene_clust <- hclust(dist(spread_scores))
gene_order_tibble <- tibble(gene = gene_clust$labels[gene_clust$order]) %>%
mutate(order = row_number())
return(gene_order_tibble)
}
#' plot heatmap of combo scores
#'
#' @param combo_scores scores to plot
#' @param score name of the score column
#' @export
plot_score_heatmap <- function(combo_scores, score) {
minimal_scores <- combo_scores %>%
select(context, genes, geneA, geneB, score)
gene_order <- minimal_scores %>%
group_by(context) %>%
tidyr::nest() %>%
mutate(order = purrr::map(data, cluster_order, score)) %>%
tidyr::unnest(order) %>%
select(-data)
reversed_scores <- minimal_scores %>%
mutate(temp_geneA = geneB, temp_geneB = geneA) %>%
select(-geneA, -geneB) %>%
rename(geneA = temp_geneA, geneB = temp_geneB)
ordered_combos <- minimal_scores %>%
bind_rows(reversed_scores) %>%
inner_join(gene_order, by = c('context', 'geneA' = 'gene')) %>%
inner_join(gene_order, by = c('context', 'geneB' = 'gene'),
suffix = c('A', 'B')) %>%
filter(orderA <= orderB) %>%
mutate(geneA = tidytext::reorder_within(geneA, orderA, context),
geneB = tidytext::reorder_within(geneB, orderB, context))
p <- ggplot(ordered_combos) +
aes(x = geneA, y = geneB, fill = !!as.name(score)) +
geom_tile() +
theme_intactr() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
scale_fill_gradient2() +
guides(fill = guide_colourbar(barwidth = 0.5)) +
tidytext::scale_x_reordered() +
tidytext::scale_y_reordered() +
facet_wrap('context', scales = 'free')
return(p)
}
#' create a network of top hits with genes as nodes and scores as edges
#'
#' @param combo_scores scores to plot
#' @param score name of the score column
#' @param cutoff absolute score cutoff at which to draw edges
#' @export
plot_hit_network <- function(combo_scores, score, cutoff) {
signif_combos <- combo_scores %>%
filter(abs(!!as.name(score)) > cutoff,
geneA != geneB)
combo_graph <- signif_combos %>%
select(geneA, geneB, combo_z_score, context) %>%
tidygraph::as_tbl_graph()
p <- ggraph::ggraph(combo_graph, layout = 'stress', bbox = 3) +
ggraph::geom_edge_link(aes(color = !!as.name(score),
width = abs(!!as.name(score)))) +
ggraph::scale_edge_width(range = c(1, 4)) +
ggraph::geom_node_label(aes(label = name), size = 2.8,
repel = T, point.padding = 0,
box.padding = 0.01, min.segment.length = 0.1) +
theme_void() +
theme(aspect.ratio = 1) +
ggraph::scale_edge_color_gradient2() +
guides(edge_width = FALSE,
edge_colour = ggraph::guide_edge_colourbar(title = score, barwidth = 0.5)) +
ggraph::facet_edges('context')
return(p)
}
#' Visualize the residual plot for all of the guides for a pair of genes
#'
#' @param geneA first gene to plot
#' @param geneB second gene to plot
#' @param lfcs log-fold changes
#' @export
plot_combo_residuals <- function(geneA, geneB, lfcs, n_guides = F,
fit_controls = F) {
tidy_df <- preprocess_lfcs(lfcs, n_guides)
base_lfcs <- calculate_base_lfcs(tidy_df)
joined_base_lfcs <- join_base_lfcs(tidy_df, base_lfcs)
reversed_base_lfcs <- reverse_guides(joined_base_lfcs)
guide_residuals <- get_guide_residuals(reversed_base_lfcs,
fit_controls = F)
residuals_of_interest <- guide_residuals %>%
filter(gene1 %in% c(geneA, geneB)) %>%
mutate(target = if_else(control2, 'control',
if_else(gene2 == geneA, geneA,
if_else(gene2 == geneB, geneB,
'other'))),
target = factor(target, levels = c(geneA,geneB,'control', 'other'))) %>%
arrange(desc(target))
guiderank <- residuals_of_interest %>%
select(gene1, guide1, context) %>%
distinct() %>%
group_by(gene1, context) %>%
mutate(guide = rank(guide1))
residuals_guiderank <- inner_join(residuals_of_interest, guiderank)
p <- ggplot(residuals_guiderank) +
aes(x = base_lfc2, y = avg_lfc, color = target) +
geom_point(pch = 16) +
scale_color_manual(values = c('#e41a1c', '#377eb8', 'grey', 'black')) +
geom_smooth(method = 'lm', color = 'white', size = 0.5) +
theme_intactr() +
theme(aspect.ratio = 1, strip.text.x = element_blank(),
strip.text.y = element_text(size = 5)) +
facet_grid(rows = vars(gene1, context), cols = vars(guide),
scales = 'free') +
xlab('base_lfc')
return(list(plot = p, plot_data = residuals_guiderank))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.