#
# sct
#
# sct stands for sample-condition table. Briefly, it collects information from an
# ExpressionSet into a set of samples with specific conditions, together with their gene
# expression. This is really only useful for a subset of genes. Given the gene subset,
# you can then plot the genes by sample and by condition.
#' Generate sct plots by sample as a list.
#'
#' Given a sample-condition table, generate a list of plots by sample. The
#' table is split on `sample_field` and every resulting group is handed to
#' plot_sct(), so each sample yields one ggplot showing the expression of the
#' gene subset, with conditions distinguished as described for plot_sct().
#'
#' sct stands for sample-condition table. Briefly, it collects information from an
#' ExpressionSet into a set of samples with specific conditions, together with their
#' gene expression. This is really only useful for a subset of genes. Given the gene
#' subset, you can then plot the genes by sample and by condition.
#'
#' @param sct The sample-condition table (tibble/data.frame). It must contain
#'   the column named by `sample_field` plus the columns plot_sct() requires.
#' @param sample_field The string naming the sample column. Required; the
#'   `NULL` default only exists so a missing value is reported by the checks.
#' @param condition_field The string naming the condition column, or `NULL`
#'   to plot samples only.
#' @param main A title to use for every plot. If `NULL` (the default), a title
#'   naming the sample is generated for each plot.
#'
#' @return A list of ggplots, one per distinct value of `sample_field`.
#' @export
#'
#'
plot_sct_by_sample <- function(sct, sample_field = NULL, condition_field = NULL,
                               main = NULL) {
  assertthat::assert_that(
    methods::is(sct, "data.frame"),
    # has_name() alone accepts NULL, so reject an unset sample field explicitly.
    assertthat::not_empty(sample_field),
    assertthat::has_name(sct, sample_field)
  )

  # Plain data.frames are converted to tibbles. Row names are preserved in a
  # `Sample` column, but only when that name is still free: adding a second
  # `Sample` column would clash with the existing one.
  if (!tibble::is_tibble(sct)) {
    if ("Sample" %in% names(sct)) {
      sct <- tibble::as_tibble(sct)
    } else {
      sct <- tibble::as_tibble(sct, rownames = "Sample")
    }
  }

  # One table per distinct value of the sample field.
  per_sample <- dplyr::group_split(sct, .data[[sample_field]])

  # Plot each group. The generated title uses the grouping value itself (a
  # single value per group), not the per-row `Sample` identifiers.
  purrr::map(per_sample, function(tab) {
    title <- main
    if (is.null(title)) {
      title <- sprintf(
        "Gene expression for %s across conditions.",
        as.character(unique(tab[[sample_field]]))
      )
    }
    plot_sct(tab, sample_field, condition_field, main = title)
  })
}
#' Plot a sample-condition table
#'
#' Plot the gene expression data within a sample-condition table, indicating both
#' the samples and conditions in the plot.
#'
#' sct stands for sample-condition table. Briefly, it collects information from an
#' ExpressionSet into a set of samples with specific conditions, together with their
#' gene expression. This is really only useful for a subset of genes. Given the gene
#' subset, you can then plot the genes by sample and by condition.
#'
#' The samples are indicated by color and the conditions are indicated by shape. NOTE:
#' if the condition is not specified, then just the samples are plotted (i.e., no
#' conditions). When there are more than six distinct conditions, shapes are skipped and
#' the samples are shown by fill color only.
#'
#' Also note that the samples and conditions have to be factors. This will be done by default
#' for you. However, if you want a particular ordering of the levels then convert the column
#' to a factor before calling the function.
#'
#' If every expression value is one of the integers 1 to 10 (e.g. gene ranks), the y axis
#' gets one break per integer.
#'
#' NOTE: If you want a list, use plot_sct_by_sample()
#'
#' @param sct A sample-condition table with at least the columns `Gene`, `Expression`
#'   and the column named by `sample_field`.
#' @param sample_field The string name of the sample field (required).
#' @param condition_field The string name of the condition field; `NULL` reuses the
#'   sample field.
#' @param main a title for the plot
#'
#' @return A ggplot showing the gene expression for a set of samples and conditions.
#' @export
#'
#' @importFrom rlang := !!
plot_sct <- function(sct, sample_field = NULL, condition_field = NULL,
                     main = NULL) {
  assertthat::assert_that(
    methods::is(sct, "data.frame"),
    assertthat::not_empty(sample_field),
    assertthat::has_name(sct, "Gene"),
    assertthat::has_name(sct, "Expression"),
    assertthat::has_name(sct, sample_field)
  )

  # This allows a plot without a condition, by replacing condition with sample.
  if (is.null(condition_field)) {
    condition_field <- sample_field
  }
  # Report a missing condition column here rather than deep inside mutate().
  assertthat::assert_that(assertthat::has_name(sct, condition_field))

  # Convert the two fields to factors using the default level ordering;
  # columns that already are factors keep their level order.
  sct <- dplyr::mutate(
    sct,
    !!sample_field := factor(.data[[sample_field]]),
    !!condition_field := factor(.data[[condition_field]])
  )

  g <- ggplot2::ggplot(
    sct,
    ggplot2::aes(x = .data$Gene, y = .data$Expression)
  )

  # The threshold of six matches the number of shapes in ggplot2's default
  # discrete shape palette.
  max_shapes <- 6L
  n_conditions <- length(unique(sct[[condition_field]]))

  if (n_conditions > max_shapes) {
    # Too many conditions for distinct shapes: use one fillable shape and
    # distinguish the samples by fill color only.
    g <- g + ggplot2::geom_jitter(
      height = 0, width = 0.2, shape = 21,
      mapping = ggplot2::aes(fill = .data[[sample_field]])
    )
  } else {
    # Map shape to the condition and color to the sample.
    g <- g + ggplot2::geom_jitter(
      height = 0, width = 0.2,
      mapping = ggplot2::aes(
        shape = .data[[condition_field]],
        color = .data[[sample_field]]
      )
    )
  }

  # If we are plotting gene ranks (all values within 1..10), label every rank
  # on the y axis. The call is namespace-qualified so it works without
  # ggplot2 being attached.
  if (all(sct[["Expression"]] %in% 1:10)) {
    g <- g + ggplot2::scale_y_continuous(breaks = 1:10)
  }

  # Finish the plot
  g +
    ggplot2::theme_bw() +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 45, size = 8, hjust = 1)
    ) +
    ggplot2::ggtitle(main) +
    ggsci::scale_color_npg()
}
#' Build an SCT from an ExpressionSet
#'
#' This creates a sample-condition table from an ExpressionSet with pData.
#'
#' An SCT is a set of gene expressions indicating specific samples and associated with
#' a specific condition. This structure allows plotting of individual gene expression by
#' sample and by condition.
#'
#' sct stands for sample-condition table. Briefly, it collects information from an
#' ExpressionSet into a set of samples with specific conditions, together with their
#' gene expression. This is really only useful for a subset of genes. Given the gene
#' subset, you can then plot the genes by sample and by condition.
#'
#' This function takes an ExpressionSet, extracts two fields from the pData (sample_field
#' and condition_field), binds them to the transposed expression matrix and pivots the
#' result with pivot_sct() into the structure needed for plotting.
#'
#' @param x An ExpressionSet (possibly reduced gene list)
#' @param sample_field A pdata field representing the unique samples (repeated entries).
#'   Required.
#' @param condition_field A pdata field representing the condition to plot. If `NULL`,
#'   the sample field is reused.
#'
#' @return A sct (sample-condition table).
#' @export
#'
build_sct <- function(x, sample_field = NULL, condition_field = NULL) {
  assertthat::assert_that(
    methods::is(x, "ExpressionSet"),
    assertthat::not_empty(sample_field)
  )

  # If the condition field is empty, just reuse the sample field.
  if (is.null(condition_field)) {
    condition_field <- sample_field
  }

  # Check both fields up front so a misspelled name is reported clearly
  # instead of as an "undefined columns" subsetting error.
  pheno <- Biobase::pData(x)
  meta_fields <- unique(c(sample_field, condition_field))
  assertthat::assert_that(assertthat::has_name(pheno, meta_fields))

  # Combine gene expression (one row per sample after transposing) and the
  # pData fields into a single table. drop = FALSE keeps a data.frame even
  # when only one field is selected.
  sct <- cbind(
    t(Biobase::exprs(x)),
    pheno[, meta_fields, drop = FALSE]
  )

  # Then pivot the data to usable form.
  pivot_sct(sct, sample_field, condition_field)
}
#' Internal function to pivot a SCT
#'
#' Takes a wide table with one row per sample (gene expression columns plus the
#' sample/condition columns) and returns a long table with one row per
#' gene/sample pair. Sample identifiers are taken from the row names of `sct`.
#'
#' @param sct The un-pivoted sample-condition table: a data.frame whose row names
#'   identify the samples and whose columns are genes plus the metadata fields.
#' @param sample_field Name of the sample field
#' @param condition_field Name of the condition field (may be `NULL` or equal to
#'   `sample_field`).
#'
#' @return A pivoted sct suitable for graphing: a tibble with the columns `Gene`,
#'   `Sample`, `Expression`, followed by the sample and condition fields.
#' @export
#'
#' @importFrom magrittr %>%
pivot_sct <- function(sct, sample_field, condition_field) {
  assertthat::assert_that(
    methods::is(sct, "data.frame"),
    assertthat::not_empty(sample_field)
  )

  # The metadata columns, listed once even when both fields are the same.
  meta_fields <- unique(c(sample_field, condition_field))
  assertthat::assert_that(
    assertthat::has_name(sct, meta_fields),
    # These names are produced by the pivot itself and cannot double as fields.
    assertthat::assert_that(
      !any(meta_fields %in% c("Gene", "Sample", "Expression")),
      msg = "Field names must not be 'Gene', 'Sample' or 'Expression'."
    )
  )

  # Sample identifiers come from the row names (automatic row names give
  # "1", "2", ...).
  sample_ids <- rownames(sct)

  # Create gene table first: exclude the metadata columns. drop = FALSE keeps
  # a data.frame (and therefore the gene name) when only one gene is left.
  meta_idx <- match(meta_fields, colnames(sct))
  gene_table <- sct[, -meta_idx, drop = FALSE]

  # Transpose to genes x samples, label the columns with the sample ids and
  # convert to a tibble with the gene names in a `Gene` column.
  expr <- t(as.matrix(gene_table))
  colnames(expr) <- sample_ids
  gene_tbl <- tibble::as_tibble(expr, rownames = "Gene")

  # Pivot the expression data to longer format: one row per gene/sample.
  long <- tidyr::pivot_longer(
    gene_tbl,
    cols = -tidyselect::all_of("Gene"),
    names_to = "Sample",
    values_to = "Expression"
  )

  # Sample/condition lookup keyed by sample id. Only the metadata columns are
  # joined, so the gene columns are not carried through the join.
  meta <- tibble::as_tibble(sct[, meta_fields, drop = FALSE])
  meta[["Sample"]] <- sample_ids

  joined <- dplyr::left_join(long, meta, by = "Sample")
  dplyr::select(
    joined,
    tidyselect::all_of(c("Gene", "Sample", "Expression", meta_fields))
  )
}
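# NOTE: The two lines below are boilerplate from the web page this file was
# copied from; they are not R code and are kept only as comments.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.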