if (getRversion() >= "2.15.1") {
utils::globalVariables(c("value", "variable", "Clonotype", "Count", "Sample", "Clone",
"Value", "Value.mean", "Value.min", "Value.max", "quantile",
"Kmer", "Group", "Size", "Val", "Grouping.var", "Xrep", 'Yrep',
"Clone.num", "Q", "Clonotypes", "Sample_subj", "Seq.count",
"Overlap", "head", "Mean", "MeanVal", "MinVal", "MaxVal",
"Q1", "Q2", "Type", "Length", "Gene", "Freq", "Sequence",
"AA", "Clones", "Source.gr", "Target.gr", "Samples", "Samples.y",
"CDR3.aa", "p.adj", "group1", "group2", "y.coord", "..p.adj..", ".SD",
"name", "label", "."))
}
##### Utility functions #####
.rem_legend <- function (.p) { .p + theme(legend.position="none") }
.rem_xlab <- function (.p) { .p + theme(axis.title.x = element_blank()) }
.rem_ylab <- function (.p) { .p + theme(axis.title.y = element_blank()) }
.colourblind_vector <- function() {
c("#FF4B20", "#FFB433", "#C6FDEC", "#7AC5FF", "#0348A6")
}
.colourblind_discrete <- function (.n, .colour = F) {
cs <- .colourblind_vector()
if (.colour) {
scale_colour_manual(values = colorRampPalette(cs)(.n))
} else {
scale_fill_manual(values = colorRampPalette(cs)(.n))
}
}
.tweak_fill <- function (.n) {
palette_name = ""
if (.n == 1) { palette_name = "Set2" }
else if (.n == 2) { palette_name = "Set1" }
# else if (.n < 4) { palette_name = "YlGnBu" }
# else if (.n < 6) { palette_name = "RdBu" }
else if (.n < 12) { palette_name = "Spectral" }
else { return(scale_fill_hue()) }
scale_fill_brewer(palette = palette_name)
}
.tweak_col <- function (.n) {
palette_name = ""
if (.n == 1) { palette_name = "Set2" }
else if (.n == 2) { palette_name = "Set1" }
# else if (.n < 4) { palette_name = "YlGnBu" }
# else if (.n < 6) { palette_name = "RdBu" }
else if (.n < 12) { palette_name = "Spectral" }
else { return(scale_colour_hue()) }
scale_color_brewer(palette = palette_name)
}
theme_cleveland2 <- function (rotate = TRUE) {
if (rotate) {
theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(colour = "grey70",
linetype = "dashed"))
}
else {
theme(panel.grid.major.x = element_line(colour = "grey70",
linetype = "dashed"), panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
}
}
##### The one and only - the ultimate vis() function #####
#' One function to visualise them all
#'
#' @name vis
#'
#' @import ggplot2
#' @importFrom factoextra fviz_cluster fviz_dend fviz_pca_ind
#' @importFrom grDevices colorRampPalette
#'
#' @description Output from every function in immunarch can be visualised with a
#' single function - \code{vis}. The \code{vis} automatically detects
#' the type of the data and draws a proper visualisation. For example, output
#' from the \code{repOverlap} function will be identified as repertoire overlap values
#' and respective visualisation will be chosen without any additional arguments.
#' See "Details" for the list of available visualisations.
#'
#' @param .data Pass the output from any immunarch analysis tool to \code{vis()}.
#' @param ... Any other arguments, see the "Details" section for specific visualisation functions.
#'
#' @details
#' List of available visualisations for different kinds of data.
#'
#' Basic analysis:
#'
#' - Exploratory analysis results (from \link{repExplore}) - see \link{vis.immunr_exp_vol};
#'
#' - Clonality statistics (from \link{repClonality}) - see \link{vis.immunr_homeo}.
#'
#' Overlaps and public clonotypes:
#'
#' - Overlaps (from \link{repOverlap}) using heatmaps, circos plots, polar area plots - see \link{vis.immunr_ov_matrix};
#'
#'- Overlap clustering (from \link{repOverlapAnalysis}) - see \link{vis.immunr_hclust};
#'
#' - Repertoire incremental overlaps (from \link{repOverlap}) - see \link{vis.immunr_inc_overlap};
#'
#' - Public repertoire abundance (from \link{pubRep}) - vis \link{vis.immunr_public_repertoire}.
#'
#' Gene usage:
#'
#' - Gene usage statistics (from \link{geneUsage}) using bar plots, box plots, treemaps - see \link{vis.immunr_gene_usage};
#'
#' - Gene usage distances (from \link{geneUsageAnalysis}) using heatmaps, circos plots, polar area plots - see \link{vis.immunr_ov_matrix};
#'
#' - Gene usage clustering (from \link{geneUsageAnalysis}) - see \link{vis.immunr_hclust}.
#'
#' Diversity estimation:
#'
#' - Diversity estimations (from \link{repDiversity}) - see \link{vis.immunr_chao1}.
#'
#' Advanced analysis:
#'
#' - Repertoire dynamics (from \link{trackClonotypes}) - see \link{vis.immunr_dynamics};
#'
#' - Sequence logo plots of amino acid distributions (from \link{kmer_profile}) - see \link{vis_seqlogo};
#'
#' - Kmers distributions (from \link{getKmers}) - see \link{vis.immunr_kmer_table};
#'
#' - Mutation networks (from mutationNetwork) - Work In Progress on vis.immunr_mutation_network;
#'
#' - CDR3 amino acid properties, e.g., biophysical (from cdrProp) - Work In Progress on vis.immunr_cdr_prop.
#'
#' Additionaly, we provide a wrapper functions for visualisations of common data types:
#'
#' - Any data frames or matrices using heatmaps - see \link{vis_heatmap} and \link{vis_heatmap2};
#'
#' - Any data frames or matrices using circos plots - see \link{vis_circos};
#'
#' - Any data frames or matrices using polar area plots - see \link{vis_radar}.
#'
#' @seealso \link{fixVis} for precise manipulation of plots.
#'
#' @examples
#' \dontrun{
#' # Load the test data
#' data(immdata)
#'
#' # Compute and visualise overlaps
#' ov = repOverlap(immdata$data)
#' vis(ov)
#' }
#'
#' @export vis
vis <- function (.data, ...) {
UseMethod('vis')
}
##### Overlap & heatmaps, circos plots and polar area plots #####
#' Overlap / gene usage distance visualisation
#'
#' @name vis.immunr_ov_matrix
#'
#' @aliases vis.immunr_ov_matrix vis.immunr_gu_matrix
#'
#' @description Visualise matrices with overlap values or gene usage distances among samples.
#' For details see links below.
#'
#' @param .data Output from \link{repOverlap} or \link{geneUsageAnalysis}.
#'
#' @param .plot A string specifying the plot type:
#'
#' - "heatmap" for heatmaps using \link{vis_heatmap};
#'
#' - "heatmap2" for heatmaps using \link{vis_heatmap2};
#'
#' - "circos" for circos plots using \link{vis_circos};
#'
#' - "radar" for polar area plots using \link{vis_radar}.
#'
#' @param ... Other arguments are passed through to the underlying plotting function:
#'
#' - "heatmap" - passes arguments to \link{vis_heatmap};
#'
#' - "heatmap2" - passes arguments to \link{vis_heatmap2} and \link{heatmap} from the "heatmap3" package;
#'
#' - "circos" - passes arguments to \link{vis_circos} and \link{chordDiagram} from the "circlize" package;
#'
#' - "radar"- passes arguments to \link{vis_radar}.
#'
#' @export
vis.immunr_ov_matrix <- function (.data, .plot = c("heatmap", "heatmap2", "circos", "radar"), ...) {
args = list(...)
if (!(".title" %in% names(args))) {
args$.title = "Repertoire overlap"
}
.plot = .plot[1]
heatfun = vis_heatmap
if (.plot == "heatmap2") {
heatfun = vis_heatmap2
} else if (.plot == "circos") {
heatfun = vis_circos
} else if (.plot == "radar") {
heatfun = vis_radar
}
do.call(heatfun, c(list(.data = .data), args))
}
#' @export
vis.immunr_gu_matrix <- function (.data, .plot = c("heatmap", "heatmap2", "circos", "radar"), ...) {
args = list(...)
if (!(".title" %in% names(args))) {
args$.title = "Gene usage"
}
.plot = .plot[1]
heatfun = vis_heatmap
if (.plot == "heatmap2") {
heatfun = vis_heatmap2
} else if (.plot == "circos") {
heatfun = vis_circos
} else if (.plot == "radar") {
heatfun = vis_radar
}
do.call(heatfun, c(list(.data = .data), args))
}
#' Visualisation of matrices and data frames using ggplot2-based heatmaps
#'
#' @name vis_heatmap
#'
#' @aliases vis_heatmap
#'
#' @description Fast and easy visualisations of matrices or data frames
#' with functions based on the ggplot2 package.
#'
#' @param .data Input object: a matrix or a data frame.
#'
#' If matrix: column names and row names (if presented) will be used as names for labs.
#'
#' If data frame: the first column will be used for row names and removed from the data.
#' Other columns will be used for values in the heatmap.
#'
#' @param .text If TRUE then plot values in the heatmap cells. If FALSE do not plot values,
#' just plot coloured cells instead.
#'
#' @param .scientific If TRUE then use the scientific notation for numbers (e.g., "2.0e+2").
#'
#' @param .signif.digits Number of significant digits to display on plot.
#'
#' @param .text.size Size of text in the cells of heatmap.
#'
#' @param .labs A character vector of length two with names for x-axis and y-axis, respectively.
#'
#' @param .title The The text for the plot's title.
#'
#' @param .leg.title The The text for the plots's legend. Provide NULL to remove the legend's title completely.
#'
#' @param .legend If TRUE then displays a legend, otherwise removes legend from the plot.
#'
#' @param .na.value Replace NA values with this value. By default they remain NA.
#'
#' @param .transpose Logical. If TRUE then switch rows and columns.
#'
#' @param ... Other passed arguments.
#'
#' @seealso \link{vis}, \link{repOverlap}.
#'
#' @export
vis_heatmap <- function (.data, .text = T, .scientific = FALSE, .signif.digits = 2, .text.size = 4,
.labs = c('Sample', 'Sample'), .title = "Overlap",
.leg.title = 'Overlap values', .legend = T,
.na.value = NA, .transpose = F, ...) {
if (has_class(.data, 'data.frame')) {
names <- .data[[1]]
.data <- as.matrix(.data[,-1])
row.names(.data) <- names
if (.transpose) {
.data = t(.data)
}
} else if (is.null(dim(.data))) {
.data = as.matrix(.data)
if (.transpose) {
.data = t(.data)
}
}
if (is.null(colnames(.data))) {
colnames(.data) <- paste0('C', 1:ncol(.data))
}
if (is.null(row.names(.data))) {
row.names(.data) <- paste0('C', 1:nrow(.data))
}
.data[is.na(.data)] <- .na.value
tmp <- as.data.frame(.data)
tmp$name <- row.names(.data)
m <- reshape2::melt(tmp, id.var = c('name'))
m[,1] <- factor(m[,1], levels = rev(rownames(.data)))
m[,2] <- factor(m[,2], levels = colnames(.data))
m$label <- format(m$value, scientific = .scientific, digits = .signif.digits)
p <- ggplot(m, aes(x = variable, y = name, fill = value))
p <- p + geom_tile(aes(fill = value), colour = "white")
if (.text) { p <- p + geom_text(aes(label = label), size = .text.size) }
p <- p + scale_fill_distiller(palette="RdBu")
p <- p + ggtitle(.title) +
guides(fill = guide_colourbar(title=.leg.title)) +
xlab(.labs[1]) + ylab(.labs[2]) + coord_fixed() +
theme_linedraw() + theme(axis.text.x = element_text(angle=90, vjust = .5)) +
scale_x_discrete(expand=c(0,0)) + scale_y_discrete(expand=c(0,0))
if (!.legend) { p = .rem_legend(p) }
if (is.null(.labs)) { p = .rem_xlab(.rem_ylab(p)) }
p
}
#' Visualisation of matrices using heatmap3-based heatmaps
#'
#' @importFrom heatmap3 heatmap3
#'
#' @name vis_heatmap2
#'
#' @description Visualise matrices with the functions based on the \link[heatmap3]{heatmap3}
#' package with minimum amount of arguments.
#'
#' @param .data Input matrix. Column names and row names (if presented) will be used as names for labs.
#'
#' @param .grid.size Line width of the grid between cells.
#'
#' @param .title The text for the plot's title (same as the "main" argument in \link[heatmap3]{heatmap3}).
#'
#' @param .labs A character vector of length two with names for x-axis and y-axis, respectively.
#'
#' @param col A vector specifying the colors (same as the "col" argument in \link[heatmap3]{heatmap3}).
#'
#' @param ... Other arguments for the \link[heatmap3]{heatmap3} function.
#'
#' @seealso \link{vis}, \link{repOverlap}.
#'
#' @export
vis_heatmap2 <- function (.data, .grid.size = 1.5, .title = NULL, .labs = NULL, col = colorRampPalette(c("#67001f", "#d6604d", "#f7f7f7", "#4393c3", "#053061"))(1024), ...) {
heatmap3(.data, highlightCell = expand.grid(1:nrow(.data), 1:ncol(.data), "black", .grid.size),
main = .title, xlab = .labs[1], ylab = .labs[2], col = col, ...)
}
#' Visualisation of matrices using circos plots
#'
#' @importFrom circlize chordDiagram
#'
#' @name vis_circos
#'
#' @description Visualise matrices with the \link{chordDiagram} function
#' from the circlize package.
#'
#' @param .data Input matrix.
#'
#' @param .title The The text for the title of the plot.
#'
#' @param ... Other arguments passed to \link{chordDiagram} from the 'circlize' package.
#'
#' @seealso \link{vis}, \link{repOverlap}.
#'
#' @examples
#' \dontrun{
#' data(immdata)
#' gu = geneUsage(immdata$data)
#' vis(gu, .plot = "circos")
#' }
#'
#' @export
vis_circos <- function (.data, .title = NULL, ...) {
chordDiagram(.data, ...)
}
#' Visualisation of distance matrices with polar area plots
#'
#' @name vis_radar
#'
#' @description Visualise distance matrices using polar area plots from ggplot2.
#' Due to the nature of polar area plots, it is impossible to visualise all distances on the
#' same plot. So the output of the function will be a number of polar area plots, one for each
#' sample in the input matrix. Each plot visualises a distance from the specific samples
#' to other samples in the dataset.
#'
#' @param .data Input matrix.
#'
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#'
#' @param .ncol An integer number of columns to display. Provide NA (by default)
#' if you want the function to automatically detect the optimal number of columns.
#'
#' @param .which A character vector with sample names. Samples not presented in
#' the vector will be filtered out and will not be displayed in the resulting plot.
#'
#' @param .errorbars A numeric vector of length two with quantiles for error bars
#' on sectors. Disabled if ".errorbars.off" is TRUE.
#'
#' @param .errorbars.off If TRUE then plot CI bars for distances between each group.
#' Disabled if no group passed to the ".by" argument.
#'
#' @param .expand Passed to ggplot2's \link{scale_y_continuous} argument "expand".
#'
#' @param .title The text for the title of the plot.
#'
#' @param .subtitle The text for the subtitle of the plot. Provide NULL if you want to remove it.
#'
#' @param .legend If TRUE then plots the legend, otherwise removes the legend from the plot.
#'
#' @param .leg.title The text for the legend of the plot. Pass NULL if you want to remove it.
#'
#' @param ... Do not provide anything here.
#'
#' @export
vis_radar <- function (.data, .by = NA, .meta = NA,
.ncol = NA, .which = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = F, .expand = c(.25, 0),
.title = NA, .subtitle = NULL,
.legend = F, .leg.title = NULL, ...) {
stop("vis_radar() is under complety re-working. Please use other functions such as vis_heatmap.
Sorry for the inconvenience! Please contact us if you need vis_radar() ASAP.")
.data = melt(.data)
colnames(.data) = c("Source", "Target", "Value")
.data$Value[is.na(.data$Value)] <- 0
group_res = process_metadata_arguments(.data, .by, .meta, .data.sample.col = "Source")
group_res2 = process_metadata_arguments(.data, .by, .meta, .data.sample.col = "Target")
.data$Source.gr = group_res$group_column
.data$Target.gr = group_res2$group_column
.data_proc = .data %>%
dplyr::group_by(Source.gr, Target.gr) %>%
dplyr::summarise(Value = mean(Value, na.rm = T),
Value.min = quantile(Value, .errorbars[1], na.rm = T),
Value.max = quantile(Value, .errorbars[2], na.rm = T))
.data_proc = .data_proc[!is.na(.data_proc$Source.gr), ]
step = length(unique(.data_proc$Source.gr))
if (!is.na(.which[1])) {
.data_proc = .data_proc[.data_proc$Source.gr %in% .which, ]
}
if (is.na(.leg.title)) {
if (group_res$is_grouped) {
.leg.title = "Group"
} else {
.leg.title = "Sample"
}
}
if (!group_res$is_grouped) {
.errorbars.off = T
}
if (is.na(.ncol)) {
.ncol = round(length(unique(.data_proc$Source.gr)) ** .5)
}
.data_proc$Value[.data_proc$Source.gr == .data_proc$Target.gr] = 0
.data_proc$Value.min[.data_proc$Source.gr == .data_proc$Target.gr] = NA
.data_proc$Value.max[.data_proc$Source.gr == .data_proc$Target.gr] = NA
ps <- lapply(seq(1, nrow(.data_proc), step), function (l) {
p = ggplot(.data_proc[l:(l+step-1),], aes(x = Target.gr, y = Value, fill = Target.gr)) +
geom_bar(colour = 'black', stat = 'identity')
if (!.errorbars.off) {
p = p +
geom_errorbar(aes(ymin = Value.min, ymax = Value.max), width=0.2, position=position_dodge(.9))
}
p = p +
coord_polar() +
scale_y_continuous(expand = .expand) +
theme_linedraw() +
.colourblind_discrete(step)
if (!.legend) {
p = p + guides(fill = "none")
} else {
p = p + guides(fill = guide_legend(title=.leg.title))
}
p = p +
labs(title = .data_proc$Source.gr[l], subtitle = .subtitle)
p
})
do.call(grid.arrange, c(ps, ncol = .ncol, top = .title))
}
##### Complex overlaps - top overlap & public repertoires #####
#' Visualise incremental overlaps
#'
#' @importFrom dplyr rename
#'
#' @name vis.immunr_inc_overlap
#'
#' @param .data Output from the \link{repOverlap} function that uses "top" methods.
#'
#' @param .target Index of a repertoire to plot. Omitted if .grid is TRUE.
#'
#' @param .grid Logical. If TRUE then plot all similarities in a grid.
#'
#' @param .ncol Numeric. Number of columns in the resulting grid.
#'
#' @param ... Not used here.
#'
#' @seealso \link{repOverlap}
#'
#' @examples
#' \dontrun{
#' data(immdata)
#' tmp = repOverlap(immdata$data, "inc+overlap")
#' vis(tmp)
#' vis(tmp, .grid = T)
#' }
#'
#' @export
vis.immunr_inc_overlap <- function (.data, .target = 1, .grid = F, .ncol = 2, ...) {
data_is_bootstrapped = !is.null(attr(.data, "bootstrap"))
if (.grid) {
if (data_is_bootstrapped) {
sample_names = colnames(.data[[1]][[1]])
} else {
sample_names = colnames(.data[[1]])
}
p_list = lapply(1:length(sample_names), function (i_name) {
p = vis(.data, .target = i_name) + ggtitle(sample_names[i_name]) + theme_pubr(legend="right") + theme_cleveland2()
p
})
return(do.call(grid.arrange, c(p_list, list(ncol = .ncol))))
} else {
if (data_is_bootstrapped) {
sample_names = colnames(.data[[1]][[1]])
.data = lapply(.data, function (mat_list) {
lapply(mat_list, function (mat) { replace(mat, lower.tri(mat, T), NA) } )
})
.data = reshape2::melt(.data, na.rm = TRUE)
names(.data) = c("Sample_subj", "Sample", "Overlap", "N", "Seq.count")
filtered_data = .data[.data$Sample_subj == sample_names[.target], ]
filtered_data$Seq.count = as.numeric(filtered_data$Seq.count)
filtered_data = filtered_data %>%
dplyr::group_by(Sample_subj, Sample, Seq.count, Overlap) %>%
dplyr::summarise(Value = mean(Overlap, na.rm = T),
Value.min = quantile(Overlap, 0.025, na.rm = T),
Value.max = quantile(Overlap, 0.975, na.rm = T))
}
else {
sample_names = colnames(.data[[1]])
.data = lapply(.data, function (mat) replace(mat, lower.tri(mat, T), NA))
.data = reshape2::melt(.data, na.rm = TRUE)
names(.data) = c("Sample_subj", "Sample", "Overlap", "Seq.count")
filtered_data = .data[.data$Sample_subj == sample_names[.target], ]
filtered_data$Seq.count = as.numeric(filtered_data$Seq.count)
filtered_data = filtered_data %>%
dplyr::rename(Value = Overlap)
}
p = ggplot(aes(x = Seq.count, y = Value, col = Sample, group = Sample), data = filtered_data) +
geom_line()
if (data_is_bootstrapped) {
p = p +
geom_ribbon(aes(group = Sample, ymin=Value.min, ymax=Value.max, fill=Sample), alpha=0.15)
}
p = p +
theme_pubr(legend="right") + theme_cleveland2() +
ggtitle("Similarity of repertoires") +
xlab("Sequence count") + ylab("Similarity")
return(p)
}
}
#' Public repertoire visualisation
#'
#' @name vis.immunr_public_repertoire
#'
#' @param .data Public repertoire, an output from \link{pubRep}.
#' @param .plot A string specifying the plot type:
#'
#' - "freq" for visualisation of the distribution of occurrences of clonotypes
#' and their frequencies using \link{vis_public_frequencies}.
#'
#' - "clonotypes" for visualisation of public clonotype frequenciy correlations between pairs of
#' samples using \link{vis_public_clonotypes}
#' @param ... Further arguments passed \link{vis_public_frequencies} or \link{vis_public_clonotypes},
#' depending on the ".plot" argument.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' pr = pubRep(immdata$data)
#' vis(pr, "freq")
#' vis(pr, "freq", .type="boxplot")
#' vis(pr, "freq", .type="none")
#' vis(pr, "freq", .type="mean")
#' vis(pr, "freq", .by = "Status", .meta=immdata$meta)
#'
#' vis(pr, "clonotypes", 1, 2)
#' }
vis.immunr_public_repertoire <- function (.data, .plot = c("freq", "clonotypes"), ...) {
.plot = .plot[1]
if (.plot == "freq") {
vis_public_frequencies(.data, ...)
} else if (.plot == "clonotypes") {
vis_public_clonotypes(.data, ...)
} else {
stop('Error: unknown argument for .plot, please provide one of the following: "freq", "clonotypes" or "???"')
}
}
#' Visualise sharing of clonotypes among samples
#'
#' @importFrom UpSetR upset fromExpression
#'
#' @name vis.immunr_public_statistics
#'
#' @description Visualise public clonotype frequencies.
#'
#' @param .data Public repertoire - an output from the \link{pubRep} function.
#'
#' @param ... Other arguments passsed directly to \link{upset}.
#'
#' @export
vis.immunr_public_statistics <- function (.data, ...) {
upsetr_data = as.list(.data$Count)
names(upsetr_data) = .data$Group
upset(fromExpression(upsetr_data), ...)
}
#' Public repertoire visualisation
#'
#' @name vis_public_frequencies
#'
#' @description Visualise public clonotype frequencies.
#'
#' @param .data Public repertoire - an output from the \link{pubRep} function.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#' @param .type Character. Either "boxplot" for plotting distributions of frequencies,
#' "none" for plotting everything, or "mean" for plotting average values only.
#'
#' @examples
#' \dontrun{
#' pr = pubRep(immdata$data)
#' vis(pr, "freq")
#' vis(pr, "freq", .type="boxplot")
#' vis(pr, "freq", .type="none")
#' vis(pr, "freq", .type="mean")
#' vis(pr, "freq", .by = "Status", .meta=immdata$meta)
#' }
vis_public_frequencies <- function (.data, .by = NA, .meta = NA,
.type = c("boxplot", "none", "mean")) {
.type[1] = .type
# ToDo: make it work with V genes
melted_pr = reshape2::melt(.data, id.vars=colnames(.data)[1:(match("Samples", colnames(.data)))])
colnames(melted_pr)[1] = "Sequence"
colnames(melted_pr)[ncol(melted_pr) - 1] = "Sample"
group_res = process_metadata_arguments(melted_pr, .by, .meta)
group_column = group_res$name
melted_pr$Group = group_res$group_column
if (.type == "boxplot") {
p = ggplot() +
geom_boxplot(aes(y = value, x = as.factor(Samples), col = Group), data = melted_pr) +
scale_y_log10() +
ggtitle("Distribution of public clonotype frequencies") +
xlab("Number of shared samples") + ylab("Public clonotype frequencies")
} else if (.type == "mean") {
melted_pr = melted_pr %>%
group_by(Sequence, Samples) %>%
summarise(Value = mean(value, na.rm = T))
p = ggplot() + geom_jitter(aes(x = Value, y=Samples), data=melted_pr) +
scale_x_log10() +
ggtitle("Mean frequencies of public clonotypes") +
ylab("Number of shared samples") + xlab("Public clonotype frequencies")
} else {
# ToDo: add the .verbose argument for this
cat("Warning! Visualising ", sum(!is.na(melted_pr$value)), ' points. Too many points may take a while to visualise depending on your hardware. We highly recommend you to use other types: "mean" or "boxplot"' )
p = ggplot() + geom_jitter(aes(x = value, y=Samples, col=Group), data=melted_pr) +
scale_x_log10() +
ggtitle("Distribution of public clonotype frequencies") +
ylab("Number of shared samples") + xlab("Public clonotype frequencies")
}
# tmp = as_tibble(data.frame(N_samples = .data$Samples,
# Value = rowMeans(public_matrix(.data), na.rm = T)))
# ggplot(aes(x=Value, y=N_samples, color=N_samples), data=tmp) + geom_jitter() +
# scale_x_log10() +
# xlab("Public clonotype frequency") + ylab("Number of samples") +
# ggtitle("Public clonotype frequencies") +
# theme_bw()
p + theme_pubr(legend="right") + theme_cleveland2()
}
#' Visualisation of public clonotypes
#'
#' @name vis_public_clonotypes
#'
#' @importFrom grid rectGrob gpar
#' @importFrom stats lm
#'
#' @description Visualise correlation of public clonotype frequencies in pairs of repertoires.
#'
#' @param .data Public repertoire data - an output from the \link{pubRep} function.
#'
#' @param .x.rep Either indices of samples or character vector of sample names
#' for the x-axis. Must be of the same length as ".y.rep".
#'
#' @param .y.rep Either indices of samples or character vector of sample names
#' for the y-axis. Must be of the same length as ".x.rep".
#'
#' @param .title The text for the title of the plot.
#'
#' @param .ncol An integer number of columns to print in the grid of pairs of repertoires.
#'
#' @param .point.size.modif An integer value that is a modifier of the point size.
#' The larger the number, the larger the points.
#'
#' @param .cut.axes If TRUE then axes limits become shorter.
#'
#' @param .density If TRUE then displays density plot for distributions of clonotypes
#' for each sample. If FALSE then removes density plot from the visualisation.
#'
#' @param .lm If TRUE then fit a linear model and displays an R adjusted coefficient
#' that shows how similar samples are in terms of shared clonotypes.
#'
#' @param .radj.size An integer value, that defines the size of the The text
#' for the R adjusted coefficient.
#'
#' @param .return.grob If TRUE then returh the gridArrange grob instead of the plot.
#'
#' @seealso \link{pubRep}, \link{vis.immunr_public_repertoire}
#'
#' @examples
#' \dontrun{
#' pr = pubRep(immdata$data)
#' vis(pr, "clonotypes", 1, 2)
#' }
vis_public_clonotypes <- function (.data, .x.rep = NA, .y.rep = NA,
.title = NA, .ncol = 3,
.point.size.modif = 1, .cut.axes = T,
.density = T, .lm = T, .radj.size = 3.5, .return.grob = F) {
.shared.rep = .data
mat <- public_matrix(.shared.rep)
if (is.na(.x.rep) && is.na(.y.rep)) {
ps <- list()
for (i in 1:ncol(mat)) {
for (j in 1:ncol(mat)) {
ps <- c(ps, list(vis_public_clonotypes(.shared.rep, i, j, '', .point.size.modif = .point.size.modif,
.cut.axes = .cut.axes, .density = .density, .lm = .lm, .radj.size = .radj.size,
.return.grob = T)))
}
}
grid.arrange(grobs = ps, ncol = .ncol, top = .title)
} else if (is.na(.x.rep)) {
ps <- lapply(1:ncol(mat), function (i) {
vis_public_clonotypes(.shared.rep, i, .y.rep, '', .point.size.modif = .point.size.modif,
.cut.axes = .cut.axes, .density = .density, .lm = .lm)
})
do.call(grid.arrange, c(ps, ncol = .ncol, top = .title))
} else if (is.na(.y.rep)) {
ps <- lapply(1:ncol(mat), function (j) {
vis_public_clonotypes(.shared.rep, .x.rep, j, '', .point.size.modif = .point.size.modif,
.cut.axes = .cut.axes, .density = .density, .lm = .lm)
})
do.call(grid.arrange, c(ps, ncol = .ncol, top = .title))
} else {
if (!is.character(.x.rep)) { .x.rep <- colnames(mat)[.x.rep] }
if (!is.character(.y.rep)) { .y.rep <- colnames(mat)[.y.rep] }
if (.x.rep == .y.rep) {
return(rectGrob(gp=gpar(col="white")))
}
df <- data.frame(cbind(mat[, .x.rep], mat[, .y.rep]))
df[,1] = df[,1] / sum(df[,1], na.rm = T)
df[,2] = df[,2] / sum(df[,2], na.rm = T)
df_full = df
df <- df[!is.na(df[,1]) & !is.na(df[,2]), ]
freq <- log10(sqrt(as.numeric(df[, 1]) * df[, 2])) / 2
names(df) <- c("Xrep", "Yrep")
names(df_full) <- c("Xrep", "Yrep")
pnt.cols <- log(df[, 1] / df[, 2])
suppressWarnings(pnt.cols[pnt.cols > 0] <- pnt.cols[pnt.cols > 0] / max(pnt.cols[pnt.cols > 0]))
suppressWarnings(pnt.cols[pnt.cols < 0] <- -pnt.cols[pnt.cols < 0] / min(pnt.cols[pnt.cols < 0]))
if (.cut.axes) {
mat.lims <- c(min(as.matrix(df_full), na.rm = T), max(as.matrix(df_full), na.rm = T))
} else {
mat.lims <- c(min(as.matrix(df_full), na.rm = T), 1)
}
empty <- ggplot()+geom_point(aes(1,1), colour="white") +
theme(
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank()
)
min_df = min(floor(log10(min(df_full[,1], na.rm = T))), floor(log10(min(df_full[,2], na.rm = T))))
max_df = max(trunc(log10(max(df_full[,1], na.rm = T))), trunc(log10(max(df_full[,2], na.rm = T))))
breaks_values = 10**seq(min_df, 1)
breaks_labels = format(log10(breaks_values), scientific = F)
grey_col = "#CCCCCC"
points = ggplot() +
geom_point(aes(x = Xrep, y = Yrep, size = freq, fill = pnt.cols), data = df, shape=21) +
scale_radius(range = c(.point.size.modif, .point.size.modif * 6)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
theme_linedraw() +
scale_fill_distiller(palette="RdBu") +
scale_x_log10(breaks = breaks_values, labels = breaks_labels, lim = mat.lims, expand = c(.015, .015)) +
scale_y_log10(breaks = breaks_values, labels = breaks_labels, lim = mat.lims, expand = c(.015, .015)) +
theme(legend.position="none") +
xlab(.x.rep) + ylab(.y.rep)
if (!is.na(.title)) {
points = points + ggtitle(.title)
}
if (.lm) {
adj.R.sq = summary(lm(Yrep ~ Xrep, df))$adj.
points = points +
geom_smooth(aes(x = Xrep, y = Yrep), method = "lm", data = df, fullrange = T, colour = "grey20", size = .5) +
geom_text(aes(x = max(df_full, na.rm = T) / 4,
y = min(df_full, na.rm = T),
label = paste0("R^2(adj.) = ", as.character(round(adj.R.sq, 2)))), size = .radj.size)
# ggtitle(paste0("R^2(adj.) = ", as.character(round(adj.R.sq, 2))))
}
if (.density) {
df2 = data.frame(Clonotype = df_full[!is.na(df_full[,1]) & is.na(df_full[,2]), 1], Type = "unique", stringsAsFactors = F)
df2 = rbind(df2, data.frame(Clonotype = df_full[!is.na(df_full[,1]) & !is.na(df_full[,2]), 1], Type = "public", stringsAsFactors = F))
top_plot = ggplot() +
geom_density(aes(x = Clonotype, fill = Type), colour = "grey25", data = df2, alpha = .3) +
scale_x_log10(breaks = 10**(seq(min_df, 0)), lim = mat.lims, expand = c(.12, .015)) +
theme_bw() + theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
legend.position = "none") +
scale_fill_manual(values = colorRampPalette(c(.colourblind_vector()[5], grey_col))(2))
df2 = data.frame(Clonotype = df_full[!is.na(df_full[,2]) & is.na(df_full[,1]), 2], Type = "unique", stringsAsFactors = F)
df2 = rbind(df2, data.frame(Clonotype = df_full[!is.na(df_full[,2]) & !is.na(df_full[,1]), 2], Type = "public", stringsAsFactors = F))
right_plot = ggplot() +
geom_density(aes(x = Clonotype, fill = Type), colour = "grey25", data = df2, alpha = .3) +
scale_x_log10(breaks = 10**(seq(min_df, 0)), lim = mat.lims, expand = c(.12, .015)) +
coord_flip() +
theme_bw() + theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
legend.position = "none") +
scale_fill_manual(values = colorRampPalette(c(.colourblind_vector()[1], grey_col))(2))
if (!.return.grob) {
grid.arrange(top_plot, empty, points, right_plot, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))
} else {
arrangeGrob(top_plot, empty, points, right_plot, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))
}
} else {
points
}
}
}
##### Gene usage & histogram plot, boxplot and treemap #####
#' Histograms and boxplots (general case / gene usage)
#'
#' @name vis.immunr_gene_usage
#'
#' @description Visualise distributions of genes using heatmaps, treemaps or other plots.
#'
#' @param .data Output from the \link{geneUsage} function.
#'
#' @param .plot String specifying the plot type:
#'
#' - "hist" for histograms using \link{vis_hist};
#'
#' - "tree" for histograms using \link{vis_treemap};
#'
#' - "heatmap" for heatmaps using \link{vis_heatmap};
#'
#' - "heatmap2" for heatmaps using \link{vis_heatmap2};
#'
#' - "circos" for circos plots using \link{vis_circos}.
#'
#' @param ... Other arguments passed to corresponding functions depending on the plot type:
#'
#' - "hist" - passes arguments to \link{vis_hist};
#'
#' - "box" - passes arguments to \link{vis_box};
#'
#' - "tree" - passes arguments to \link{vis_treemap} and \link{treemap} from the treemap package;
#'
#' - "heatmap" - passes arguments to \link{vis_heatmap};
#'
#' - "heatmap2" - passes arguments to \link{vis_heatmap2} and \link{heatmap} from the "heatmap3" package;
#'
#' - "circos" - passes arguments to \link{vis_circos} and \link{chordDiagram} from the "circlize" package.
#'
#' @examples
#' \dontrun{
#' data(twb)
#' gu = geneUsage(twb)
#'
#' # next two plots are similar
#' vis(gu, .title = "Gene usage on twins")
#' vis_hist(gu, .title = "Gene usage on twins")
#'
#' # next two plots are similar as well
#' vis(gu, "box", .title = "Gene usage on twins")
#' vis_box(gu, .title = "Gene usage on twins")
#' }
#'
#' @seealso \link{geneUsage}
#'
#' @export
vis.immunr_gene_usage <- function (.data, .plot = c("hist", "box", "tree", "heatmap", "heatmap2", "circos"), ...) {
.plot = .plot[1]
if (.plot == "hist") {
vis_hist(.data, ...)
} else if (.plot == "box") {
vis_box(.data, .melt = T, .grouping.var = "Gene",
.labs = c("Gene", "Usage"), .title = "Gene usage",
.subtitle = NULL, ...)
} else if (.plot == "tree") {
vis_treemap(.data, ...)
} else if (.plot == "heatmap") {
row.names(.data) = .data[[1]]
.data = t(as.matrix(.data[2:ncol(.data)]))
vis_heatmap(.data, ...)
} else if (.plot == "heatmap2") {
row.names(.data) = .data[[1]]
.data = t(as.matrix(.data[2:ncol(.data)]))
vis_heatmap2(.data, ...)
} else if (.plot == "circos") {
row.names(.data) = .data[[1]]
.data = t(as.matrix(.data[2:ncol(.data)]))
vis_circos(.data, ...)
} else {
stop("Error: Unknown value of the .plot parameter. Please provide one of the following: 'hist', 'box', 'tree'.")
}
}
#' Visualisation of distributions using histograms
#'
#' @importFrom gridExtra arrangeGrob grid.arrange
#'
#' @name vis_hist
#'
#' @description Visualisation of distributions using ggplot2-based histograms.
#'
#' @param .data Input matrix or data frame.
#'
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#'
#' @param .title The text for the title of the plot.
#'
#' @param .ncol A number of columns to display. Provide NA (by default) if you want the function
#' to automatically detect the optimal number of columns.
#'
#' @param .points A logical value defining whether points will be visualised or not.
#'
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#'
#' @param .coord.flip If TRUE then swap x- and y-axes.
#'
#' @param .grid If TRUE then plot separate visualisations for each sample.
#'
#' @param .labs A character vector of length two with names for x-axis and y-axis, respectively.
#'
#' @param .return.grob If TRUE then returh the gridArrange grob instead of the plot.
#'
#' @param .melt If TRUE then apply \link{melt} to the ".data" before plotting.
#' In this case ".data" is supposed to be a data frame with the first character column reserved
#' for names of genes and other numeric columns reserved to counts or frequencies of genes.
#' Each numeric column should be associated with a specific repertoire sample.
#'
#' @param .legend If TRUE then plots the legend. If FALSE removes the legend from the plot.
#' If NA automatically detects the best way to display legend.
#'
#' @param .add.layer Addditional ggplot2 layers, that added to each plot in the output plot or grid of plots.
#'
#' @param ... Is not used here.
#'
#' @details
#' If data is grouped, then statistical tests for comparing means of groups will be performed, unless \code{.test = F} is supplied.
#' In case there are only two groups, the Wilcoxon rank sum test (https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test) is performed
#' (R function \code{\link{wilcox.test}} with an argument \code{exact = F}) for testing if there is a difference in mean rank values between two groups.
#' In case there more than two groups, the Kruskal-Wallis test (https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance) is performed (R function \code{\link{kruskal.test}}), that is equivalent to ANOVA for ranks and it tests whether samples from different groups originated from the same distribution.
#' A significant Kruskal-Wallis test indicates that at least one sample stochastically dominates one other sample.
#' Adjusted for multiple comparisons P-values are plotted on the top of groups.
#' P-value adjusting is done using the Holm method (https://en.wikipedia.org/wiki/Holm%E2%80%93Bonferroni_method) (also known as Holm-Bonferroni correction).
#' You can execute the command \code{?p.adjust} in the R console to see more.
#'
#' @seealso \link{vis.immunr_gene_usage}, \link{geneUsage}
#'
#' @examples
#' \dontrun{
#' data(immdata)
#' imm_gu = geneUsage(immdata$data)
#' vis(imm_gu, .plot = "hist", .grid = T, .add.layer =
#' theme(axis.text.x = element_text(angle=75,vjust = 1)))
#' }
#'
#' @export
vis_hist <- function (.data, .by = NA, .meta = NA, .title = "Gene usage", .ncol = NA,
.points = T, .test = T, .coord.flip = F,
.grid = F, .labs = c("Gene", NA), .return.grob = F,
.melt = T, .legend = NA, .add.layer = NULL, ...) {
res = .data
if (.melt) {
res = reshape2::melt(res)
res = res[1:nrow(res), ]
if (ncol(.data) == 2) {
res[[2]] = "Data"
}
colnames(res) = c('Gene', 'Sample', 'Freq')
}
if (is.na(.labs[2])) {
.labs[2] = "Frequency"
if (sum(res$Freq > 1, na.rm = T) > 0) {
.labs[2] = "Count"
}
}
if (length(unique(res$Sample)) == 1) {
p <- ggplot() +
geom_bar(aes(x = Gene, y = Freq, fill = Freq), data = res, stat = 'identity', colour = 'black')
expand_vec = c(.02, 0)
p = p + theme_pubr() + rotate_x_text() + theme_cleveland2() +
labs(x = .labs[1], y = .labs[2], title = .title, fill = .labs[2]) +
scale_fill_distiller(palette = "RdBu")
if (.coord.flip) {
p = p + coord_flip() + scale_y_continuous(expand = c(.005, 0))
} else {
p = p + scale_y_continuous(expand = c(.02, 0))
}
if (!is.na(.legend)) {
if (!.legend) {
p = p + guides(fill = "none")
}
} else {
p = p + guides(fill = "none")
}
return(p + .add.layer)
}
else {
if (.grid) {
if (is.na(.legend)) {
.legend = F
}
if (is.na(.ncol)) {
.ncol = round(length(unique(res$Sample)) ** .5)
}
if (!is.na(.by[1])) {
group_res = process_metadata_arguments(res, .by, .meta, "Sample")
group_column = group_res$name
res$Group = group_res$group_column
res <- split(res, res$Group)
ps = list()
for (i in 1:length(res)) {
res[[i]]$Value = res[[i]]$Freq
ps[[i]] = vis_bar(.data = res[[i]], .by = .by, .meta = .meta,
.errorbars = c(0.025, 0.975), .errorbars.off = F, .stack = F,
.points = .points, .test = .test, .signif.label.size = 3.5, .errorbar.width = 0.45,
.defgroupby = "Sample", .grouping.var = "Gene",
.labs = .labs, .title = names(res)[i], .subtitle = NULL,
.legend=F, .leg.title = NA) +
.add.layer
}
if (.return.grob) {
return(do.call(gridExtra::arrangeGrob, c(ps, ncol = .ncol, top = .title)))
} else {
return(do.call(gridExtra::grid.arrange, c(ps, ncol = .ncol, top = .title)))
}
} else {
res <- split(res, res$Sample)
ps = list()
for (i in 1:length(res)) {
ps[[i]] = vis_hist(res[[i]], .title = names(res)[i], .ncol = NA, .coord.flip = .coord.flip,
.grid = F, .labs = c("Gene", NA), .return.grob = F,
.melt = F, .legend = .legend, .add.layer = .add.layer, ...)
}
if (.return.grob) {
return(do.call(gridExtra::arrangeGrob, c(ps, ncol = .ncol, top = .title)))
} else {
return(do.call(gridExtra::grid.arrange, c(ps, ncol = .ncol, top = .title)))
}
}
} else {
res$Value = res$Freq
p = vis_bar(.data = res, .by = .by, .meta = .meta,
.errorbars = c(0.025, 0.975), .errorbars.off = F, .stack = F,
.points = .points, .test = .test, .signif.label.size = 3.5, .errorbar.width = 0.45,
.defgroupby = "Sample", .grouping.var = "Gene",
.labs = .labs,
.title = .title, .subtitle = NULL,
.legend=T, .leg.title = NA)
return(p + .add.layer)
}
}
}
#' Visualisation of distributions using boxplots
#'
#' @name vis_box
#'
#' @description Visualisation of distributions using ggplot2-based boxplots.
#'
#' @param .data Input matrix or data frame.
#'
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#' @param .title The text for the title of the plot.
#' @param .labs Character vector of length two with names for x-axis and y-axis, respectively.
#' @param .melt If TRUE then apply \link{melt} to the ".data" before plotting.
#' In this case ".data" is supposed to be a data frame with the first character column reserved
#' for names of genes and other numeric columns reserved to counts or frequencies of genes.
#' Each numeric column should be associated with a specific repertoire sample.
#'
#' @param .points A logical value defining whether points will be visualised or not.
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#' @param .signif.label.size An integer value defining the size of text for p-value.
#' @param .defgroupby A name for the column with sample names.
#' @param .grouping.var A name for the column to group by.
#' @param .subtitle The The text for the plot's subtitle.
#' @param .leg.title The The text for the plots's legend. Provide NULL to remove the legend's title completely.
#' @param .legend If TRUE then displays a legend, otherwise removes legend from the plot.
#'
#' @seealso \link{vis.immunr_gene_usage}, \link{geneUsage}
#'
#' @export
vis_box <- function (.data, .by = NA, .meta = NA, .melt = T,
.points=T, .test = T, .signif.label.size = 3.5, .defgroupby = "Sample", .grouping.var = "Group",
.labs = c("X", "Y"), .title = "Boxplot (.title argument)",
.subtitle = "Subtitle (.subtitle argument)",
.legend=NA, .leg.title = "Legend (.leg.title argument)", .legend.pos = "right") {
if (.melt) {
res = reshape2::melt(.data)
res = res[1:nrow(res), ]
if (ncol(.data) == 2) {
res[[2]] = "Data"
}
colnames(res) = c(.grouping.var, 'Sample', 'Value')
}
.data = res
group_res = process_metadata_arguments(.data, .by, .meta, .defgroupby)
group_column = group_res$name
.data$Group = group_res$group_column
if (! (.grouping.var %in% names(.data))) {
stop("Error: no column '", .grouping.var, "' in the input data frame.")
}
.data$Grouping.var = .data[[.grouping.var]]
.data_proc = .data %>%
dplyr::group_by(Grouping.var, Group)
if (is.na(.labs[1])) {
if (group_res$is_grouped) {
.labs[1] = "Group"
} else {
.labs[1] = "Sample"
}
}
if (is.na(.leg.title)) {
if (group_res$is_grouped) {
.leg.title = "Group"
} else {
.leg.title = "Sample"
}
}
if (group_res$is_grouped) {
# p = ggplot(aes(x = Grouping.var, y = Value, fill = Group, color=Group, group = Group), data = .data) +
# geom_boxplot(position = "dodge", col = "black")
p = ggplot(aes(x = Grouping.var, y = Value, fill = Group), data = .data) +
geom_boxplot(position = "dodge", col = "black")
if (is.na(.legend)) {
if (.grouping.var == "Group") {
# p = p + guides(fill=F)
.legend.pos = "none"
}
} else if (.legend == F) {
# p = p + guides(fill=F)
.legend.pos = "none"
}
if (.points) {
p = p +
geom_point(color = "black", position=position_jitterdodge(0.05), size = 1)
}
if (.test) {
if (.grouping.var == "Group") {
comparisons = list()
for (i in 1:(nrow(.data_proc)-1)) {
for (j in (i+1):nrow(.data_proc)) {
# if ((.data$Grouping.var[i] == .data$Grouping.var[i]) && (.data$Group[i] != .data$Group[j])) {
comparisons = c(comparisons, list(c(i, j)))
# }
}
}
p_df = compare_means(Value ~ Group, .data, comparisons=comparisons, p.adjust.method = "holm")
y_max = max(.data$Value)
p.value.y.coord <- rep(y_max, nrow(p_df))
step.increase <- (1:nrow(p_df))*(y_max/10)
p.value.y.coord <- p.value.y.coord + step.increase
p_df <- p_df %>%
mutate(y.coord = p.value.y.coord,
p.adj = format.pval(p.adj, digits = 1))
p = p + geom_signif(data = p_df,
aes(xmin = group1, xmax = group2, annotations = p.adj, y_position = y.coord),
manual= TRUE, tip_length = 0.03, size = .5, inherit.aes = F)
} else {
# Seems fine...
# p_df = compare_means(Value ~ Group, group.by = "Grouping.var", method = "kruskal.test", .data, p.adjust.method = "holm")
# print(p_df)
p = p +
stat_compare_means(aes(label = ..p.adj..), bracket.size = .5, size=.signif.label.size,
label.y = max(.data$Value, na.rm = T) * 1.07)
}
}
p = p + .tweak_fill(length(unique(.data$Group)))
} else {
p = ggplot() +
geom_boxplot(aes(x = Grouping.var, y = Value, fill = Sample), position = "dodge", data = .data, col = "black")
if (is.na(.legend)) {
# p = p + guides(fill=F)
.legend.pos = "none"
}
p = p + .colourblind_discrete(length(unique(.data$Sample)))
}
p + theme_pubr(legend = .legend.pos) +
labs(x = .labs[1], y = .labs[2], title = .title, subtitle = .subtitle, fill = group_column) +
rotate_x_text() + theme_cleveland2()
}
#' Visualisation of data frames and matrices using treemaps
#'
#' @name vis_treemap
#'
#' @importFrom treemap treemap
#'
#' @param .data Input data frame or matrix.
#'
#' @param ... Other arguments passed to \link{treemap}.
#'
#' @export
vis_treemap <- function (.data, ...) {
melted = reshape2::melt(.data)
colnames(melted) = c("name", "group", "value")
treemap::treemap(melted, index=c("group", "name"), vSize="value", ...)
}
##### Clustering #####
#' Visualisation of hierarchical clustering
#'
#' @concept clustering
#'
#' @description
#' Visualisation of the results of hierarchical clustering.
#' For other clustering visualisations see \link{vis.immunr_kmeans}.
#'
#' @aliases vis.immunr_hclust
#'
#' @param .data Clustering results from \link{repOverlapAnalysis} or \link{geneUsageAnalysis}.
#' @param .rect Passed to \link{fviz_dend} - whether to add a rectangle around groups.
#' @param .plot A character vector of length one or two specifying which plots to visualise.
#' If "clust" then plot only the clustering. If "best" then plot the number of optimal clusters.
#' If both then plot both.
#' @param .return.grob If TRUE then return grob instead of plot.
#' @param ... Not used here.
#'
#' @seealso \link{vis}, \link{repOverlapAnalysis}, \link{geneUsageAnalysis}
#'
#' @export
vis.immunr_hclust <- function (.data, .rect = F, .plot = c("clust", "best"), .return.grob = F, ...) {
p1 = NULL
if ("clust" %in% .plot) {
p1 = fviz_dend(.data[[1]], main = "Hierarchical clustering", rect = .rect)
}
p2 = NULL
if ("best" %in% .plot) {
p2 = .data[[2]]
}
if (is.null(p1) | is.null(p2)) {
if (is.null(p1)) { p2 } else { p1 }
} else {
if (.return.grob) {
gridExtra::arrangeGrob(p1, p2, ncol = 2)
} else {
gridExtra::grid.arrange(p1, p2, ncol = 2)
}
}
}
#' Visualisation of K-means and DBSCAN clustering
#'
#' @concept clustering
#'
#' @description
#' Visualisation of the results of K-means and DBSCAN clustering.
#' For hierarhical clustering visualisations see \link{vis.immunr_hclust}.
#'
#' @aliases vis.immunr_kmeans vis.immunr_dbscan
#'
#' @param .data Clustering results from \link{repOverlapAnalysis} or \link{geneUsageAnalysis}.
#' @param .point If TRUE then plot sample points. Passed to \link{fviz_cluster}.
#' @param .text If TRUE then plot text labels. Passed to \link{fviz_cluster}.
#' @param .ellipse If TRUE then plot ellipses around all samples. Passed to "ellipse" from \link{fviz_cluster}.
#' @param .point.size Size of points, passed to "pointsize" from \link{fviz_cluster}.
#' @param .text.size Size of text labels, passed to labelsize from \link{fviz_cluster}.
#' @param .plot A character vector of length one or two specifying which plots to visualise.
#' If "clust" then plot only the clustering. If "best" then plot the number of optimal clusters.
#' If both then plot both.
#' @param .return.grob If TRUE then return grob instead of plot.
#' @param ... Not used here.
#'
#' @seealso \link{vis}, \link{repOverlapAnalysis}, \link{geneUsageAnalysis}
#'
#' @export
vis.immunr_kmeans <- function (.data, .point = T, .text = T, .ellipse = T,
.point.size = 2, .text.size = 10, .plot = c("clust", "best"),
.return.grob = F, ...) {
p1 = NULL
if ("clust" %in% .plot) {
p1 = fviz_cluster(.data[[1]], data = .data[[3]], main = "K-means clustering", geom = c("point", "text")[c(.point, .text)],
show.legend.text = F, show.clust.cent = F, repel=T, ellipse = .ellipse, shape = 16,
pointsize = .point.size, labelsize = .text.size, label.rectangle = T) +
guides(fill = "none", shape = "none") +
theme_linedraw()
}
p2 = NULL
if ("best" %in% .plot) {
p2 = .data[[2]]
}
if (is.null(p1) | is.null(p2)) {
if (is.null(p1)) { p2 } else { p1 }
# p = ifelse(is.null(p1), p2, p1) # doesnt work for some reason
} else {
if (.return.grob) {
gridExtra::arrangeGrob(p1, p2, ncol = 2)
} else {
gridExtra::grid.arrange(p1, p2, ncol = 2)
}
}
}
#' @export
vis.immunr_dbscan <- function (.data, .point = T, .text = T, .ellipse = T,
.point.size = 2, .text.size = 10, .plot = c("clust", "best"), ...) {
fviz_cluster(.data[[1]], data = .data[[2]], main = "DBSCAN clustering", geom = c("point", "text")[c(.point, .text)],
show.legend.text = F, show.clust.cent = F, repel=T, ellipse = .ellipse, shape = 16,
pointsize = .point.size, labelsize = .text.size, label.rectangle = T) +
guides(fill = "none", shape = "none") +
theme_linedraw()
}
##### Dimension reduction #####
#' PCA / MDS / tSNE visualisation (mainly overlap / gene usage)
#'
#' @importFrom ggpubr ggscatter
#'
#' @aliases vis.immunr_mds vis.immunr_pca vis.immunr_tsne
#'
#' @param .data Output from analysis functions such as \link{geneUsageAnalysis} or
#' \link{immunr_pca}, \link{immunr_mds} or \link{immunr_tsne}.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#' @param .point Logical. If TRUE then plot points corresponding to objects.
#' @param .text Logical. If TRUE then plot sample names.
#' @param .ellipse Logical. If TRUE then plot ellipses around clusters of grouped samples.
#' @param .point.size Numeric. A size of points to plot.
#' @param .text.size Numeric. A size of sample names' labels.
#' @param ... Not used here.
#'
#' @details
#' Other visualisation methods:
#'
#' - PCA - \link{vis.immunr_pca}
#'
#' - MDS - \link{vis.immunr_mds}
#'
#' - tSNE - \link{vis.immunr_tsne}
#'
#' @export
vis.immunr_mds <- function (.data, .by = NA, .meta = NA,
.point = T, .text = T, .ellipse = T,
.point.size = 2, .text.size = 4, ...) {
if (!.point & !.text) {
stop("Error: Please provide at least one of the arguments: .point and .text")
}
group_res = process_metadata_arguments(data.frame(Sample = row.names(.data$x), stringsAsFactors = F), .by, .meta)
group_column = group_res$name
if (!group_res$is_grouped) {
.ellipse = F
}
fviz_pca_ind(.data, habillage = group_res$group_column, geom = c("point", "text")[c(.point, .text)],
repel=T, addEllipses = .ellipse, mean.point = F, pointshape = 16,
pointsize = .point.size, labelsize = .text.size, label.rectangle = T, show.legend.text = F) +
theme_linedraw() +
guides(fill = "none", shape = "none") +
labs(x = "Dim1", y = "Dim2", title = "Multidimensional scaling", col = group_column)
}
#' @export
vis.immunr_pca <- function (.data, .by = NA, .meta = NA,
.point = T, .text = T, .ellipse = T,
.point.size = 2, .text.size = 4, ...) {
if (!.point & !.text) {
stop("Error: Please provide at least one of the arguments: .point and .text")
}
group_res = process_metadata_arguments(data.frame(Sample = row.names(.data$x), stringsAsFactors = F), .by, .meta)
group_column = group_res$name
if (!group_res$is_grouped) {
.ellipse = F
}
fviz_pca_ind(.data, habillage = group_res$group_column, geom = c("point", "text")[c(.point, .text)],
repel=T, addEllipses = .ellipse, mean.point = F, pointshape = 16,
pointsize = .point.size, labelsize = .text.size, label.rectangle = T, show.legend.text = F) +
theme_linedraw() +
guides(fill = "none", shape = "none") +
labs(title = "Principal component analysis", col = group_column)
}
#' @export
vis.immunr_tsne <- function (.data, .by = NA, .meta = NA,
.point = T, .text = T, .ellipse = T,
.point.size = 2, .text.size = 4, ...) {
.data = data.frame(.data)
colnames(.data) = c("Dim1", "Dim2")
.data$Sample = row.names(.data)
group_res = process_metadata_arguments(.data, .by, .meta)
group_column = group_res$name
.data$Group = group_res$group_column
if (!group_res$is_grouped) {
.ellipse = F
}
ggscatter(data = .data, x = "Dim1", y = "Dim2", color="Group", ellipse = .ellipse, size = .point.size,
point = .point, label = ifelse(.text, "Sample", NULL), repel = T, label.rectangle = T, show.legend.text = F) +
theme_linedraw() +
guides(fill = "none", shape = "none") +
labs(title = "t-Distributed Stochastic Neighbor Embedding", col = group_column)
}
#
# ToDo: ideally, geneUsageAnalysis returns raw objects, and vis.immunr_mds transforms it into the fviz-compatible objects
#
##### Clonality analysis #####
vis_bar_stacked <- function (.data, .by = NA, .meta = NA,
.errorbars=c(0.025, 0.975), .errorbars.off = F, .stack = NA,
.grouping.var = NA,
.labs = c(NA, "Y"),
.title = "Barplot (.title argument)", .subtitle = "Subtitle (.subtitle argument)",
.legend=NA, .leg.title = NA) {
group_res = process_metadata_arguments(.data, .by, .meta)
.data$Group = group_res$group_column
if (! (.grouping.var %in% names(.data))) {
stop("Error: no column '", .grouping.var, "' in the input data frame.")
}
.data$Grouping.var = .data[[.grouping.var]]
position_bar = "dodge"
if (is.na(.stack)) {
if (group_res$is_grouped) {
if (.errorbars.off) {
position_bar = "stack"
} else {
position_bar = "dodge"
}
} else {
position_bar = "stack"
}
} else if (.stack) {
if (group_res$is_grouped) {
if (.errorbars.off) {
position_bar = "stack"
} else {
warning("Warning: Provide .errorbars.off = T in order to display stacked barplots.")
position_bar = "dodge"
}
} else {
position_bar = "stack"
}
}
if (is.na(.labs[1])) {
if (group_res$is_grouped) {
.labs[1] = "Group"
} else {
.labs[1] = "Sample"
}
}
if (group_res$is_grouped) {
perc = .data %>%
dplyr::group_by(Grouping.var, Group) %>%
dplyr::summarise(Value.mean = mean(Value, na.rm = T),
Value.min = quantile(Value, .errorbars[1], na.rm = T),
Value.max = quantile(Value, .errorbars[2], na.rm = T))
p <- ggplot() +
geom_bar(aes(x = Group, y = Value.mean, fill = Grouping.var), data = perc, colour = 'black', stat = 'identity', position=position_bar)
if (!.errorbars.off) {
p = p + geom_errorbar(aes(x = Group, fill = Grouping.var, ymin = Value.min, ymax = Value.max),
data = perc, colour = 'black', width=.2, position=position_dodge(.9))
}
} else {
p <- ggplot() +
geom_bar(aes(x = Sample, y = Value, fill = Grouping.var), data = .data, colour = 'black', stat = 'identity', position = position_bar)
}
p + theme_pubr(legend = "right") +
labs(x = .labs[1], y = .labs[2], title = .title, subtitle = .subtitle, fill = .leg.title) +
.colourblind_discrete(length(unique(.data$Grouping.var)))
}
#' Visualise clonality
#'
#' @name vis.immunr_clonal_prop
#'
#' @aliases vis.immunr_clonal_prop vis.immunr_homeo vis.immunr_top_prop vis.immunr_tail_prop
#'
#' @description An utility function to visualise the output from \code{\link{repClonality}}.
#'
#' @importFrom reshape2 melt
#' @importFrom scales percent
#'
#' @param .data Output from \code{\link{repClonality}}.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#'
#' @param .errorbars A numeric vector of length two with quantiles for error bars
#' on sectors. Disabled if ".errorbars.off" is TRUE.
#'
#' @param .errorbars.off If TRUE then plot CI bars for distances between each group.
#' Disabled if no group passed to the ".by" argument.
#' @param .points A logical value defining whether points will be visualised or not.
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#' @param .signif.label.size An integer value defining the size of text for p-value.
#' @param ... Not used here.
#'
#' @details
#' If data is grouped, then statistical tests for comparing means of groups will be performed, unless \code{.test = F} is supplied.
#' In case there are only two groups, the Wilcoxon rank sum test (https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test) is performed
#' (R function \code{\link{wilcox.test}} with an argument \code{exact = F}) for testing if there is a difference in mean rank values between two groups.
#' In case there more than two groups, the Kruskal-Wallis test (https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance) is performed (R function \code{\link{kruskal.test}}), that is equivalent to ANOVA for ranks and it tests whether samples from different groups originated from the same distribution.
#' A significant Kruskal-Wallis test indicates that at least one sample stochastically dominates one other sample.
#' Adjusted for multiple comparisons P-values are plotted on the top of groups.
#' P-value adjusting is done using the Holm method (https://en.wikipedia.org/wiki/Holm%E2%80%93Bonferroni_method) (also known as Holm-Bonferroni correction).
#' You can execute the command \code{?p.adjust} in the R console to see more.
#'
#' @seealso \link{repClonality} \link{vis}
#'
#' @export
vis.immunr_clonal_prop <- function (.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .errorbars.off = F, .points=T, .test = T, .signif.label.size=3.5, ...) {
# ToDo: this and other repClonality and repDiversity functions doesn't work on a single repertoire. Fix it
perc_value = round(.data[1, 2][1])
.data = data.frame(Sample = row.names(.data), Value = .data[,1])
p = vis_bar(.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = F,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Clonal proportion"),
.title = "Clonal proportion", .subtitle = paste0("Number of clonotypes occupying the ", perc_value, "% of repertoires"),
.legend=NA, .leg.title = NA)
p
}
#' @export
vis.immunr_homeo <- function (.data, .by = NA, .meta = NA, .errorbars=c(0.025, 0.975), .errorbars.off = F, .stack = NA, .test = T, .points=T, ...) {
melted <- reshape2::melt(.data)
colnames(melted) <- c('Sample', 'Clone.group', 'Value')
if (is.na(.by[1]) && is.na(.stack)) {
.stack = T
}
p = vis_bar(melted, .by = .by, .meta = .meta,
.errorbars=.errorbars, .errorbars.off = .errorbars.off, .stack = .stack,
.grouping.var = "Clone.group", .points = .points,
.labs = c(NA, "Relative abundance, perc"),
.title = "Relative abundance", .subtitle = "Summary proportion of clonotypes with specific frequencies",
.legend=NA, .leg.title = "Clonotype group", .test=.test) +
scale_y_continuous(labels=scales::percent)
p
}
#' @export
vis.immunr_top_prop <- function (.data, .by = NA, .meta = NA, .errorbars=c(0.025, 0.975), .errorbars.off = F, .stack = NA, .points=T, .test=T, ...) {
tmp = .data
if (is.null(dim(tmp))) {
tmp = t(as.matrix(tmp))
.data = t(as.matrix(.data))
}
for (i in 2:ncol(.data)) {
tmp[,i] = .data[,i] - .data[,i-1]
}
res = tmp
.head = as.numeric(colnames(.data))
colnames(res) <- paste0('[', c(1, .head[-length(.head)] + 1), ':', .head, ')')
res <- as.data.frame(res)
res$Sample <- row.names(res)
res <- reshape2::melt(res)
colnames(res) = c("Sample", "Clone.index", "Value")
if (is.na(.by[1]) && is.na(.stack)) {
.stack = T
}
p = vis_bar(res, .by = .by, .meta = .meta, .points = .points, .test = .test,
.errorbars=.errorbars, .errorbars.off = .errorbars.off, .stack = .stack,
.grouping.var = "Clone.index",
.labs = c(NA, "Occupied repertoire space"),
.title = "Top clonal proportion", .subtitle = "Summary proportion of clonotypes with specific indices",
.legend=NA, .leg.title = "Clonotype indices") +
scale_y_continuous(labels=scales::percent)
p
}
#' @export
vis.immunr_rare_prop <- function (.data, .by = NA, .meta = NA, .errorbars=c(0.025, 0.975), .errorbars.off = F, .stack = NA, .points=T, .test=T, ...) {
tmp = .data
if (is.null(dim(tmp))) {
tmp = t(as.matrix(tmp))
.data = t(as.matrix(.data))
}
colnames_new = c()
prev_value = "1"
start_index = 1
if (colnames(.data)[1] == "1") {
colnames_new = "1"
prev_value = "2"
start_index = 2
}
for (i in start_index:ncol(.data)) {
colnames_new = c(colnames_new, paste0(prev_value, " - ", colnames(.data)[i]))
if (colnames(.data)[i] != "MAX") {
prev_value = as.character(as.integer(colnames(.data)[i]) + 1)
}
}
colnames(tmp) = colnames_new
for (i in 2:ncol(.data)) {
tmp[,i] = .data[,i] - .data[,i-1]
}
res = tmp
res <- as.data.frame(res)
res$Sample <- row.names(res)
res <- reshape2::melt(res)
colnames(res) = c("Sample", "Counts", "Value")
if (is.na(.by[1]) && is.na(.stack)) {
.stack = T
}
p = vis_bar(res, .by = .by, .meta = .meta,
.errorbars=.errorbars, .errorbars.off = .errorbars.off, .stack = .stack,
.grouping.var = "Counts", .points = .points, .test = .test,
.labs = c(NA, "Occupied repertoire space"),
.title = "Rare clonal proportion", .subtitle = "Summary proportion of clonotypes with specific counts",
.legend=NA, .leg.title = "Clonotype counts") +
scale_y_continuous(labels=scales::percent)
p
}
##### Diversity estimation & dodged bar plots #####
#' Bar plots
#'
#' @importFrom ggpubr compare_means geom_signif stat_compare_means theme_pubr rotate_x_text
#'
#' @name vis_bar
#'
#' @param .data Data for visulisation.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#' @param .errorbars A numeric vector of length two with quantiles for error bars
#' on sectors. Disabled if ".errorbars.off" is TRUE.
#' @param .errorbars.off If TRUE then plot CI bars for distances between each group.
#' Disabled if no group passed to the ".by" argument.
#' @param .errorbar.width Numeric. Width for error bars.
#' @param .points A logical value defining whether points will be visualised or not.
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#' @param .signif.label.size An integer value defining the size of text for p-value.
#' @param .stack If TRUE and .errorbars.off is TRUE then plot stacked bar plots for each Group or Sample
#' @param .defgroupby A name for the column with sample names.
#' @param .grouping.var A name for the column to group by.
#' @param .subtitle The The text for the plot's subtitle.
#' @param .leg.title The The text for the plots's legend. Provide NULL to remove the legend's title completely.
#' @param .legend If TRUE then displays a legend, otherwise removes legend from the plot.
#' @param .labs A character vector of length two specifying names for x-axis and y-axis.
#' @param .title The The text for the plot's title.
#'
#' @export
vis_bar <- function (.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .errorbars.off = F, .stack = F,
.points=T, .test = T, .signif.label.size = 3.5, .errorbar.width = 0.2, .defgroupby = "Sample", .grouping.var = "Group",
.labs = c("X", "Y"), .title = "Barplot (.title argument)",
.subtitle = "Subtitle (.subtitle argument)",
.legend=NA, .leg.title = "Legend (.leg.title argument)", .legend.pos = "right",
.rotate_x = 90) {
group_res = process_metadata_arguments(.data, .by, .meta, .defgroupby)
group_column = group_res$name
.data$Group = group_res$group_column
if (! (.grouping.var %in% names(.data))) {
stop("Error: no column '", .grouping.var, "' in the input data frame.")
}
.data$Grouping.var = .data[[.grouping.var]]
.data_proc = .data %>%
dplyr::group_by(Grouping.var, Group) %>%
dplyr::summarise(Value.mean = mean(Value, na.rm = T),
Value.min = quantile(Value, .errorbars[1], na.rm = T),
Value.max = quantile(Value, .errorbars[2], na.rm = T))
if (is.na(.labs[1])) {
if (group_res$is_grouped) {
.labs[1] = "Group"
} else {
.labs[1] = "Sample"
}
}
if (is.na(.leg.title)) {
.leg.title = group_column
# if (group_res$is_grouped) {
# .leg.title = "Group"
# } else {
# .leg.title = "Sample"
# }
} else if (group_res$is_grouped) {
.leg.title = group_column
}
position_name = "dodge"
if (group_res$is_grouped) {
if (is.na(.stack)) {
.stack = F
}
} else {
if (is.na(.stack)) {
.stack = F
} else if (.stack) {
.errorbars.off = T
.points = F
.test = F
}
}
if (group_res$is_grouped) {
if (.stack) {
p = ggplot() +
geom_bar(aes(x = Group, y = Value.mean, fill = Grouping.var),
stat = "identity", position = "stack", data = .data_proc, col = "black")
} else {
p = ggplot(aes(x = Grouping.var, y = Value, fill = Group, color=Group, group = Group), data = .data) +
geom_bar(aes(x = Grouping.var, y = Value.mean),
stat = "identity", position = position_name, data = .data_proc, col = "black")
}
if (is.na(.legend)) {
if (.grouping.var == "Group") {
# p = p + guides(fill=F)
.legend.pos = "none"
}
} else if (.legend == F) {
# p = p + guides(fill=F)
.legend.pos = "none"
}
if (!.errorbars.off) {
p = p +
geom_errorbar(aes(x = Grouping.var, y = Value.mean, ymin = Value.min, ymax = Value.max, color = Group),
color = "black", data = .data_proc, width=.errorbar.width, position=position_dodge(.9))
}
if (.points) {
p = p +
geom_point(color = "black", position=position_jitterdodge(0.05), size = 1)
}
if (.test) {
if (.grouping.var == "Group") {
comparisons = list()
for (i in 1:(nrow(.data_proc)-1)) {
for (j in (i+1):nrow(.data_proc)) {
# if ((.data$Grouping.var[i] == .data$Grouping.var[i]) && (.data$Group[i] != .data$Group[j])) {
comparisons = c(comparisons, list(c(i, j)))
# }
}
}
p_df = compare_means(Value ~ Group, .data, comparisons=comparisons, p.adjust.method = "holm")
y_max = max(.data$Value)
p.value.y.coord <- rep(y_max, nrow(p_df))
step.increase <- (1:nrow(p_df))*(y_max/10)
p.value.y.coord <- p.value.y.coord + step.increase
p_df <- p_df %>%
mutate(y.coord = p.value.y.coord,
p.adj = format.pval(p.adj, digits = 1))
p = p + geom_signif(data = p_df,
aes(xmin = group1, xmax = group2, annotations = p.adj, y_position = y.coord),
manual= TRUE, tip_length = 0.03, size = .5, inherit.aes = F)
} else {
# Seems fine...
# p_df = compare_means(Value ~ Group, group.by = "Grouping.var", method = "kruskal.test", .data, p.adjust.method = "holm")
# print(p_df)
p = p +
stat_compare_means(aes(label = ..p.adj..), bracket.size = .5, size=.signif.label.size,
label.y = max(.data$Value, na.rm = T) * 1.07)
}
}
p = p + .tweak_fill(length(unique(.data$Group)))
} else {
if (.stack) {
p <- ggplot() +
geom_bar(aes(x = Sample, y = Value, fill = Grouping.var), data = .data, colour = 'black', stat = 'identity', position = "stack")
if (!is.na(.legend)) {
if (!.legend) {
# p = p + guides(fill=F)
.legend.pos = "none"
}
}
p = p + .colourblind_discrete(length(unique(.data$Grouping.var)))
} else {
p = ggplot() +
geom_bar(aes(x = Grouping.var, y = Value, fill = Sample), position = position_name, stat = "identity", data = .data, col = "black")
if (is.na(.legend)) {
# p = p + guides(fill=F)
.legend.pos = "none"
}
p = p + .colourblind_discrete(length(unique(.data$Sample)))
}
}
p = p + theme_pubr(legend = .legend.pos) +
labs(x = .labs[1], y = .labs[2], title = .title, subtitle = .subtitle, fill = .leg.title) +
rotate_x_text() + theme_cleveland2()
p
}
#' Visualise diversity.
#'
#' @name vis.immunr_chao1
#'
#' @aliases vis.immunr_chao1 vis.immunr_dxx vis.immunr_rarefaction vis.immunr_div vis.immunr_ginisimp vis.immunr_invsimp vis.immunr_hill
#' @description An utility function to visualise the output from \code{\link{repDiversity}}.
#'
#' @importFrom reshape2 melt
#'
#' @param .data Output from \code{\link{repDiversity}}.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#' @param .errorbars A numeric vector of length two with quantiles for error bars
#' on sectors. Disabled if ".errorbars.off" is TRUE.
#' @param .errorbars.off If TRUE then plot CI bars for distances between each group.
#' Disabled if no group passed to the ".by" argument.
#' @param .points A logical value defining whether points will be visualised or not.
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#' @param .signif.label.size An integer value defining the size of text for p-value.
#' @param ... Not used here.
#'
#' @details
#' If data is grouped, then statistical tests for comparing means of groups will be performed, unless \code{.test = F} is supplied.
#' In case there are only two groups, the Wilcoxon rank sum test (https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test) is performed
#' (R function \code{\link{wilcox.test}} with an argument \code{exact = F}) for testing if there is a difference in mean rank values between two groups.
#' In case there more than two groups, the Kruskal-Wallis test (https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance) is performed (R function \code{\link{kruskal.test}}), that is equivalent to ANOVA for ranks and it tests whether samples from different groups originated from the same distribution.
#' A significant Kruskal-Wallis test indicates that at least one sample stochastically dominates one other sample.
#' Adjusted for multiple comparisons P-values are plotted on the top of groups.
#' P-value adjusting is done using the Holm method (https://en.wikipedia.org/wiki/Holm%E2%80%93Bonferroni_method) (also known as Holm-Bonferroni correction).
#' You can execute the command \code{?p.adjust} in the R console to see more.
#'
#' @seealso \link{repDiversity} \link{vis}
#' @export
vis.immunr_chao1 <- function (.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .errorbars.off = F, .points=T, .test = T, .signif.label.size=3.5, ...) {
.data = data.frame(Sample = row.names(.data), Value = .data[,1])
vis_bar(.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = F,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Chao1"),
.title = "Chao1", .subtitle = "Sample diversity estimation using Chao1",
.legend=NA, .leg.title = NA)
}
#' @export
vis.immunr_hill <- function (.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .errorbars.off = F,
.points=T, .add.points = T, .point.size=1.5, .add.point.size = 1, .line.size = 0.75, .leg.title = NA, ...) {
group_res = process_metadata_arguments(.data, .by, .meta)
.data$Group = group_res$group_column
if (is.na(.leg.title)) {
if (group_res$is_grouped) {
.leg.title = "Group"
} else {
.leg.title = "Sample"
}
}
grouped_data = .data %>%
dplyr::group_by(Group, Q) %>%
dplyr::summarise(Value.mean = mean(Value, na.rm = T),
Value.min = quantile(Value, .errorbars[1], na.rm = T),
Value.max = quantile(Value, .errorbars[2], na.rm = T))
position_value = "identity"
if (group_res$is_grouped) {
position_value = position_jitter(width=0.1)
}
p = ggplot() +
geom_line(aes(x = Q, y = Value.mean, col = Group), size = .line.size, data = grouped_data, stat = "identity")
if (.points) {
p = p + geom_point(aes(x = Q, y = Value, col = Group), data = .data,
position=position_value, alpha = .5, size = .point.size)
}
if (group_res$is_grouped) {
if (.add.points) {
p = p +
geom_point(aes(x = Q, y = Value.mean, col = Group), data = grouped_data, size = .add.point.size)
}
if (!.errorbars.off) {
p = p +
geom_errorbar(aes(x = Q, ymin = Value.min, ymax = Value.max, group = Group, col = Group),
data = grouped_data, width=.2, size = .line.size, position=position_dodge(0.1))
}
}
p = p +
theme_pubr(legend="right") + theme_cleveland2() +
labs(x = "Q values", y = "Diversity estimation", title = "Hill numbers", subtitle = "Sample diversity estimation using Hill numbers", color = .leg.title)
p
}
#' @export
vis.immunr_div <- function (.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = F,
.points=T, .test = T, .signif.label.size=3.5,
.legend = NA, ...) {
vis_bar(.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = F,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Effective number of clonotypes"),
.title = "True diversity", .subtitle = "Sample diversity estimation using the true diversity index",
.legend=.legend, .leg.title = NA)
}
#' @export
vis.immunr_ginisimp <- function (.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = F,
.points=T, .test = T, .signif.label.size=3.5, ...) {
vis_bar(.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = F,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Gini-Simpson index"),
.title = "Gini-Simpson index", .subtitle = "Sample diversity estimation using the Gini-Simpson index",
.legend=NA, .leg.title = NA)
}
#' @export
vis.immunr_invsimp <- function (.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = F,
.points=T, .test = T, .signif.label.size=3.5,
.legend = NA, ...) {
vis_bar(.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = F,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Inverse Simpson index"),
.title = "Inverse Simpson index", .subtitle = "Sample diversity estimation using the inverse Simpson index",
.legend=.legend, .leg.title = NA)
}
#' @export
vis.immunr_dxx <- function (.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = F,
.points=T, .test = T, .signif.label.size=3.5,
.legend = NA, ...) {
perc_value = round(.data[1, 2][1])
.data = data.frame(Sample = row.names(.data), Value = .data[,1])
vis_bar(.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = F,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, paste0("D", perc_value)),
.title = paste0("D", perc_value, " diversity index"), .subtitle = paste0("Number of clonotypes occupying the ", perc_value, "% of repertoires"),
.legend=.legend, .leg.title = NA)
}
#' @export
vis.immunr_rarefaction <- function (.data, .by = NA, .meta = NA,
.mean = T, .errors = T, .log = F,
.labels = T, ...) {
.muc.res = .data
group_res = process_metadata_arguments(.data, .by, .meta)
data_groups = group_res$groups
group_column = group_res$name
names(data_groups) = group_res$group_names
is_grouped = group_res$is_grouped
.muc.res$Group = group_res$group_column
if ("Type" %in% colnames(.muc.res)) {
.muc.res$Type <- factor(.muc.res$Type, levels = c('interpolation', 'extrapolation'), ordered = T)
}
p <- ggplot() + xlab('Sample size (clones)') + ylab('Estimated diversity (unique clonotypes)') +
ggtitle("Rarefaction analysis") +
theme_pubr(legend="right") + theme_cleveland2()
if (.mean && is_grouped) {
data_proc = .muc.res %>%
group_by(Group, Size) %>%
summarise(MinVal = quantile(Mean, .025, na.rm = T),
MaxVal = quantile(Mean, .975, na.rm = T),
MeanVal = mean(Mean, na.rm = T))
p = p + geom_line(aes(x = Size, y = MeanVal, color=Group), data = data_proc)
if (.errors) {
p = p + geom_ribbon(aes(x = Size, group = Group, ymin=MinVal, ymax=MaxVal, fill=Group), data = data_proc, alpha=0.15)
}
} else {
colnames(.muc.res)[2] = "Q1"
colnames(.muc.res)[4] = "Q2"
if (.errors) {
p = p + geom_ribbon(aes(x = Size, group = Sample, ymin=Q1, ymax=Q2), col = "grey80", data = .muc.res, alpha=0.2)
}
if ("Type" %in% colnames(.muc.res)) {
p = p +
geom_line(aes(x = Size, y = Mean, colour = Group, linetype = Type), size=1, data = .muc.res)
} else {
p = p +
geom_line(aes(x = Size, y = Mean, colour = Group), size=1, data = .muc.res)
}
if (.labels) {
tmp <- .muc.res[tapply(1:nrow(.muc.res), .muc.res$Sample, tail, 1), ]
tmp = tmp[order(tmp$Group),]
p <- p + ggrepel::geom_label_repel(aes(x = Size, y = Mean, label = Sample, fill = Group), data = tmp)
}
}
if (.log) {
p <- p + scale_x_log10()
}
p + .tweak_col(length(unique(.muc.res$Group))) + .tweak_fill(length(unique(.muc.res$Group)))
}
##### Exploratory analysis #####
#' Visualise exploratory analysis.
#'
#' @name vis.immunr_exp_vol
#'
#' @aliases vis.immunr_exp_vol vis.immunr_exp_count vis.immunr_exp_len vis.immunr_exp_clones
#' @description An utility function to visualise the output from \code{\link{repExplore}}.
#'
#' @importFrom reshape2 melt
#'
#' @param .data Output from \code{\link{repExplore}}.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#'
#' @param .errorbars A numeric vector of length two with quantiles for error bars
#' on sectors. Disabled if ".errorbars.off" is TRUE.
#'
#' @param .errorbars.off If TRUE then plot CI bars for distances between each group.
#' Disabled if no group passed to the ".by" argument.
#' @param .points A logical value defining whether points will be visualised or not.
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#' @param .signif.label.size An integer value defining the size of text for p-value.
#' @param ... Not used here.
#'
#' @details
#' If data is grouped, then statistical tests for comparing means of groups will be performed, unless \code{.test = F} is supplied.
#' In case there are only two groups, the Wilcoxon rank sum test (https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test) is performed
#' (R function \code{\link{wilcox.test}} with an argument \code{exact = F}) for testing if there is a difference in mean rank values between two groups.
#' In case there more than two groups, the Kruskal-Wallis test (https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance) is performed (R function \code{\link{kruskal.test}}), that is equivalent to ANOVA for ranks and it tests whether samples from different groups originated from the same distribution.
#' A significant Kruskal-Wallis test indicates that at least one sample stochastically dominates one other sample.
#' Adjusted for multiple comparisons P-values are plotted on the top of groups.
#' P-value adjusting is done using the Holm method (https://en.wikipedia.org/wiki/Holm%E2%80%93Bonferroni_method) (also known as Holm-Bonferroni correction).
#' You can execute the command \code{?p.adjust} in the R console to see more.
#'
#' @seealso \link{repExplore} \link{vis}
#' @export
vis.immunr_exp_vol <- function (.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = F,
.points=T, .test = T,
.signif.label.size=3.5, ...) {
.data = rename_column(.data, "Volume", "Value")
vis_bar(.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = F,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Clonotypes"),
.title = "Number of clonotypes", .subtitle = "Number of unique clonotypes in the input data",
.legend=NA, .leg.title = NA)
}
#' @export
vis.immunr_exp_count <- function (.data, .by = NA, .meta = NA, .logx=T, .logy=T, .labs = c("Abundance", "Number of clonotypes"),
.title = "Distribution of clonotype abundances", .subtitle = NULL,
.legend=T, .leg.title = NA, ...) {
group_res = process_metadata_arguments(.data, .by, .meta)
.data$Group = group_res$group_column
if (is.na(.leg.title)) {
if (group_res$is_grouped) {
.leg.title = "Group"
} else {
.leg.title = "Sample"
}
}
p = ggplot() + geom_line(aes(x = Clone.num, y = Clonotypes, col = Group, group = Sample), data = .data, stat = "identity")
if (.logx) {
p = p + scale_x_log10()
}
if (.logy) {
p = p + scale_y_log10()
}
p = p + labs(x = .labs[1], y = .labs[2], title = .title, subtitle = .subtitle, color = .leg.title) +
theme_pubr(legend="right") + rotate_x_text() + theme_cleveland2()
p
}
#' @export
vis.immunr_exp_len <- function (.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = F,
.points = T, .test = T,
.signif.label.size = 3.5, ...) {
.data = rename_column(.data, "Count", "Value")
vis_bar(.data = .data, .by = .by, .meta = .meta,
.errorbars = c(0.025, 0.975), .errorbars.off = .errorbars.off, .stack = F,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Length",
.labs = c("CDR3 length", "Clonotypes"),
.title = "Distribution of CDR3 lengths", .subtitle = NULL,
.legend=T, .leg.title = NA)
}
#' @export
vis.immunr_exp_clones <- function (.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = F,
.points=T, .test = T,
.signif.label.size=3.5, ...) {
.data = rename_column(.data, "Clones", "Value")
vis_bar(.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = F,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Clones"),
.title = "Number of clones", .subtitle = "Number of clones in the input data",
.legend=NA, .leg.title = NA)
}
##### Kmer analysis & sequence logo plot #####
#' Most frequent kmers visualisation.
#'
#' @name vis.immunr_kmer_table
#'
#' @description
#' Plot a distribution (bar plot) of the most frequent kmers in a data.
#'
#' @param .data Data frame with two columns "Kmers" and "Count" or a list with such data frames. See Examples.
#' @param .head Number of the most frequent kmers to choose for plotting from each data frame.
#' @param .position Character vector of length 1. Position of bars for each kmers. Value for the \code{ggplot2} argument \code{position}.
#' @param .log Logical. If TRUE then plot log-scaled plots.
#' @param ... Not used here.
#'
#' @seealso \code{get.kmers}
#'
#' @examples
#' \dontrun{
#' # Load necessary data and package.
#' library(gridExtra)
#' data(immdata)
#' # Get 5-mers.
#' imm.km <- getKmers(immdata)
#' # Plots for kmer proportions in each data frame in immdata.
#' p1 <- vis(imm.km, .position = 'stack')
#' p2 <- vis(imm.km, .position = 'fill')
#' grid.arrange(p1, p2)
#' }
#' @export
vis.immunr_kmer_table <- function (.data, .head = 100, .position = c('stack', 'dodge', 'fill'), .log = F, ...) {
.position = switch(substr(.position[1], 1, 1), s = "stack", d = "dodge", f = "fill")
# .data[is.na(.data)] <- 0
max_counts = apply(.data[,-1], 1, max, na.rm = T)
max_indices = head(order(max_counts, decreasing = T), .head)
n_samples = ncol(.data) - 1
.data = .data[max_indices,]
melted = reshape2::melt(.data, id.vars = "Kmer")
colnames(melted) = c("Kmer", "Sample", "Count")
p <- ggplot() + geom_bar(aes(x = Kmer, y = Count, fill = Sample), col = "black",
data = melted, stat = 'identity', position = .position) +
theme_pubr(legend="right") + rotate_x_text() + theme_cleveland2()
if (.position[1] == 'stack' || .position[1] == 'dodge') {
p <- p + ylab('Count')
} else {
p <- p + ylab('Proportions')
}
if (.log) {
p = p + scale_y_log10(expand = c(0, 0))
} else {
p = p + scale_y_continuous(expand = c(0, 0))
}
p + scale_x_discrete(expand = c(0, 0)) + .tweak_fill(n_samples) + ggtitle("Kmers distribution")
}
#' Sequence logo plots for amino acid profiles.
#'
#' @importFrom ggseqlogo geom_logo theme_logo
#'
#' @aliases vis_seqlogo vis_textlogo
#'
#' @name vis_textlogo
#'
#' @usage
#' vis_textlogo(.data, .replace.zero.with.na = T, .width = 0.1, ...)
#'
#' vis_seqlogo(.data, .scheme = "chemistry", ...)
#'
#' @description
#' Plot sequence logo plots for visualising of amino acid motif sequences / profiles.
#'
#' `vis_textlogo` plots sequences in a text format - each letter has the same height. Useful when there
#' are no big differences between occurences of amino acids in the motif.
#'
#' `vis_seqlogo` is a traditional sequence logo plots. Useful when there are one or two amino acids
#' with clear differences in their occurrences.
#'
#' @param .data Output from the \code{kmer.profile} function.
#' @param .replace.zero.with.na if T then replace all zeros with NAs, therefore letters with
#' zero frequency wont appear at the plot.
#' @param .scheme Character. An argumentt passed to \link{geom_logo} specifying how to colour symbols.
#' @param .width Width for jitter, i.e., how much points will scatter around the verical line. Pass 0 (zero)
#' to plot points on the straight vertical line for each position.
#' @param ... Not used here.
#'
#' @return ggplot2 object
#'
#' @seealso \link{getKmers}, \link{kmer_profile}
#'
#' @examples
#' \dontrun{
#' data(immdata)
#' kmers = getKmers(immdata$data[[1]], 5)
#' ppm = kmer_profile(kmers, "prob")
#' vis(ppm, .plot="text")
#' vis(ppm, .plot="seq")
#'
#' d <- kmer_profile(c('CASLL', 'CASSQ', 'CASGL'))
#' vis_textlogo(d)
#' vis_seqlog(d)
#' }
#' @export
vis_textlogo <- function (.data, .replace.zero.with.na = T, .width = 0.1, ...) {
# ToDo: make different color schemas, for type of aminoacids (polarity, etc), etc
.data = reshape2::melt(.data)
if (.replace.zero.with.na) {
.data$value[.data$value == 0] = NA
}
colnames(.data) = c('AA', "Position", "Freq")
ggplot(aes(x = Position, y = Freq, colour = AA), data = .data) +
geom_jitter(colour = 'black', width=.width) +
ggrepel::geom_label_repel(aes(label = AA), size = 5) +
xlab("Position") + ylab("Proportion") +
theme_pubr(legend='none')
}
#' @export
vis_seqlogo <- function (.data, .scheme = "chemistry", ...) {
ggplot() +
ggseqlogo::geom_logo(.data, method="custom", col_scheme = .scheme) +
ggseqlogo::theme_logo()
}
#' Visualise kmer profiles
#'
#' @param .data Kmer data, an output from \link{kmer_profile}.
#' @param .plot String specifying the plot type:
#'
#' - "seqlogo" for traditional sequence logo plots using \link{vis_seqlogo};
#'
#' - "textlogo" for modified approach to sequence logo plots via text labels using \link{vis_textlogo};
#' @param ... Other arguments passed to \link{vis_textlogo} or \link{vis_seqlogo}, depending
#' on the ".plot" argument.
#'
#' @examples
#' \dontrun{
#' data(immdata)
#' getKmers(immdata$data[[1]], 5) %>% kmer_profile() %>% vis("seqlogo")
#' }
#'
#' @export
vis_immunr_kmer_profile_main <- function (.data, .plot, ...) {
if (.plot[1] == "text") {
vis_textlogo(.data, ...)
} else if (.plot[1] == "seq") {
vis_seqlogo(.data, ...)
} else if (.plot[1] == "textlogo") {
vis_textlogo(.data, ...)
} else if (.plot[1] == "seqlogo") {
vis_seqlogo(.data, ...)
} else {
stop('Error: unknown plot type. Please provide "seq" for seqlogo plots (vis_seqlogo) or "text" for textlogo plots (vis_textlogo) ')
}
}
#' @export
vis.immunr_kmer_profile_pfm <- function (.data, .plot = c("textlogo", "seqlogo"), ...) {
p = vis_immunr_kmer_profile_main(.data, .plot, ...)
p + ggtitle("Position frequency matrix")
}
#' @export
vis.immunr_kmer_profile_ppm <- function (.data, .plot = c("textlogo", "seqlogo"), ...) {
p = vis_immunr_kmer_profile_main(.data, .plot, ...)
p + ggtitle("Position probability matrix")
}
#' @export
vis.immunr_kmer_profile_pwm <- function (.data, .plot = c("textlogo", "seqlogo"), ...) {
p = vis_immunr_kmer_profile_main(.data, .plot, ...)
p + ggtitle("Position weight matrix")
}
#' @export
vis.immunr_kmer_profile_self <- function (.data, .plot = c("textlogo", "seqlogo"), ...) {
p = vis_immunr_kmer_profile_main(.data, .plot, ...)
p + ggtitle("Position self-information matrix")
}
##### Other & WIP visualisations #####
#' Repertoire dynamics
#'
#' @importFrom data.table setnames melt.data.table
#' @importFrom ggalluvial geom_flow geom_stratum
#'
#' @name vis.immunr_dynamics
#'
#' @param .data Output from the \link{trackClonotypes} function.
#' @param .plot Character. Either "smooth", "area" or "line". Each specifies a type of plot for visualisation of clonotype dynamics.
#' @param .order Numeric or character vector. Specifies the order to samples, e.g., it used for ordering samples
#' by timepoints. Either See "Examples" below for more details.
#' @param .log Logical. If TRUE then use log-scale for the frequency axis.
#' @param ... Not used here.
#'
#' @examples
#' \dontrun{
#' # Load an example data that comes with immunarch
#' data(immdata)
#'
#' # Option 1
#' # Choose the first 10 amino acid clonotype sequences
#' # from the first repertoire to track
#' tc = trackClonotypes(immdata$data, list(1, 10), .col = "aa")
#' # Choose the first 20 nucleotide clonotype sequences
#' # and their V genes from the "MS1" repertoire to track
#' tc = trackClonotypes(immdata$data, list("MS1", 20), .col = "nt+v")
#'
#' # Option 2
#' # Choose clonotypes with amino acid sequences "CASRGLITDTQYF" or "CSASRGSPNEQYF"
#' tc = trackClonotypes(immdata$data, c("CASRGLITDTQYF", "CSASRGSPNEQYF"), .col = "aa")
#'
#' # Option 3
#' # Choose the first 10 clonotypes from the first repertoire
#' # with amino acid sequences and V segments
#' target = immdata$data[[1]] %>% select(CDR3.aa, V.name) %>% head(10)
#' tc = trackClonotypes(immdata$data, target)
#'
#' # Visualise the output regardless of the chosen option
#' # Therea are three way to visualise it, regulated by the .plot argument
#' vis(tc, .plot = "smooth")
#' vis(tc, .plot = "area")
#' vis(tc, .plot = "line")
#'
#' # Visualising timepoints
#' # First, we create an additional column in the metadata with randomly choosen timepoints:
#' immdata$meta$Timepoint = sample(1:length(immdata$data))
#' immdata$meta
#' # Next, we create a vector with samples in the right order, according to the "Timepoint" column (from smallest to greatest):
#' sample_order = order(immdata$meta$Timepoint)
#' # Sanity check: timepoints are following the right order:
#' immdata$meta$Timepoint[sample_order]
#' # Samples, sorted by the timepoints:
#' immdata$meta$Sample[sample_order]
#' # And finally, we visualise the data:
#' vis(tc, .order = sample_order)
#' }
#'
#' @export
vis.immunr_dynamics <- function (.data, .plot = c("smooth", "area", "line"), .order = NA, .log = F, ...) {
.plot = .plot[1]
if (!(.plot %in% c("smooth", "area", "line"))) {
stop("Error: unknown plot identifier \"", .plot, "\". Please provide one of the following: \"smooth\", \"area\" or \"line\".")
}
y_lab_title = "Count"
melted = melt(.data) %>% lazy_dt() %>% rename(Count = value, Sample = variable) %>% collect()
setDT(melted)
if (max(melted[["Count"]]) <= 1) {
y_lab_title = "Proportion"
}
last_obj_column_i = match("Sample", names(melted)) - 1
column_names = names(melted)[1:last_obj_column_i]
melted[, Clonotype := do.call(paste, .SD), .SDcols = column_names]
if (!is.na(.order)) {
if (is.character(.order)) {
ordered_sample_names = .order
} else {
sample_names = unique(melted$Sample)
ordered_sample_names = sample_names[.order]
}
melted = melted[melted$Sample %in% ordered_sample_names, ]
melted$Sample = factor(melted$Sample, levels = ordered_sample_names, ordered = T)
}
p = ggplot(melted, aes(x = Sample, fill = Clonotype, stratum = Clonotype,
alluvium = Clonotype, y = Count, label = Clonotype))
if (.plot == "smooth") {
p = p +
geom_flow() +
geom_stratum()
} else if (.plot == "area") {
p = p +
geom_area(aes(group = Clonotype), color = "black")
} else {
p = p +
geom_line(aes(color = Clonotype, group = Clonotype))
}
if (.log) {
p = p + scale_y_log10()
}
p +
ylab(y_lab_title) +
ggtitle("Clonotype tracking") +
theme_pubr(legend = "right") + rotate_x_text(90) + theme_cleveland2()
}
# vis.immunr_mutation_network <- function (.data) {
# stop(IMMUNR_ERROR_NOT_IMPL)
# }
# vis.immunr_cdr_prop <- function (.data, .by = NA, .meta = NA, .plot = c("box", "hist")) {
# stop(IMMUNR_ERROR_NOT_IMPL)
# }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.