Nothing
############
# Pre-reqs #
############
# Names used unquoted inside ggplot2::aes() calls in this file (some may also
# be used elsewhere in the package). Declaring them here avoids the
# "no visible binding for global variable" NOTE from R CMD check.
# "Position" and "columnwise.SPS" are referenced bare in aes() below and were
# previously missing from this list.
globalVariables(c("Reference", "Comparison", "Proportion", "Change", "value",
                  "Identity", "PropCys", "Position", "columnwise.SPS"))
# Format a proportion as a percentage string, e.g. 0.256 -> "25.6%".
# `digits`, `format` and anything supplied through `...` are forwarded
# unchanged to formatC(); the result is vectorised over `x`.
percent <- function(x, digits = 1, format = "f", ...) {
  scaled <- 100 * x
  number_text <- formatC(scaled, format = format, digits = digits, ...)
  paste0(number_text, "%")
}
#########
# Plots #
#########
#' A heatmap plot of the column identities between two multiple sequence alignments
#'
#' @param x an object of type "pairwise alignment comparison" (typically the summary file generated by compare_alignments)
#' @param scale scale data to proportion of characters that are not conserved gaps (default = TRUE)
#' @param display display this plot (default = TRUE)
#'
#' @return The ggplot object. When `display = TRUE` the plot is printed once and the object is returned invisibly, so that a top-level call does not draw it a second time through auto-printing.
#'
#' @export
#' @examples
#' data(reference_alignment)
#' data(comparison_alignment)
#' PAC <- compare_alignments(reference_alignment,comparison_alignment)
#' plot_similarity_heatmap(PAC)
#'
#' @note This function displays the similarity between each pairwise column comparison for the reference and comparison MSAs. Colour density is determined by the proportion of identical character matches between the columns, normalised to the number of characters that are not merely conserved gaps. This gives a representation of which columns are well agreed upon by the MSAs, and which columns are split by one MSA relative to the other.
#'
plot_similarity_heatmap <- function(x, scale = TRUE, display = TRUE) {
  # melt() reports the row index first, so after transposing the rows of
  # hm_data become the "Reference" axis and its columns the "Comparison" axis.
  hm_data <- t(x$similarity_S)
  if (scale) {
    hm_data <- hm_data / (1 - x$results_R[2, ])  # Similarity, excluding conserved gaps
  }

  md <- reshape2::melt(hm_data)
  colnames(md) <- c("Reference", "Comparison", "value")

  white_bg <- ggplot2::element_rect(fill = "white")
  p <- ggplot2::ggplot(md) +
    ggplot2::geom_tile(ggplot2::aes(x = Reference, y = Comparison, fill = value)) +
    ggplot2::scale_fill_gradient("Similarity", low = "white", high = "black") +
    ggplot2::labs(x = "Reference MSA column", y = "Comparison MSA column") +
    ggplot2::scale_x_continuous(expand = c(0, 0)) +
    ggplot2::scale_y_reverse(expand = c(0, 0)) +
    ggplot2::theme(plot.background = white_bg,
                   panel.background = white_bg)

  if (display) {
    print(p)
    # Already drawn above; returning visibly would draw it again when the
    # function is called at top level.
    return(invisible(p))
  }
  p
}
#' A heatmap plot of the dissimilarity matrix of two multiple sequence alignments
#'
#' @param x an object of type "pairwise alignment comparison" (typically the summary file generated by compare_alignments)
#' @param display display this plot (default = TRUE)
#'
#' @return The ggplot object. When `display = TRUE` the plot is printed once and the object is returned invisibly, so that a top-level call does not draw it a second time through auto-printing.
#'
#' @export
#' @examples
#' data(reference_alignment)
#' data(comparison_alignment)
#' PAC <- compare_alignments(reference_alignment,comparison_alignment)
#' plot_dissimilarity_matrix(PAC)
#'
#' @note This function displays the dissimilarity categories for all characters in the reference alignment. This gives a representation of which columns are well agreed upon by the MSAs, and which sequence regions of the reference alignment are split, merged, or shifted.
#'
plot_dissimilarity_matrix <- function(x, display = TRUE) {
  hm_data <- as.matrix(t(x$dissimilarity_simple))

  # Expand the one-letter codes into legend labels. The order matters: no
  # label may contain a code that is substituted in a later step (e.g. "Merge"
  # contains "g", so "g" has to be replaced before "m").
  hm_data <- gsub(hm_data, pattern = "M", replacement = "Match")
  hm_data <- gsub(hm_data, pattern = "g", replacement = "Gap")
  hm_data <- gsub(hm_data, pattern = "m", replacement = "Merge")
  hm_data <- gsub(hm_data, pattern = "s", replacement = "Split")
  hm_data <- gsub(hm_data, pattern = "x", replacement = "Shift")

  md <- reshape2::melt(hm_data)
  colnames(md) <- c("Position", "Sequence", "Dissimilarity")

  # Row names of the reference matrix are used as the y-axis labels.
  seq_names <- rownames(x$reference_P)
  if (is.null(seq_names)) {
    # No names available: keep the default numeric breaks rather than building
    # a break sequence from a zero-length label vector.
    y_scale <- ggplot2::scale_y_reverse(expand = c(0, 0))
  } else {
    y_scale <- ggplot2::scale_y_reverse(expand = c(0, 0),
                                        labels = seq_names,
                                        breaks = seq_along(seq_names))
  }

  # Colours are named so that each category keeps its colour even when some
  # categories do not occur in the data. Unnamed values are assigned in level
  # order, which shifts the colours whenever a category is absent.
  category_colours <- c(Gap   = "white",
                        Match = "black",
                        Merge = "darkred",
                        Shift = "firebrick2",
                        Split = "pink")

  white_bg <- ggplot2::element_rect(fill = "white")
  p <- ggplot2::ggplot(md) +
    ggplot2::geom_tile(ggplot2::aes_string(x = "Position", y = "Sequence", fill = "Dissimilarity")) +
    ggplot2::scale_x_continuous(expand = c(0, 0)) +
    y_scale +
    ggplot2::labs(x = "Reference MSA column") +
    ggplot2::scale_fill_manual(values = category_colours) +
    ggplot2::theme(plot.background = white_bg,
                   panel.background = white_bg)

  if (display) {
    print(p)
    # Already drawn above; returning visibly would draw it again when the
    # function is called at top level.
    return(invisible(p))
  }
  p
}
#' A line plot summary of column similarity between two multiple sequence alignments
#'
#' @param x an object of type "pairwise alignment comparison" (typically the summary file generated by compare_alignments)
#' @param scale scale data to proportion of characters that are not conserved gaps (default = TRUE)
#' @param CS additionally indicate columns with 100 percent identity using markers on the x-axis (default = FALSE)
#' @param cys additionally show the cysteine abundance for each column (default = FALSE)
#' @param display display this plot (default = TRUE)
#'
#' @return The ggplot object. When `display = TRUE` the plot is printed once and the object is returned invisibly, so that a top-level call does not draw it a second time through auto-printing.
#'
#' @export
#' @examples
#' data(reference_alignment)
#' data(comparison_alignment)
#' PAC <- compare_alignments(reference_alignment, comparison_alignment, CS=TRUE)
#' plot_similarity_summary(PAC, CS=TRUE, cys=TRUE)
#'
#' @note This function generates a plot that summarises the similarity between the two multiple sequence alignments for each column of the reference alignment. For each column, it plots the proportion of identical character matches as a proportion of the characters that are not merely conserved gaps. The overall average proportion of identical characters that are not conserved gaps is overlaid as a percentage. For alignments of cysteine-rich proteins, the cysteine abundance for each column may also be plotted to indicate columns containing conserved cysteines (`cys=TRUE`).
#'
plot_similarity_summary <- function(x, scale = TRUE, CS = FALSE, cys = FALSE, display = TRUE) {
  col_identity <- x$results_R[1, ]
  if (scale) {
    col_identity <- col_identity / (1 - x$results_R[2, ])  # Similarity, excluding conserved gaps
  }

  # Rescale and offset the cysteine trace. Assuming x$cys is a proportion in
  # [0, 1], this places it between -0.2 and 0, i.e. below the similarity line
  # (TODO confirm the range of x$cys against compare_alignments).
  proportion_cys <- 0.2 * (x$cys) - 0.2

  # The column score is optional. It is treated as absent when it is NULL or a
  # single NA. The test has to yield exactly one TRUE/FALSE: calling is.na()
  # directly on a list gives one value per element, which is not a valid `if`
  # condition (an error since R 4.2).
  cs <- x$column_score
  has_cs <- !is.null(cs) && !(is.atomic(cs) && length(cs) == 1L && is.na(cs))
  if (has_cs) {
    columnwise.CS <- cs$columnwise.column.score == 1
    sum.CS <- cs$column.score
  } else {
    columnwise.CS <- FALSE
    sum.CS <- NA
  }

  score <- x$similarity_score
  # Full element name: `x$results` only resolved through partial matching.
  col <- seq_len(ncol(x$results_R))
  plot_data <- data.frame(Identity = col_identity,
                          columnwise.CS = columnwise.CS,
                          PropCys = proportion_cys,
                          Position = col)

  p <- ggplot2::ggplot(plot_data, ggplot2::aes(x = Position)) +
    ggplot2::geom_line(ggplot2::aes(y = Identity, colour = "Similarity")) +
    ggplot2::labs(x = "Reference MSA column", y = "Proportion") +
    ggplot2::scale_x_continuous(expand = c(0, 0)) +
    ggplot2::scale_y_continuous(expand = c(0, 0), breaks = seq(0, 1, 1/10)) +
    ggplot2::theme_classic() +
    ggplot2::theme(legend.title = ggplot2::element_text(face = "bold")) +
    # The legend title doubles as the place where the overall scores are shown.
    ggplot2::scale_colour_discrete(breaks = c("Similarity", "Cysteines"),
                                   name = paste("Score =", percent(score),
                                                "\nCS score =", percent(sum.CS)))

  if (cys) {
    # Cysteine trace plus a baseline at y = 0 separating it from the
    # similarity line.
    p <- p + ggplot2::geom_line(ggplot2::aes(y = PropCys, colour = "Cysteines")) +
      ggplot2::geom_line(ggplot2::aes(y = 0))
  }

  if (CS) {
    # Mark the columns flagged by the columnwise column score on the x-axis.
    cs_data <- plot_data[which(plot_data$columnwise.CS), ]
    if (nrow(cs_data) > 0) {
      p <- p + ggplot2::geom_point(data = cs_data, ggplot2::aes(y = 0))
    }
  }

  if (display) {
    print(p)
    # Already drawn above; returning visibly would draw it again when the
    # function is called at top level.
    return(invisible(p))
  }
  p
}
#' An area plot summary of the different causes of column dissimilarity between two multiple sequence alignments
#'
#' @param x an object of type "pairwise alignment comparison" (typically the summary file generated by compare_alignments)
#' @param scale scale data to proportion of characters that are not conserved gaps (default = TRUE)
#' @param stack stacked area plot instead of line plot (default = TRUE)
#' @param display display this plot (default = TRUE)
#'
#' @return The ggplot object. When `display = TRUE` the plot is printed once and the object is returned invisibly, so that a top-level call does not draw it a second time through auto-printing.
#'
#' @export
#' @examples
#' data(reference_alignment)
#' data(comparison_alignment)
#' PAC <- compare_alignments(reference_alignment, comparison_alignment)
#' plot_dissimilarity_summary(PAC, stack=TRUE)
#'
#' @note This function generates a detailed breakdown of the differences between the multiple sequence alignments for each column of the reference alignment. For each column, the relative proportions of merges, splits and shifts is plotted as a proportion of characters that are not merely conserved gaps.
#'
plot_dissimilarity_summary <- function(x, scale = TRUE, stack = TRUE, display = TRUE) {
  # When scaling, every category is divided by the share of characters that
  # are not conserved gaps (row 2 of results_R holds the conserved-gap share);
  # otherwise the raw values are used.
  denom <- if (scale) 1 - x$results_R[2, ] else 1

  # Full element name: `x$results` only resolved through partial matching.
  plot_data <- data.frame(Merge = x$results_R[3, ] / denom,
                          Shift = x$results_R[5, ] / denom,
                          Split = x$results_R[4, ] / denom,
                          Position = seq_len(ncol(x$results_R)))

  md <- reshape2::melt(plot_data, id.vars = "Position")
  colnames(md) <- c("Position", "Dissimilarity", "Proportion")

  base_plot <- ggplot2::ggplot(md, ggplot2::aes(x = Position, y = Proportion))
  if (stack) {
    p <- base_plot +
      ggplot2::geom_area(ggplot2::aes_string(fill = "Dissimilarity"), position = "stack") +
      # Outline of each stacked band. The lines are grouped explicitly by
      # category; `data` and `ymax` are not geom_line aesthetics.
      ggplot2::geom_line(ggplot2::aes_string(group = "Dissimilarity"), position = "stack") +
      ggplot2::scale_x_continuous(expand = c(0, 0)) +
      ggplot2::scale_y_continuous(expand = c(0, 0)) +
      ggplot2::labs(x = "Reference MSA column") +
      # Colours follow the factor level order produced by melt():
      # Merge, Shift, Split.
      ggplot2::scale_fill_manual(values = c("darkred", "firebrick2", "pink")) +
      ggplot2::theme_classic()
  } else {
    p <- base_plot +
      ggplot2::geom_line(ggplot2::aes_string(color = "Dissimilarity")) +
      ggplot2::scale_x_continuous(expand = c(0, 0)) +
      ggplot2::scale_y_continuous(expand = c(0, 0)) +
      ggplot2::theme_classic()
  }

  if (display) {
    print(p)
    # Already drawn above; returning visibly would draw it again when the
    # function is called at top level.
    return(invisible(p))
  }
  p
}
#' A line plot summary of sum of pairs score between two multiple sequence alignments
#'
#' @param x an object of type "pairwise alignment comparison" (typically the summary file generated by compare_alignments)
#' @param CS indicate columns with 100 percent identity using markers on the x-axis (default = TRUE)
#' @param display display this plot (default = TRUE)
#'
#' @return The ggplot object. When `display = TRUE` the plot is printed once and the object is returned invisibly, so that a top-level call does not draw it a second time through auto-printing.
#'
#' @export
#' @examples
#' data(reference_alignment)
#' data(comparison_alignment)
#' PAC <- compare_alignments(reference_alignment, comparison_alignment, SP=TRUE)
#' plot_SP_summary(PAC)
#'
#' @note This function generates a plot that summarises the columnwise sums of pairs for the two multiple sequence alignments. For each column of the comparison alignment, it plots the proportion of conserved residue pairs as a proportion of the possible residue pairs. The overall sum of pairs score, reverse sum of pairs score, and column score are also reported as percentages.
#'
plot_SP_summary <- function(x, CS = TRUE, display = TRUE) {
  sp <- x$sum_of_pairs
  columnwise.SPS <- sp$columnwise.SPS
  columnwise.CS <- sp$columnwise.CS
  # Kept as a column of the plot data; not mapped to any layer below.
  columnwise.CS.y <- -0.05 * columnwise.CS
  sum.SP <- sp$sum.of.pairs.score
  sum.PS <- sp$reverse.sum.of.pairs.score
  sum.CS <- sp$column.score

  col <- seq_along(columnwise.SPS)
  plot_data <- data.frame(columnwise.SPS = columnwise.SPS,
                          columnwise.CS = columnwise.CS,
                          columnwise.CS.y = columnwise.CS.y,
                          Position = col)

  p <- ggplot2::ggplot(plot_data, ggplot2::aes(x = Position)) +
    ggplot2::geom_line(ggplot2::aes(y = columnwise.SPS, colour = "Sum of pairs score")) +
    ggplot2::labs(x = "Comparison MSA column", y = "Proportion") +
    ggplot2::scale_x_continuous(expand = c(0, 0)) +
    ggplot2::theme_classic() +
    ggplot2::theme(legend.title = ggplot2::element_text(face = "bold")) +
    # The legend title doubles as the place where the overall scores are shown.
    ggplot2::scale_colour_discrete(breaks = c("Sum of pairs score", "Column score"),
                                   name = paste("SP score =", percent(sum.SP),
                                                "\nPS score =", percent(sum.PS),
                                                "\nCS score =", percent(sum.CS)))

  if (CS) {
    # Compare against 1 so the selection works whether the columnwise score is
    # stored as logical or as numeric 0/1; which() itself only accepts logical
    # input.
    cs_data <- plot_data[which(plot_data$columnwise.CS == 1), ]
    if (nrow(cs_data) > 0) {
      p <- p + ggplot2::geom_point(data = cs_data, ggplot2::aes(y = 0))
    }
  }

  if (display) {
    print(p)
    # Already drawn above; returning visibly would draw it again when the
    # function is called at top level.
    return(invisible(p))
  }
  p
}
#########
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.