Nothing
#'V(D)J gene usage barplots
#'
#' @description Produces a barplot with the most frequently used IgH and IgK/L Vgenes.
#' @param VDJ Either (for platypus version "v2") output from VDJ_analyze function. This should be a list of clonotype dataframes, with each list element corresponding to a single VDJ repertoire, OR (for platypus version "v3") the the VDJ matrix output of the VDJ_build() function.
#' @param group.by Character. Defaults to "sample_id". Column name of VDJ to group plot by.
#' @param HC.gene.number Numeric value indicating the top genes to be dispayed. If this number is higher than the total number of unique HC V genes in the VDJ repertoire, then this number is equal to the number of unique HC V genes.
#' @param LC.Vgene Logical indicating whether to make a barplot of the LC V genes distribution. Default is set to FALSE.
#' @param LC.gene.number Numeric value indicating the top genes to be dispayed. If this number is higher than the total number of unique LC V genes in the VDJ repertoire, then this number is equal to the number of unique LC V genes.
#' @param platypus.version Character. Defaults to "v3". Can be "v2" or "v3" dependent on the input format
#' @return Returns a list of ggplot objects which show the distribution of IgH and IgK/L V genes for the most used V genes.
#' @param is.bulk logical value indicating whether the VDJ input was generated from bulk-sequencing data using the bulk_to_vgm function. If is.bulk = T, the VDJ_Vgene_usage_barplot function is compatible for use with bulk data. Defaults to False (F).
#' @export
#' @examples
#' VDJ_Vgene_usage_barplot(VDJ = Platypus::small_vdj,
#' HC.gene.number = 2, platypus.version = "v3")
#'
VDJ_Vgene_usage_barplot <- function(VDJ,
group.by,
HC.gene.number,
LC.Vgene,
LC.gene.number,
platypus.version,
is.bulk){
Vgene <- NULL
Percentage <- NULL
VDJ_chain_count <- NULL
VJ_chain_count <- NULL
sample_id <- NULL
HC_Vgene_usage <- list()
HC_Vgene_usage_plot <- list()
LC_Vgene_usage <- list()
LC_Vgene_usage_plot <- list()
if(missing(is.bulk)) is.bulk <- FALSE
if(missing(LC.Vgene)) LC.Vgene <- FALSE
if(missing(platypus.version)) platypus.version <- "v3"
if(missing(HC.gene.number)) HC.gene.number <- 10
#naming compatibility
VDJ.matrix <- VDJ
VDJ <- NULL
if(platypus.version == "v2"){
clonotype.list <- VDJ.matrix
for (i in 1:length(clonotype.list)){
HC_Vgene_usage[[i]] <- as.data.frame(table(clonotype.list[[i]]$HC_vgene))
colnames(HC_Vgene_usage[[i]]) <- c("Vgene", "Frequency")
for (j in 1:nrow(HC_Vgene_usage[[i]])){
HC_Vgene_usage[[i]]$Percentage[j] <- HC_Vgene_usage[[i]]$Frequency[j]/sum(HC_Vgene_usage[[i]]$Frequency)*100
}
actual.gene.number <- HC.gene.number
if(HC.gene.number > nrow(HC_Vgene_usage[[i]])){
actual.gene.number <- nrow(HC_Vgene_usage[[i]])
}
#Rank based on most used V gene
ranks <- order(-HC_Vgene_usage[[i]]$Frequency)
HC_Vgene_usage[[i]] <- HC_Vgene_usage[[i]][ranks,]
HC_Vgene_usage_plot[[i]] <- ggplot2::ggplot(HC_Vgene_usage[[i]][1:actual.gene.number,], ggplot2::aes(x=Vgene, y=Percentage, fill=Vgene)) + ggplot2::geom_bar(stat="identity", size=0.5, width=0.6, color="black", show.legend = FALSE) + cowplot::theme_cowplot() + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) + ggplot2::ylab("% of unique clones") + ggplot2::xlab("") + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) + ggplot2::scale_y_continuous(expand = c(0,0)) + ggplot2::scale_color_gradient(low="blue", high="red") + ggplot2::ggtitle(paste0("sample ",i)) + ggplot2::theme(text = ggplot2::element_text(size = 12))
}
# }
if(LC.Vgene==TRUE){
for (i in 1:length(clonotype.list)){
LC_Vgene_usage[[i]] <- as.data.frame(table(clonotype.list[[i]]$LC_vgene))
colnames(LC_Vgene_usage[[i]]) <- c("Vgene", "Frequency")
for (j in 1:nrow(LC_Vgene_usage[[i]])){
LC_Vgene_usage[[i]]$Percentage[j] <- LC_Vgene_usage[[i]]$Frequency[j]/sum(LC_Vgene_usage[[i]]$Frequency)*100
}
actual.gene.number <- LC.gene.number
if(LC.gene.number > nrow(LC_Vgene_usage[[i]])){
actual.gene.number <- nrow(LC_Vgene_usage[[i]])
}
#Rank based on most used V gene
ranks <- order(-LC_Vgene_usage[[i]]$Frequency)
LC_Vgene_usage[[i]] <- LC_Vgene_usage[[i]][ranks,]
LC_Vgene_usage_plot[[i]] <- ggplot2::ggplot(LC_Vgene_usage[[i]][1:actual.gene.number,], ggplot2::aes(x=Vgene, y=Percentage, fill=Vgene)) + ggplot2::geom_bar(stat="identity", size=0.5, width=0.6, color="black", show.legend = FALSE) + cowplot::theme_cowplot() + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) + ggplot2::ylab("% of unique clones") + ggplot2::xlab("") + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) + ggplot2::scale_y_continuous(expand = c(0,0)) + ggplot2::scale_color_gradient(low="blue", high="red") + ggplot2::ggtitle(paste0("sample ",i)) + ggplot2::theme(text = ggplot2::element_text(size = 12))
}
}
Vgene_usage_plot <- do.call(c, list(HC_Vgene_usage_plot, LC_Vgene_usage_plot))
return(Vgene_usage_plot)
} else if(platypus.version == "v3"){
if(is.bulk == FALSE){
#filtering for max 1VDJ 1VJ chain
VDJ.matrix <- subset(VDJ.matrix, VDJ_chain_count == 1 & VJ_chain_count == 1)
}
if(missing(group.by)) group.by <- "sample_id"
if(group.by != "sample_id"){
if(group.by %in% names(VDJ.matrix)){
VDJ.matrix$sample_id <- as.character(VDJ.matrix[,group.by])
if(any(is.na(VDJ.matrix$sample_id)) == TRUE){
VDJ.matrix <- VDJ.matrix[!is.na(VDJ.matrix$sample_id),]
warning(paste0("Filtered out cells with 'NA' in grouping column"))
}
message(paste0("Grouping by: ", group.by))
} else {
warning(paste0("Group_id '",group.by, "' was not found in VDJ. Grouping by 'sample_id'"))}
}
clonotype.list <- list()
for(i in 1:length(unique(VDJ.matrix$sample_id))){
clonotype.list[[i]] <- subset(VDJ.matrix, sample_id == unique(VDJ.matrix$sample_id)[i])
#removing extra cells cells to leave only 1 per clonotype
clonotype.list[[i]] <- clonotype.list[[i]][duplicated(clonotype.list[[i]]$clonotype_id) == FALSE,]
}
names(clonotype.list) <- unique(VDJ.matrix$sample_id)
message(paste0("Sample order: ", paste0(unique(VDJ.matrix$sample_id), collapse = " ; ")))
for (i in 1:length(clonotype.list)){
HC_Vgene_usage[[i]] <- as.data.frame(table(clonotype.list[[i]]$VDJ_vgene))
colnames(HC_Vgene_usage[[i]]) <- c("Vgene", "Frequency")
for (j in 1:nrow(HC_Vgene_usage[[i]])){
HC_Vgene_usage[[i]]$Percentage[j] <- HC_Vgene_usage[[i]]$Frequency[j]/sum(HC_Vgene_usage[[i]]$Frequency)*100
}
actual.gene.number <- HC.gene.number
if(HC.gene.number > nrow(HC_Vgene_usage[[i]])){
actual.gene.number <- nrow(HC_Vgene_usage[[i]])
}
#Rank based on most used V gene
ranks <- order(-HC_Vgene_usage[[i]]$Frequency)
HC_Vgene_usage[[i]] <- HC_Vgene_usage[[i]][ranks,]
HC_Vgene_usage_plot[[i]] <- ggplot2::ggplot(HC_Vgene_usage[[i]][1:actual.gene.number,], ggplot2::aes(x=Vgene, y=Percentage, fill=Vgene)) + ggplot2::geom_bar(stat="identity", size=0.5, width=0.6, color="black", show.legend = FALSE) + cowplot::theme_cowplot() + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) + ggplot2::ylab("% of unique clones") + ggplot2::xlab("") + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) + ggplot2::scale_y_continuous(expand = c(0,0)) + ggplot2::scale_color_gradient(low="blue", high="red") + ggplot2::ggtitle(paste0("sample ",names(clonotype.list)[i])) + ggplot2::theme(text = ggplot2::element_text(size = 12))
}
# }
if(LC.Vgene==TRUE){
for (i in 1:length(clonotype.list)){
LC_Vgene_usage[[i]] <- as.data.frame(table(clonotype.list[[i]]$VJ_vgene))
colnames(LC_Vgene_usage[[i]]) <- c("Vgene", "Frequency")
for (j in 1:nrow(LC_Vgene_usage[[i]])){
LC_Vgene_usage[[i]]$Percentage[j] <- LC_Vgene_usage[[i]]$Frequency[j]/sum(LC_Vgene_usage[[i]]$Frequency)*100
}
actual.gene.number <- LC.gene.number
if(LC.gene.number > nrow(LC_Vgene_usage[[i]])){
actual.gene.number <- nrow(LC_Vgene_usage[[i]])
}
#Rank based on most used V gene
ranks <- order(-LC_Vgene_usage[[i]]$Frequency)
LC_Vgene_usage[[i]] <- LC_Vgene_usage[[i]][ranks,]
LC_Vgene_usage_plot[[i]] <- ggplot2::ggplot(LC_Vgene_usage[[i]][1:actual.gene.number,], ggplot2::aes(x=Vgene, y=Percentage, fill=Vgene)) + ggplot2::geom_bar(stat="identity", size=0.5, width=0.6, color="black", show.legend = FALSE) + cowplot::theme_cowplot() + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) + ggplot2::ylab("% of unique clones") + ggplot2::xlab("") + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) + ggplot2::scale_y_continuous(expand = c(0,0)) + ggplot2::scale_color_gradient(low="blue", high="red") + ggplot2::ggtitle(paste0("sample ", names(clonotype.list)[i])) + ggplot2::theme(text = ggplot2::element_text(size = 12))
}
}
Vgene_usage_plot <- do.call(c, list(HC_Vgene_usage_plot, LC_Vgene_usage_plot))
return(Vgene_usage_plot)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.