########################################################################################################
#' Graphical output of alleles division by genotypes
#'
#' The \code{genoHeatmap} function generates a graphical output of the alleles per gene in multiple samples.
#'
#'
#' @param geno_table genoytpe summary table. See details.
#' @param chain the IG chain: IGH,IGK,IGL. Default is IGH.
#' @param gene_sort if by 'name' the genes in the output are ordered lexicographically,
#' if by 'position' only functional genes are used and are ordered by their chromosomal location. Default is 'position'.
#' @param removeIGH if TRUE, 'IGH'\'IGK'\'IGL' prefix is removed from gene names.
#' @param lk_cutoff the lK cutoff value to be considerd low for texture layer. Defualt is lK<1.
#' @param mark_low_lk if TRUE, a texture is add for low lK values. Defualt is TRUE.
#' @param color_y named list of the colors for y axis labels. Defualt is NULL.
#' @param file file path for rendering the plot to pdf. If non is supplied than the plot is retured as object. Defualt is NULL.
#' @return
#'
#' The following list is returned:
#' \itemize{
#' \item \code{'p'}: heat-map visualization of the genotype inference for multiple samples.
#' \item \code{'width'}: Optimal width value for rendering plot.
#' \item \code{'height'}: Optimal width value for rendering plot.
#' }
#'
#' When a file is supplied the graph is also rendered directly to pdf.
#'
#' @details
#'
#' A \code{data.frame} created by \code{inferGenotypeBaysian}.
#'
#'
#' @export
genoHeatmap <- function(geno_table, chain = c("IGH", "IGK", "IGL"), gene_sort = "position", removeIGH = TRUE, lk_cutoff = 1, mark_low_lk = TRUE, html = FALSE, n_line = 4, line_length=60, pseudo_genes = FALSE, ORF_genes = FALSE, file = NULL, color_y = NULL) {
if (missing(chain)) {
chain = "IGH"
}
chain <- match.arg(chain)
lk_cutoff = as.numeric(lk_cutoff)
# select columns
geno_db <- geno_table[,c("subject", "gene", "GENOTYPED_ALLELES", "k_diff", "Freq_by_Clone")]
# rename the columns
names(geno_db)[3:4] <- c("ALLELES", "K")
# correct deletion annotations
geno_db$ALLELES <- gsub("Deletion","Del",geno_db$ALLELES)
# set data.table and correct missing Unk annotations and K
geno_db <- setDT(geno_db)[CJ("subject" = geno_db$subject, "gene" = geno_db$gene, unique=TRUE), on=c("subject", "gene")]
geno_db[is.na(geno_db$ALLELES) , c("ALLELES","K") := list("Unk", NA_integer_)]
# set K value for deleted genes
geno_db$K[grep("Del",geno_db$ALLELES)] <- NA_integer_
# expand row, one allele per row
geno_db <- splitstackshape::cSplit(geno_db, "ALLELES", sep = ",", direction = "long", fixed = T, type.convert = F)
# add pseudo genes and orf to color base
color_pes_orf <- c()
GENE.loc.tmp <- GENE.loc[[chain]]
if(pseudo_genes){
color_pes_orf <- c(grep("V",PSEUDO[[chain]],value = T),color_pes_orf)
}else{
GENE.loc.tmp <- c(unique(grep("V",PSEUDO[[chain]],value = T))[! grep("V",PSEUDO[[chain]],value = T) %in% GENE.loc.tmp],GENE.loc.tmp)
}
if(ORF_genes){
color_pes_orf <- c(unique(grep("OR|NL", geno_db$gene,value = T)),color_pes_orf)
}else{
GENE.loc.tmp <- c(unique(grep("OR|NL", geno_db$gene,value = T))[! unique(grep("OR|NL", geno_db$gene,value = T)) %in% GENE.loc.tmp],GENE.loc.tmp)
}
# sort the data, remove pseudo and orf if needed
geno_db <- sortDFByGene(DATA = geno_db, chain = chain, method = gene_sort, removeIGH = removeIGH, geno = T,
peseudo_remove = pseudo_genes, ORF_remove = ORF_genes)
geno_db$gene <- factor(geno_db$gene, levels = gsub("IG[H|K|L]", "", GENE.loc.tmp))
# rename genes to numbers
gene_loc <- 1:length(unique(geno_db$gene)[order(match(unique(geno_db$gene), levels(geno_db$gene)))])
names(gene_loc) <- unique(geno_db$gene)[order(match(unique(geno_db$gene), levels(geno_db$gene)))]
geno_db$GENE_LOC <- gene_loc[as.character(geno_db$gene)]
######sort the heatmap for plotting
geno_db_m <- geno_db[, n:= .N, by = c("subject", "gene")][] # count number of alleles for group
geno_db_m$ALLELES_G <- geno_db_m$ALLELES # for grouping
geno_db_m$text <- ''
geno_db_m$text_bottom <- geno_db_m$ALLELES
# change ambiguous alleles call
id_nra <- grepl("[0-9][0-9]_[0-9][0-9]", geno_db_m$ALLELES)
nra <- F
if (any(id_nra)) {
# number ambiguous alleles
num_text <- paste0('[*',1:length(unique(geno_db_m$ALLELES[id_nra])),']')
names(num_text) <- unique(geno_db_m$ALLELES[id_nra])
# text for plot
geno_db_m$text[id_nra] <- num_text[geno_db_m$ALLELES[id_nra]]
# text for legend
geno_db_m$text_bottom[id_nra] <- paste(num_text[geno_db_m$ALLELES[id_nra]],geno_db_m$ALLELES[id_nra])
# change allele to NRA - non reliable allele
geno_db_m$ALLELES[id_nra] <- "NRA"
# indicates that nra exists
nra <- T
}
# create allele palette
allele_palette <- allelePalette(geno_db_m$ALLELES)
# sort novel allele calls for plot
val_novel <- grep('^[0-9]+[_][A-Z][0-9]+[A-Z]',geno_db_m$ALLELES, value = T)
novel <- F
novel_allele_text <- c()
novel_symbol <- "\u005E"
if(length(val_novel)!=0){
# sort the palettle colors for novel alleles
id <- grep('^[0-9]+[_][A-Z][0-9]+[A-Z]',names(allele_palette$transper))
allele_palette$transper[id] <- 1
# cerate code index for novel allele
code_allele <- paste0(novel_symbol,1:length(id))
names(code_allele) <-allele_palette$AlleleCol[id]
new_allele <- paste0(novel_symbol,1:length(id),'-',allele_palette$AlleleCol[id])
names(new_allele) <-allele_palette$AlleleCol[id]
# change the text for plot
ids <- geno_db_m$ALLELES %fin% names(new_allele)
rep <- new_allele[geno_db_m$ALLELES[ids]]
rep2 <- code_allele[geno_db_m$ALLELES[ids]]
# add new allele code to data
geno_db_m[ids, c("ALLELES","text_bottom","text") := list(rep,rep,rep2)]
# change annotation in legend colors
allele_palette$AlleleCol[id] <- new_allele
names(allele_palette$transper)[id] <- new_allele
# indicates that novel exists
novel <- T
}
geno_db_m$ALLELES <- factor(geno_db_m$ALLELES, levels = allele_palette$AlleleCol)
# samples names and number
samples <- unique(geno_table$subject)
samples_n <- length(samples)
# genes names and number
genes <- unique(geno_db_m$gene)
genes_n <- length(genes)
# order the data by gene loc
setorderv(geno_db_m, c("subject","GENE_LOC"))
setkey(geno_db_m, "subject")
# sort data for matrix
geno_db_m[,"line":=12/geno_db_m$n]
allele_code <- 1:length(allele_palette$AlleleCol)
names(allele_code) <- gsub("\\^[0-9]+[-]","",allele_palette$AlleleCol)
# sort the alleles in gene box
geno_db_m[,"A_CODE":=allele_code[geno_db_m$ALLELES]+1]
geno_db_m[grep("[0-9]_[0-9]",geno_db_m$ALLELES,perl = T), "A_CODE":=allele_code["NRA"]]
setorderv(geno_db_m, c("subject","GENE_LOC","A_CODE"))
# duplicate the data by 12 box to gene
geno_db_m[,"id" := 1:.N, by = c("subject", "gene")]
geno_db_f = geno_db_m[,c("n_line" = 1:get("line")), by = c("subject", "gene", "GENE_LOC", "ALLELES_G", "A_CODE", "text_bottom"), nomatch = 0]
# transform allele codes to matrix, 12 box for each gene. each row is an individual
m <- matrix(geno_db_f[[5]],ncol = 12*genes_n,byrow = T,dimnames = list(unique(geno_db_f[[1]]),geno_db_f[[2]][1:(12*genes_n)]))
#### sort legend
# fit width of legend column text by the longest allele
longest_allele <- max(nchar(allele_palette$AlleleCol))*3+40
# legend length
leg_length <- next_divisor(longest_allele,genes_n*12)
# create legend values for matrix
leg <- function(x,allele_text_length) c(rep(x,4),rep(0,allele_text_length))
seqs <- lapply(allele_code+1,leg, allele_text_length = leg_length-4)
# add values for legend to complete the matrix, white boxes
add = ceiling(length(unlist(seqs))/(genes_n*12))*(genes_n*12) - length(unlist(seqs))
add = ifelse(add<0,0,add)
# legend matrix
m2 <- matrix(c(unlist(seqs),rep(0,add)), ncol = genes_n*12, byrow = T)
# start and end values for plot parts
start <- c(0.01)
end <- c(0.15,0.13)
size = 1
short_reads_rows = 0
## add short read text annotation at the bottom
if(nra){
bottom_annot <- unique(grep("[0-9][0-9]_[0-9][0-9]", geno_db_m$text_bottom , value = T, perl = T))
# Create text for annotating the short reads labels
annot <- splitlines(bottom_annot, genes_n*12/(4*size))
annot <- annot[!is.na(dplyr::na_if(annot,""))]
short_reads_rows = length(annot)
annot <- paste0(annot,collapse = '\n')
# add the start and value for the third part
end <- c(0.2, 0.22, 0.06)
start <- c(0.03,0.01)
}
# set the height and width of plot
height <- samples_n * 0.1 + 5 + nrow(m2)*0.2 + short_reads_rows*0.4 # number of samples, number of rows in legend, number of rows in bottom annotation
width <- genes_n * 0.3 + 1.5 # numer of genes
size_text = nrow(m)/(height*width)+1 # text size for heatmap annoations
size_text_leg = ncol(m2)/(width*longest_allele)+1 # text size for legend annotations
if(!is.null(file)){
pdf(file,onefile = F, width = width, height = height, family = "serif")
print('pdf')
}
# plot layout
layout.matrix <- matrix(c(1, 2, 3), nrow = 3, ncol = 1)
graphics::layout(mat = layout.matrix,
heights = c(3, 0.5, 0.3) # Heights of the three rows
)
# heatmap main plot
par(mar=c(2,6,8,6))
image(t(m),col = names(allele_palette$AlleleCol), breaks = 1:(length(allele_palette$AlleleCol)+1),axes=F)
# add grid lines for genes
grid(lwd=1,nx = genes_n,ny=0,col = "white",lty = 1)
# add axis annotations
axis(3,(0:(genes_n-1))/genes_n+6/(12*genes_n),names(gene_loc),las=3) # top
axis(1,(0:(genes_n-1))/genes_n+6/(12*genes_n),names(gene_loc),las=3) # bottom
# color y tick labels if supplied
colors <- "black"
if(!is.null(color_y)) colors <- color_y[rownames(m)]
axis_loc_div <- ifelse(samples_n==1, samples_n, samples_n-1)
Map(axis, side=2, at=(0:(samples_n-1))/(axis_loc_div), col.axis=colors, labels=rownames(m), lwd=0, las=1, cex.axis=0.8) #left
axis(2,at=(0:(samples_n-1))/(axis_loc_div),labels=FALSE)
# draw lines for low lk values
sub_geno = geno_db_m[geno_db_m$K<lk_cutoff,]
NR = samples_n
NC = genes_n*12
if(nrow(sub_geno)>0){
apply(sub_geno, 1,function(x){
I = which(x["subject"]==samples)-1 # row index
J = (as.numeric(x["GENE_LOC"])-1)*12 # column index
draw_segment(NR,NC,I,J,lwd=1,col="white")}
)}
# ad text annotations
ids_text <- grep('^[0-9]|Del|Unk',geno_db_m$text_bottom,invert = T)
if(length(ids_text)>0){
sub_geno = geno_db_m[ids_text,]
NR = samples_n
NC = genes_n*12
apply(sub_geno, 1,function(x){
I = which(x["subject"]==samples)-1 # row index
J = (as.numeric(x["GENE_LOC"])-1)*12 # column index
ALLELE = as.numeric(x["id"]) # allele index
N_ALLELES = as.numeric(x["n"]) # number of alleles
TEXT = x["text"] # text
Write_text(NR,NC,I,J,ALLELE,N_ALLELES,TEXT,cex=size_text)}
)}
# legend plot
par(mar=c(0,6,4,6))
image(t(m2),col = names(allele_palette$AlleleCol), breaks = 1:(length(allele_palette$AlleleCol)+1),axes=F)
# add grid lines for each row
grid(lwd=1,ny = nrow(m2), nx = 0,col = "black",lty = 2)
# add text anotation for legend
NR = nrow(m2)
NC = genes_n*12
names(allele_code) <- allele_palette$AlleleCol
invisible(tapply(allele_code,names(allele_code),function(x){
ii = which(m2==x+1,arr.ind = T)[1,]
I = ii[[1]]-1 # row index
J = ii[[2]] # column index
ALLELE = 1 # allele index
N_ALLELES = 1 # number of alleles
TEXT = names(x) # text
STEP_X<-1/(NC-1)
STEP_Y<-ifelse(1/(NR-1)==Inf,0,1/(NR-1))
text(STEP_X*J+STEP_X*leg_length*0.5,
STEP_Y*I,
TEXT,cex = size_text_leg)
}
))
# bottom text for nra, only if exists
if(nra){
# add the text at the bottom
par(mar=c(1,6,1,6))
gplots::textplot(annot, halign = "center", cex = size)
}
if(!is.null(file)) {
dev.off()
# embed the fonts to file
embedFonts(file)
} else{
p1.base <- recordPlot()
invisible(dev.off())
return(list(p = p1.base, width = width, height = height))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.