Nothing
# Region definition extension for including FWR2-4 and CDR2-3
#' @include Shazam.R
NULL
#### Extend region definition to CDR3 and FWR4 ####
#' Build a data.frame from a ChangeoClone and an igraph object containing a clonal lineage
#'
#' \code{makeGraphDf} creates a data.frame from a \link[alakazam]{ChangeoClone} and an
#' igraph \code{graph} object containing a B cell lineage tree and associated sequence data.
#' The data.frame contains the original fields and additions such as each sequence's parent in the
#' lineage tree, the lineage germline, and additional rows for inferred sequences.
#'
#' @param curCloneGraph an igraph \code{graph} object for the lineage tree generated by
#' \link[alakazam]{buildPhylipLineage}. Note that the field containing the
#' nucleotide sequence in the object must be named \code{sequence}.
#' @param curCloneObj \link[alakazam]{ChangeoClone} object used to generate the lineage.
#' @param objSeqId name of the sequence identifier field in \code{curCloneObj}.
#' @param objSeq name of the nucleotide sequence field in \code{curCloneObj}.
#'
#' @return A \code{data.frame} with sequence and lineage information, including the
#' the parent nucleotide sequence in the lineage tree(\code{parent_sequence}),
#' an internal parent identifier (\code{parent}), and additional rows for germline
#' sequence and inferred intermediate sequences.
#'
#' Values in the \code{sequence_id} field are renamed to numeric values,
#' prefixed with the clonal grouping identifier and labeled as either \code{"Inferred"}
#' or \code{"Germline"} if they are not an observed sequence. For example, for a lineage
#' with \code{clone_id = 34} the new identifiers would be of the form:
#' \code{"34_Germline"}, \code{"34_Inferred1"}, \code{"34_1"}, \code{"34_2"}, etc.
#'
#' Note that the original sequence identifier is preserved in the \code{orig_sequence_id} field
#' and the original parent sequence identifier is retained in \code{orig_parent}.
#'
#' @seealso See \link{observedMutations} to calculate mutation frequencies using
#' \code{parent_sequence} as the reference germline. See \link[alakazam]{ChangeoClone},
#' \link[alakazam]{buildPhylipLineage}, and \link[igraph]{graph} for details on the
#' input objects.
#'
#' @examples
#' # Load and subset example data
#' data(ExampleDb, package = "alakazam")
#' data(ExampleTrees, package = "alakazam")
#' graph <- ExampleTrees[[17]]
#' db <- subset(ExampleDb, clone_id == graph$clone)
#' clone <- alakazam::makeChangeoClone(db)
#'
#' # Extend data with lineage information
#' df <- makeGraphDf(graph, clone)
#'
#' @export
makeGraphDf <- function(curCloneGraph, curCloneObj, objSeqId="sequence_id", objSeq="sequence") {
# extracting the cur_clone_num from the inputs to function:
cur_clone_num <- curCloneObj@clone
# generating a data frame from the clone igraph object
# (- for getting the inferred sequences from the graph,
# and the parent information):
curCloneGraph_df <- summarizeSubtrees(curCloneGraph, fields="sequence")
# merging the db from clone object and from graph:
cur_clone_merged_df <- merge(x=curCloneObj@data, y=curCloneGraph_df,
by.x=objSeqId, by.y="name", all=T)
# Renaming sequence_id column to orig_sequence_id, and renaming parent
# to orig_parent:
cur_clone_merged_df$orig_sequence_id <- cur_clone_merged_df[,objSeqId]
cur_clone_merged_df$orig_parent <- cur_clone_merged_df$parent
# uniquifying some values in merged data frame, and filling some
# missing values:
#1. Replacing inferred sequences names with a unique name
# (using the clone number).
# Doing so for both sequence_id and parent and graph vertices
cur_clone_merged_df$parent <- gsub(pattern="Inferred",
x=cur_clone_merged_df$parent,
replacement=paste("Inferred_",
cur_clone_num, "_",
sep=""))
cur_clone_merged_df[,objSeqId] <- gsub(pattern="Inferred",
x=cur_clone_merged_df[,objSeqId],
replacement=paste("Inferred_",
cur_clone_num,
"_", sep=""))
V(curCloneGraph)$label <- gsub(pattern="Inferred",
x=V(curCloneGraph)$label,
replacement=paste("Inferred_", cur_clone_num,
"_", sep=""))
#2. Replacing Germline sequence name with a unique name
# (using the clone number).
# Doing so for both sequence_id and parent and graph vertices:
cur_clone_merged_df$parent <- gsub(pattern="Germline",
x=cur_clone_merged_df$parent,
replacement=paste("Germline_",
cur_clone_num, sep=""))
cur_clone_merged_df$sequence_id <- gsub(pattern="Germline",
x=cur_clone_merged_df[,objSeqId],
replacement=paste("Germline_",
cur_clone_num,
sep=""))
V(curCloneGraph)$label <- gsub(pattern="Germline",
x=V(curCloneGraph)$label,
replacement= paste("Germline_",
cur_clone_num, sep=""))
#3. Now need to fill in missing values for germline sequence and
# inffered sequences:
cur_clone_merged_df$clone <- cur_clone_num
cur_clone_merged_df$v_call <- curCloneObj@v_gene
cur_clone_merged_df$j_call <- curCloneObj@j_gene
cur_clone_merged_df$junction_length <- curCloneObj@junc_len
cur_clone_merged_df$germline_imgt <- curCloneObj@germline
#4. setting a new sequence_id column with following format:
#<CLONE_NUM>_<SEQUENCE_SERIAL_NUM>
#Except for Germline and Inferred names which will remain as is.
cur_clone_merged_df[,objSeqId] <- paste(cur_clone_num, "_",
c(1:length(cur_clone_merged_df$orig_sequence_id)),
sep="")
cur_clone_merged_df[,objSeqId] <- ifelse(grepl("Germline|Inferred",
cur_clone_merged_df$orig_sequence_id),
paste(cur_clone_num, "_",
cur_clone_merged_df$orig_sequence_id,
sep=""),
cur_clone_merged_df[,objSeqId])
#5. Doing the same for parent column:
# setting a new parent column with following format:
# <CLONE_NUM>_<SEQUENCE_SERIAL_NUM>
#Except for Germline and Inferred names which will remain as is.
cur_clone_merged_df$parent <- cur_clone_merged_df[match(cur_clone_merged_df$orig_parent,
cur_clone_merged_df$orig_sequence_id),
objSeqId]
#6. There are 2 sequence columns: one from curCloneGraph_df (names "sequence")
# and one from curCloneObj@data (called per argument objSeq).
# so first checking if objSeq=="sequence", and taking care accordingly:
# Removing the sequence column that came from curCloneObj@data as it does not
# include sequences of germline and inferred.
# Setting the "sequence" column to be named "sequence"
if (objSeq=="sequence") {
cur_clone_merged_df <- cur_clone_merged_df %>% select(-!!rlang::sym("sequence.x"))
cur_clone_merged_df <- rename(cur_clone_merged_df, sequence=!!rlang::sym("sequence.y"))
} else {
cur_clone_merged_df1<-cur_clone_merged_df1[!(names(cur_clone_merged_df1) %in% c(objSeq))]
}
#7. Adding the parent sequence as a new column:
cur_clone_merged_df$parent_sequence <- cur_clone_merged_df[match(cur_clone_merged_df$parent,
cur_clone_merged_df[,objSeqId]),
objSeq]
# filling the parent sequece of the Germline sequence to be its own sequence
# (=GERMLINE_IMGT):
#cur_clone_merged_df <- mutate(cur_clone_merged_df,
# parent_sequence=ifelse(is.na(parent_sequence),
# GERMLINE_IMGT,
# parent_sequence))
# now checking if the germline sequence is equal to its (only) child sequence.
# For example if "250_7" sequence parent is the "250_Germline" sequence,
# then mergin them to one line called "250_7_Germline".
germ_seq_line <- filter(cur_clone_merged_df, !!rlang::sym("orig_sequence_id") == "Germline")
germ_seq <- germ_seq_line[,objSeq]
germ_son_seq_line <- filter(cur_clone_merged_df, !!rlang::sym("orig_parent") == "Germline")
germ_son_seq <- germ_son_seq_line[,objSeq]
if (seqDist(germ_seq, germ_son_seq) == 0) {
# removing from db the line of the germline:
cur_clone_merged_df <- filter(cur_clone_merged_df, !!rlang::sym("orig_sequence_id") != "Germline")
# renaming the sequence id of the germline son - to include "Germline"
# in its name:
cur_clone_merged_df[,objSeqId]<-ifelse(cur_clone_merged_df[,"orig_parent"] == "Germline",
paste(cur_clone_merged_df[,objSeqId], "_", "Germline", sep=""),
cur_clone_merged_df[, objSeqId])
# Change the parent SEQUENCE to be NA (as it is the Germline)
cur_clone_merged_df <- mutate(cur_clone_merged_df,
parent=ifelse(!!rlang::sym("orig_parent") == "Germline",
"NA",
!!rlang::sym("parent")))
}
return(cur_clone_merged_df)
}
#' Build a RegionDefinition object that includes CDR3 and FWR4.
#'
#' \code{setRegionBoundaries} takes as input a junction length and an IMGT-numbered sequence
#' and outputs a custom \code{RegionDefinition} object that includes the boundary definitions of
#' CDR1-3 and FWR1-4 for that sequence. In contrast to the universal \code{RegionDefinition} object
#' that end with FWR3, the returned definition is per-sequence due to variable junction lengths.
#'
#' @param juncLength junction length of the sequence.
#' @param sequenceImgt IMGT-numbered sequence.
#' @param regionDefinition \code{RegionDefinition} type to calculate the region definition for.
#' Can be one of \code{IMGT_VDJ_BY_REGIONS} or \code{IMGT_VDJ},
#' which are template definitions that include CDR1-3 and FWR1-4.
#' Only these two regions include all CDR1-3 and FWR1-4 regions.
#' If this argument is set to \code{NULL}, then an empty
#' \code{RegionDefinition} will be returned.
#'
#' @return A \code{RegionDefinition} object that includes CDR1-3 and FWR1-4 for the
#' \code{sequenceImgt}, \code{juncLength}, and \code{regionDefinition} specified.
#'
#' For \code{regionDefinition=IMGT_VDJ_BY_REGIONS}, the returned \code{RegionDefinition}
#' includes:
#'
#' \itemize{
#' \item \code{fwr1}: Positions 1 to 78.
#' \item \code{cdr1}: Positions 79 to 114.
#' \item \code{fwr2}: Positions 115 to 165.
#' \item \code{cdr2}: Positions 166 to 195.
#' \item \code{fwr3}: Positions 196 to 312.
#' \item \code{cdr3}: Positions 313 to (313 + juncLength - 6) since the junction
#' sequence includes (on the left) the last codon from FWR3 and
#' (on the right) the first codon from FWR4.
#' \item \code{fwr4}: Positions (313 + juncLength - 6 + 1) to the end of the sequence.
#' }
#'
#' For \code{regionDefinition=IMGT_VDJ}, the returned \code{RegionDefinition} includes:
#'
#' \itemize{
#' \item \code{fwr}: Positions belonging to a FWR.
#' \item \code{cdr}: Positions belonging to a CDR.
#' }
#'
#' In the case that the \code{regionDefinition} argument is not one of the extended
#' regions (\code{IMGT_VDJ_BY_REGIONS} or \code{IMGT_VDJ}), the input
#' \code{regionDefinition} is returned as is.
#'
#' @seealso See \link{RegionDefinition} for the return object.
#' See \link{IMGT_SCHEMES} for a set of predefined \code{RegionDefinition} objects.
#'
#' @examples
#' # Load and subset example data
#' data(ExampleDb, package = "alakazam")
#' len <- ExampleDb$junction_length[1]
#' sequence <- ExampleDb$sequence_alignment[1]
#' region <- setRegionBoundaries(len, sequence, regionDefinition = IMGT_VDJ)
#'
#' @export
setRegionBoundaries <- function(juncLength, sequenceImgt, regionDefinition=NULL) {
# Check RegionDefinition input
if (is.null(regionDefinition)) {
rd <- makeNullRegionDefinition(nchar(sequenceImgt))
return(rd)
} else if (!is(regionDefinition, "RegionDefinition")) {
stop(deparse(substitute(regionDefinition)), " is not a valid RegionDefinition object")
} else if (!(regionDefinition@name %in% c("IMGT_VDJ_BY_REGIONS", "IMGT_VDJ"))) {
return(regionDefinition)
}
# all slots except for boundaries and seqLength are already defined in regionDefinition
# First need to extract sequence length from sequence:
seqLength <- nchar(sequenceImgt)
# juncLength doesn't include alignment gaps, which are in sequenceImgt
# and need to be added to correctly identify the boundaries
# Also, sequence_alignment can have `.` that represent indels
junction_length_helper <- !strsplit(sequenceImgt[[1]], "")[[1]] %in% c("-", ".")
junction_length_helper[1:310 - 1] <- 0
if (juncLength > 0) {
junction_end <- which(cumsum(junction_length_helper[1:length(junction_length_helper)])==juncLength[[1]])[1]
if (is.na(junction_end)) {
warning("junction ends past 'sequenceImgt'")
junction_end <- nchar(sequenceImgt)
num_gaps <- sum(!junction_length_helper[310:junction_end])
cdr3_end <- junction_end
} else {
num_gaps <- sum(!junction_length_helper[310:junction_end])
juncLength <- juncLength + num_gaps
cdr3_end <- 313 + as.integer(juncLength) - 6 - 1
}
} else {
cdr3_end <- 0
}
# now for the boundaries slot:
boundaries <- factor(shazam::IMGT_V_BY_REGIONS@boundaries,
levels=c(levels(shazam::IMGT_V_BY_REGIONS@boundaries), "cdr3", "fwr4"))
if (cdr3_end > 312) {
boundaries[313:cdr3_end] <- factor("cdr3")
if (cdr3_end < nchar(sequenceImgt)) {
boundaries[(cdr3_end+1):seqLength] <- factor("fwr4")
}
} else {
# If you are here, the junction is too short, <= 6nt
warning("CDR3 end < CDR3 start. Couldn't identify CDR3 and FWR4. Aligned junction length is: ", juncLength)
}
# build RegionDefinition object
if (regionDefinition@name == "IMGT_VDJ") {
boundaries <- gsub(pattern="fwr.", replacement = "fwr", x=boundaries, perl=TRUE)
boundaries <- gsub(pattern="cdr.", replacement = "cdr", x=boundaries, perl=TRUE)
boundaries <- factor(boundaries, levels=c("fwr", "cdr"))
}
rd <- new("RegionDefinition",
name=regionDefinition@name,
description=regionDefinition@description,
boundaries=boundaries, seqLength=unname(seqLength),
regions=regionDefinition@regions,
labels=regionDefinition@labels,
citation=regionDefinition@citation)
return(rd)
}
# Calculating an extended (=that includes cdr1/2/3 and fwr1/2/3/4) region definition
# for a specific clone in database.
# Inputs:
# - clone_num: the clone number for which to calculate the region definition.
# - db: a ChangeoClone database that includes clone numbers.
# - seq_col: the name of the db column containing the sequence that is imgt aligned.
# - juncLengthColumn: the name of the db column containing the junction length.
# - clone_col: the name of the db column containing the clone number.
# - regionDefinition: the region definition type to be output for this clone.
# Output:
# A regionDefinition object for the specific clone
# Note: regionDefinition needs to be calculated specifically for the clone if it
# is of type IMGT_VDJ or IMGT_VDJ_BY_REGIONS, as it includes also cdr3 and fwr4
# which are specific to clone.
# Note: The region definition is same for all sequences in clone - so doing it
# based on first sequence in clone.
getCloneRegion <- function(clone_num, db, seq_col="sequence",
juncLengthColumn="junction_length",
clone_col="clone",
regionDefinition=NULL) {
# Check region definition
if (!is.null(regionDefinition) & !is(regionDefinition, "RegionDefinition")) {
stop(deparse(substitute(regionDefinition)), " is not a valid RegionDefinition object")
}
# subseting the db to lines for specific clone
clone_db <- db[db[[clone_col]] == clone_num,]
if ( length(unique(clone_db[[juncLengthColumn]])) >1 ) {
stop("Expecting clones where all sequences have the same junction lenth. Different lengths found for clone ", clone_num)
}
# getting one of the sequences of the specific clone:
seq <- clone_db[[seq_col]][1]
junc_len <- clone_db[[juncLengthColumn]][1]
reg <- setRegionBoundaries(juncLength=junc_len, sequenceImgt=seq,
regionDefinition=regionDefinition)
return(reg)
}
# Status: experimental, not exported function
# data(SingleDb, package="alakazam")
# germline_db <- list(
# "IGHV3-11*05"="CAGGTGCAGCTGGTGGAGTCTGGGGGA...GGCTTGGTCAAGCCTGGAGGGTCCCTGAGACTCTCCTGTGCAGCCTCTGGATTCACCTTC............AGTGACTACTACATGAGCTGGATCCGCCAGGCTCCAGGGAAGGGGCTGGAGTGGGTTTCATACATTAGTAGTAGT......AGTAGTTACACAAACTACGCAGACTCTGTGAAG...GGCCGATTCACCATCTCCAGAGACAACGCCAAGAACTCACTGTATCTGCAAATGAACAGCCTGAGAGCCGAGGACACGGCCGTGTATTACTGTGCGAGAGA",
# "IGHD3-10*01"="GTATTACTATGGTTCGGGGAGTTATTATAAC",
# "IGHJ5*02"="ACAACTGGTTCGACCCCTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCAG"
# )
# pja <- shazam:::plotJunctionAlignment(SingleDb, germline_db,regionDefinition=IMGT_VDJ_BY_REGIONS)
# pja$p
plotJunctionAlignment <- function(db_row, germline_db,
sequence_alignment="sequence_alignment",
v_call="v_call", d_call="d_call", j_call="j_call",
v_germline_start="v_germline_start", v_germline_end="v_germline_end",
d_germline_start="d_germline_start", d_germline_end="d_germline_end",
j_germline_start="j_germline_start", j_germline_end="j_germline_end",
np1_length="np1_length", # np2_length="np2_length",
junction="junction", junction_length="junction_length",
germline_alignment="germline_alignment",
regionDefinition=NULL) {
# Check for valid columns
check <- checkColumns(db_row, c(sequence_alignment,
v_call, j_call, # d_call,
v_germline_start, v_germline_end,
# d_germline_start, # d_germline_end,
j_germline_start, j_germline_end,
np1_length, # np2_length,
junction, junction_length)
)
if (check != TRUE) { stop(check) }
if (!is.null(d_call)) {
d_columns <- c(d_call, d_germline_start, d_germline_end)
found <- d_columns %in% colnames(db_row)
if (any(!found)) {
stop( "Column(s) ", paste(d_columns[!found], sep="", collpase=",") ," not found.")
}
}
if (!germline_alignment %in% colnames(db_row)) {
warning("The column germline_alignment doesn't exist. Assigned NA value.")
db_row[[germline_alignment]] <- NA
}
# Check region definition
if (!is.null(regionDefinition)) {
if (!is(regionDefinition, "RegionDefinition")) {
stop(deparse(substitute(regionDefinition)), " is not a valid RegionDefinition object")
}
}
junction_seq <- db_row[[junction]]
v_allele <- getAllele(db_row[[v_call]], first=T)
d_allele <- getAllele(db_row[[d_call]], first=T)
j_allele <- getAllele(db_row[[j_call]], first=T)
#if (!is.na(d_allele)) {
v_germline <- germline_db[[v_allele]]
if (!is.na(d_allele)) {
d_germline <- germline_db[[d_allele]]
}
j_germline <- germline_db[[j_allele]]
seq_aln <- db_row[[sequence_alignment]]
germ_aln <- db_row[[germline_alignment]]
getPositionDf <- function(seq, label) {
sequence_chars <- strsplit(seq,"")[[1]]
sequence_chars
position_df <- data.frame("nucleotide"=sequence_chars,
"pos"=1:nchar(seq),
"label"=label,
stringsAsFactors = F)
position_df
}
sequence_aln_df <- getPositionDf(seq_aln,sequence_alignment)
sequence_aln_df[['aligned']] <- T
if (!is.na(germ_aln)) {
germ_aln_df <- getPositionDf(germ_aln,germline_alignment)
germ_aln_df[['aligned']] <- T
} else {
germ_aln_df <- data.frame()
}
v_germline_df <- getPositionDf(v_germline,"v_germline")
v_germ_start <- as.numeric(db_row[[v_germline_start]])
v_germ_end <- as.numeric(db_row[[v_germline_end]])
v_germline_df[['aligned']] <- v_germline_df$pos >= v_germ_start & v_germline_df$pos <= v_germ_end
v_germ_start_off <- 1 - v_germ_start
v_germline_df$pos <- v_germline_df$pos + v_germ_start_off
v_length <- v_germ_end - v_germ_start + 1
np1_len <- db_row[[np1_length]]
if (!is.na(d_allele)) {
d_germline_df <- getPositionDf(d_germline,"d_germline")
d_germ_start <- as.numeric(db_row[[d_germline_start]])
d_germ_end <- as.numeric(db_row[[d_germline_end]])
d_germline_df[['aligned']] <- d_germline_df$pos >= d_germ_start & d_germline_df$pos <= d_germ_end
d_germ_start_off <- 1-d_germ_start
d_germline_df$pos <- v_length + np1_len + d_germline_df$pos + d_germ_start_off
# d_length <- d_germ_end - d_germ_start + 1
} else {
d_germline_df <- data.frame()
}
# np2_len <- db_row[[np2_length]]
j_germline_df <- getPositionDf(j_germline,"j_germline")
j_germ_start <- as.numeric(db_row[[j_germline_start]])
j_germ_end <- as.numeric(db_row[[j_germline_end]])
j_germline_df[['aligned']] <- j_germline_df$pos >= j_germ_start & j_germline_df$pos <= j_germ_end
j_length <- j_germ_end - j_germ_start + 1
# Use end as reference
j_germline_df$pos <- nchar(seq_aln) - j_length + 1 + j_germline_df$pos - j_germ_start
df <- bind_rows(sequence_aln_df, v_germline_df, d_germline_df, j_germline_df, germ_aln_df)
if (is.na(junction_seq) | db_row[[junction_length]]<1 ) {
message("No junction available for this sequence.")
junction_df <- NULL
} else {
# junction_position <- #stri_locate(seq_aln,fixed=junction_seq)
junction_start <- 310
j_len <- db_row[[junction_length]]
# junction_end <- junction_start + j_len - 1
# Get aligned junction, with gaps
junction_alignment_helper <- !stringi::stri_split_boundaries(seq_aln, type="char")[[1]] %in% c("-",".")
junction_alignment_helper[1:junction_start-1] <- 0
junction_end <- which(cumsum(junction_alignment_helper[1:length(junction_alignment_helper)])>j_len)[1] - 1
if (is.na(junction_end)) {
warning("The junction ends past sequence_alignment. Using the last position as junction_end.")
junction_end <- length(junction_alignment_helper)
}
junction_alignment <- stringi::stri_sub(seq_aln,310,junction_end)
# junction_end <- junction_start + nchar(db_row[[junction]]) -1
junction_df <- data.frame("nucleotide"=strsplit(junction_alignment,"")[[1]], stringsAsFactors = F)
junction_df[['pos']] <- c(junction_start:junction_end)
junction_df[['label']] <- "junction"
junction_df[['aligned']] <- T
}
missing_junction_nt <- db_row[[junction_length]] - sum(junction_df$aligned)
if ( missing_junction_nt > 0 ) {
junction_nt <- gsub("[\\.\\-]","",db_row[['junction']])
missing_nt <- stringi::stri_sub(junction_nt,
nchar(junction_nt)-missing_junction_nt+1,
nchar(junction_nt))
junction_df_missing <- getPositionDf(missing_nt,"junction")
junction_df_missing[['aligned']] <- F
junction_df_missing[['pos']] <- junction_df_missing[['pos']] + max(junction_df[['pos']])
junction_df <- bind_rows(junction_df, junction_df_missing)
}
# addRegionDefinition boundaries
region_definition <- setRegionBoundaries(db_row[[junction_length]], db_row[[sequence_alignment]], regionDefinition )
if (!is.null(regionDefinition)) {
rdf <- data.frame(
"nucleotide"=as.character(region_definition@boundaries),
"pos"=1:length(region_definition@boundaries),
"label"="region_definition",
"aligned"=T,
stringsAsFactors = F)
} else {
rdf <- data.frame()
}
df <- bind_rows(df, junction_df, rdf)
ordered_labels <- c("region_definition",sequence_alignment, "junction", "v_germline", "d_germline","j_germline", germline_alignment)
df[['label']] <- factor(df[['label']], levels=rev(ordered_labels), ordered = T)
color_palette <- c(
"A" = "lightskyblue2",
"T" = "khaki2",
"G" = "palegreen3",
"C" = "coral2",
"." = "white",
"N" = "grey80",
"-" = "black",
"cdr1" = "#FFDCD2",
"cdr2" = "#FFDCD2",
"cdr3" = "#FFB189",
"fwr1" = "#8CCEDB",
"fwr2" = "#8CCEDB",
"fwr3" = "#8CCEDB",
"fwr4" = "#8CCEDB"
)
fig_theme <- function(font_size=7) {
theme_bw() +
theme(text=element_text(size=font_size),
axis.title=element_text(size=font_size),
axis.text=element_text(size=font_size),
axis.text.x=element_text(size=font_size),
axis.text.y=element_text(size=font_size),
#axis.ticks=element_blank(),
#axis.ticks=theme_segment(colour = "black"),
#panel.background=element_rect(fill = NA, colour = "black", linewidth = 0.25),
panel.border=element_blank(),
#panel.grid.major=element_line(colour = "grey", size = 0.05),
#panel.grid.minor=element_line(colour = "grey", size = 0.05),
panel.grid.major.y=element_blank(),
panel.grid.minor.y=element_blank(),
panel.grid.major.x=element_blank(),
panel.grid.minor.x=element_blank(),
panel.spacing=unit(0.25, "lines"),
plot.title=element_text(size=font_size,
face="bold",
lineheight=0.8),
legend.position="top",
legend.text=element_text(size=font_size),
# legend.title = element_text(size=font_size),
legend.title=element_blank(),
legend.spacing=unit(0.25, "lines"),
legend.box="horizontal",
legend.box.spacing=unit(0.25, "lines"),
legend.key.height=unit(1,"line"),
legend.key.width=unit(1,"line"),
strip.text = element_text(size = font_size, face="plain"),
strip.background = element_blank(),
plot.margin= unit(c(0, 0, 0, 0), "lines"))
}
p <- ggplot(data=df %>%
dplyr::filter(.data$label == "region_definition") %>%
dplyr::mutate(label=factor(.data$label, levels = ordered_labels, ordered = T)),
aes(x=.data$pos, y=.data$label, fill=.data$nucleotide, alpha=.data$aligned)) +
geom_tile( height=0.3) +
geom_tile(data=df %>%
dplyr::filter(.data$label != "region_definition") %>%
dplyr::mutate(label=factor(.data$label, levels = ordered_labels, ordered = T)),
color="grey50") +
fig_theme() +
scale_fill_manual(values=color_palette) +
scale_alpha_manual(values=c("TRUE"=1, "FALSE"=0.2), guide="none") +
scale_y_discrete(breaks=ordered_labels, labels=ordered_labels, limits=rev(ordered_labels), expand=c(0, 0)) +
ylab("") + xlab("IMGT position") +
guides(fill=guide_legend(nrow=1)) +
scale_x_continuous(expand=c(0, 0))
list(p=p, data=df)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.