Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
crop = knitr::hook_pdfcrop
)
suppressMessages(library(htmltools))
## ----echo=FALSE---------------------------------------------------------------
htmltools::img(src = knitr::image_uri("piglet_logo.svg"),
alt = 'logo',
style = 'position:absolute; top:0; right:0; padding:10px;border: none !important;')
## ----plot-amplicon, fig.width=11, fig.height=2, echo=FALSE, fig.cap="**V library amplicon length.** Each row is a different V coverage, S1 for full length, S2 for BIOMED-2 primers, and S3 for adaptive coverage. The colors indicates the V regions according to IMGT numbering, where dark gray represents the IMGT gaps."----
FWR1 <-
"caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttct"
CDR1 <- "ggatacaccttc............accggctactat"
FWR2 <- "atgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacgg"
CDR2 <- "atcaaccctaac......agtggtggcaca"
FWR3 <-
"aactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgt"
CDR3 <- "gcgagaga"
seq <- c("seq1" = paste0(FWR1, CDR1, FWR2, CDR2, FWR3, CDR3))
mat_letters = matrix(sample(letters[1:4], 100, replace = TRUE), 10)
i = 1
regions <- c()
letters <- c()
reg <- c("FWR1", "CDR1", "FWR2", "CDR2", "FWR3", "CDR3")
for (s in toupper(c(FWR1, CDR1, FWR2, CDR2, FWR3, CDR3))) {
letters <- c(letters, strsplit(s, "")[[1]])
regions <-
c(regions, ifelse(grepl("[.]", strsplit(s, "")[[1]]), "gap", reg[i]))
i = i + 1
}
togap <- function(vgap, vdj) {
##add in vdj gaps
gapadd <- vdj
for (i in which(unlist(strsplit(vgap, "", fixed = T)) == ".")) {
gapadd <-
paste0(substr(gapadd, 1, i - 1), ".", substr(gapadd, i, nchar(gapadd)))
}
return(gapadd)
}
primer <- "GGCCTCAGTGAAGGTCTCCTGCAAG"
seq <- toupper(paste0(FWR1, CDR1, FWR2, CDR2, FWR3, CDR3))
loc <-
unlist(aregexec(
text = gsub("[.]", "", seq),
pattern = primer,
max.distance = 4
))
seq_n_gap <- gsub("[.]", "", seq)
preceding <- substr(seq_n_gap, 1, (loc[1] - 1 + nchar(primer)))
preceding <- gsub("[AGCT]", "N", preceding)
fr1_seq <- substr(seq_n_gap, (loc[1] + nchar(primer)), nchar(seq_n_gap))
seq_paste <- paste0(preceding, fr1_seq)
seq_gapped <- togap(seq, seq_paste)
FWR1_s2 <- strsplit(seq_gapped, toupper(CDR1))[[1]][1]
i = 1
for (s in toupper(c(FWR1_s2, CDR1, FWR2, CDR2, FWR3, CDR3))) {
if (any(grepl("N", s))) {
letters <- c(letters, strsplit(s, "")[[1]])
regions <-
c(regions, ifelse(grepl("[.]", strsplit(s, "")[[1]]), "", ifelse(grepl(
"N", strsplit(s, "")[[1]]
), "", reg[i])))
} else{
letters <- c(letters, strsplit(s, "")[[1]])
regions <-
c(regions, ifelse(grepl("[.]", strsplit(s, "")[[1]]), "gap", reg[i]))
}
i = i + 1
}
lower_thresh <- 225
upper_seq <- substr(seq, lower_thresh, nchar(seq))
lower_seq <- strsplit(seq, upper_seq, fixed = T)[[1]][1]
lower_seq <- gsub("[ATCG]", "N", lower_seq)
s3_seq <- paste0(lower_seq, upper_seq)
seqs_s3 <-
sapply(toupper(c(FWR1, CDR1, FWR2, CDR2, FWR3, CDR3)), function(s) {
s <- gsub("[ATCG]", "N", s)
stringi::stri_extract(s3_seq, fixed = toupper(s))
}, USE.NAMES = F)
seqs_s3[5] <-
strsplit(strsplit(s3_seq, gsub("[ATCG]", "N", toupper(CDR2)), fixed = T)[[1]][2], toupper(CDR3), fixed = T)[[1]][1]
seqs_s3[6] <- toupper(CDR3)
i = 1
for (s in seqs_s3) {
letters <- c(letters, strsplit(s, "")[[1]])
regions <- c(regions, ifelse(grepl("[.]", strsplit(s, "")[[1]]),
"", ifelse(grepl(
"N", strsplit(s, "")[[1]]
), "", reg[i])))
i = i + 1
}
mat_seq <- matrix(letters,
nrow = 3,
ncol = 320,
byrow = T)
mat_regions <- matrix(regions,
nrow = 3,
ncol = 320,
byrow = T)
fam_col <-
c(
"brown4",
"darkblue",
"darkorchid4",
"darkgreen",
"firebrick",
"darkorange3",
"deeppink4",
"deepskyblue4",
"darkslategrey"
)
split_col <- unlist(sapply(1:6, function(i) {
rep(reg[i], nchar(c(FWR1, CDR1, FWR2, CDR2, FWR3, CDR3)[i]))
}))
split_col <- factor(split_col, levels = reg)
vreg <- ComplexHeatmap::Heatmap(
mat_regions,
name = "V regions",
col = structure(c("white", "#00000099", fam_col[1:6]),
names = c("", "gap", reg)),
heatmap_height = grid::unit(5, "cm"),
row_split = c("S1", "S2", "S3"),
cluster_rows = F,
cluster_columns = F,
show_heatmap_legend = F,
column_split = split_col
)
ComplexHeatmap::draw(vreg, padding = grid::unit(c(0, 0, 0, 1), "cm"))
## ----germline-----------------------------------------------------------------
library(piglet)
data(HVGERM)
## ----functionality------------------------------------------------------------
data(hv_functionality)
## ----lst-germlineset----------------------------------------------------------
germline <- HVGERM
## keep only functional alleles
germline <- germline[hv_functionality$allele[hv_functionality$functional=="F"]]
## keep only alleles that start from the first position of the V sequence
germline <- germline[!grepl("^[.]", germline)]
## keep only alleles that are at minimum 318 nucleotide long
germline <- germline[nchar(germline) >= 318]
## keep only localized alleles (remove NL)
germline <- germline[!grepl("NL", names(germline))]
## ----lst-germlineset-code, ref.label='lst-germlineset', anchor="block", eval=FALSE----
# germline <- HVGERM
# ## keep only functional alleles
# germline <- germline[hv_functionality$allele[hv_functionality$functional=="F"]]
# ## keep only alleles that start from the first position of the V sequence
# germline <- germline[!grepl("^[.]", germline)]
# ## keep only alleles that are at minimum 318 nucleotide long
# germline <- germline[nchar(germline) >= 318]
# ## keep only localized alleles (remove NL)
# germline <- germline[!grepl("NL", names(germline))]
## ----lst-asc, fig.cap=""------------------------------------------------------
asc <- inferAlleleClusters(
germline_set = germline,
trim_3prime_side = 318,
mask_5prime_side = 0,
family_threshold = 75,
allele_cluster_threshold = 95)
## ----asc-plot, fig.height=18, fig.width=15, fig.cap="**Allele similarity clusters.** The out most circle is the allele names, the second layer are the ASC groups, each group is labeled and colored. The third circle is the clustering dendrogram, the branches are colored by the ASC families. The blue and orange dashed lines are the 95% and 75% similarity ASC threshold."----
plot(asc)
## ----frw1---------------------------------------------------------------------
germline_frw1 <- artificialFRW1Germline(germline, mask_primer = T)
## ----eval=FALSE---------------------------------------------------------------
# zenodo_doi <- "10.5281/zenodo.7401189"
# asc_archive <-
# recentAlleleClusters(doi = zenodo_doi, get_file = TRUE)
## ----eval=FALSE---------------------------------------------------------------
# allele_cluster_table <- extractASCTable(archive_file = asc_archive)
## ----echo=FALSE---------------------------------------------------------------
allele_cluster_table <- read.delim('asc_alleles_table.tsv',sep='\t')
## ----echo=FALSE---------------------------------------------------------------
asc_tables <- dplyr::left_join(
asc@alleleClusterTable %>%
dplyr::select(new_allele, imgt_allele) %>% dplyr::group_by(new_allele) %>%
dplyr::summarise(imgt_allele = paste0(sort(
unique(imgt_allele)
), collapse = "/")),
allele_cluster_table %>% dplyr::select(new_allele, imgt_allele) %>% dplyr::group_by(new_allele) %>%
dplyr::summarise(imgt_allele = paste0(sort(
unique(imgt_allele)
), collapse = "/")),
by = "new_allele",
suffix = c(".piglet", ".zenodo")
)
## -----------------------------------------------------------------------------
# loading TIgGER AIRR-seq b cell data
data <- tigger::AIRRDb
## -----------------------------------------------------------------------------
allele_cluster_table <-
allele_cluster_table %>% dplyr::group_by(new_allele, func_group, thresh) %>%
dplyr::summarise(imgt_allele = paste0(sort(unique(imgt_allele)), collapse = "/"),
.groups = "keep")
## -----------------------------------------------------------------------------
# storing original v_call values
data$v_call_or <- data$v_call
# assigning the ASC alleles
asc_data <- assignAlleleClusters(data, allele_cluster_table)
head(asc_data[, c("v_call", "v_call_or")])
## -----------------------------------------------------------------------------
# reforming the germline set
asc_germline <- germlineASC(allele_cluster_table, germline = HVGERM)
## -----------------------------------------------------------------------------
# inferring the genotype
asc_genotype <- inferGenotypeAllele_asc(
asc_data,
alleleClusterTable = allele_cluster_table,
germline_db = asc_germline,
find_unmutated = T
)
head(asc_genotype)
## -----------------------------------------------------------------------------
# get the genotype alleles
alleles <- unlist(strsplit(asc_genotype$genotyped_imgt_alleles, ","))
# get the genes
genes <- gsub("[*][0-9]+", "", alleles)
# extract the alleles
alleles <- sapply(strsplit(alleles, "[*]"), "[[", 2)
# make sure to extract only alleles
alleles <- gsub("([0-9]+).*$", "\\1", alleles)
# create the genotype
genotype <- data.frame(alleles = alleles, gene = genes)
# plot the genotype
tigger::plotGenotype(genotype = genotype)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.