#' Discriminate between beta-sharing clones and dual-alpha TCR clones (optimized
#' for rare clones)
#'
#' \code{dual_tail()} distinguishes between clones that share a common beta
#' chain and dual TCR clones with two productive alpha chains. The procedure
#' tests the null hypothesis that two candidate alpha, beta pairs with the
#' same beta represent two separate clones by using the frequency estimates
#' to calculate the number of wells that both clones are expected to be in.
#' This is compared to the actual number of wells that both clones appear in,
#' and if the actual number is greater than the expected number, than the
#' pairs are chosen to represent a dual TCR clone.
#'
#' @param alpha Matrix recording which alpha chains appear in each well of the
#' data. See \code{\link{create_data}}.
#' @param beta Matrix recording which beta chains appear in the each well of the
#' data. See \code{\link{create_data}}.
#' @param freq_results Output of \code{\link{freq_estimate}}.
#' @param numb_cells Vector containing the number of cells sampled in the wells
#' of each column of the plates.
#'
#' @return A n x 3 matrix where n is the number of candidate clones, column 1
#' is the beta index of the clone, and column 2-3 are the alpha indices of
#' the clone
#'
#' @export
dual_tail <- function(alpha, beta, freq_results, numb_cells) {
freq_results <- freq_results[order(freq_results[, "MLE"], decreasing = TRUE), ]
freq_results <- freq_results[!is.na(freq_results[, "MLE"]), ]
dual_procedure <- function(data_alph, data_beta, freq, cells) {
# preallocate matrix to record the expected
# col 1: beta index, col 2: alpha1, col 3: alpha2
# col 4: expt # of wells, col 5: actual # of wells
rec <- matrix(nrow = 0, ncol = 6)
colnames(rec) <- c("beta1", "beta2",
"alpha1", "alpha2",
"expt_wells", "act_wells")
# checking the clones with each beta chain
# -calculate the expected number of plates for every pair of alpha associated
# associated with beta under assumption that they are independent clones
# -if beta, alpha_k1, alpha_k2 show up more often than expected, then they
# must be dual
for (clon in 1:max_beta) {
x <- freq[freq[, "beta1"] == clon, , drop = FALSE] # find clones with the beta index
numb_cand <- nrow(x) # find number of alphas associated with beta
if (numb_cand > 1) { # if more than 1 alpha
combos <- utils::combn(numb_cand, 2) # find all combos of pairs
for (ind in 1:ncol(combos)) { # check each combo
alpha1 <- x[combos[1, ind], "alpha1"]
alpha2 <- x[combos[2, ind], "alpha1"]
f1 <- x[combos[1, ind], "MLE"] # freq est of beta alpha1
f2 <- x[combos[2, ind], "MLE"] # freq est of beta alpha2
exp_plates <- 0 # expected number of plates
for (samp in seq_along(sample_size_well)) { # find the number of expected well apperances in each column
number_cells <- sample_size_well[samp]
exp_plates <- numb_sample[samp] *
(1 - (1 - f1)^number_cells - (1 - f2)^number_cells +
(1 - (f1 + f2))^number_cells) + exp_plates
}
dat_plates <- sum(data_beta[, clon] == 1 &
data_alph[, alpha1] == 1 &
data_alph[, alpha2] == 1)
rec <- rbind(rec, c(clon, clon, alpha1, alpha2, exp_plates, dat_plates))
}
}
} # end for going through all beta chains
rec_tail <- rec
if (nrow(rec) > 2) {
# make decisions on if two clones are independent or actually a dual TCR by
# looking at the ratio of actual # of wells vs expected # of wells
# -do this by clustering the ratios into "high" and "low" ratios using k-means
# -the "high" cluster are more likely to be duals
well_ratio <- rec[, "act_wells"]/rec[, "expt_wells"] # ratio of actual to expected
if (length(unique(well_ratio)) > 1) {
km_out <- stats::kmeans(well_ratio, 2, nstart = 200) # kmeans of ratios in 2 clusters
# figure out which cluster represents "high"
clus_ind1 <- which(km_out$cluster == 1) # one cluster
clus_ind2 <- which(km_out$cluster == 2) # the other cluster
# determine which cluster has the higher ratios, making it "high" cluster
if (mean(well_ratio[clus_ind1]) > mean(well_ratio[clus_ind2])) {
clus_ind <- clus_ind1
} else {
clus_ind <- clus_ind2
} # end if - else
} # end if
} else {
clus_ind <- NULL
} # end if else - nrow
list(results = rec, cluster = clus_ind)
} # end function - dual_procedure
# parameters
max_beta <- ncol(beta) # determine maximum beta index
sample_size_well <- numb_cells[, 1] # number of cells per well
numb_sample <- numb_cells[, 2] # number of wells w/ sample size
# perform the dual discrimination for the tail
tail_dual <- dual_procedure(alpha, beta, freq_results, numb_cells)
tail_rec <- tail_dual$results
tail_rec <- tail_rec[tail_dual$cluster, 1:4, drop = FALSE]
colnames(tail_rec) <- c("beta1", "beta2", "alpha1", "alpha2")
as.data.frame(tail_rec)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.