#' @useDynLib alphabetr
#' @importFrom Rcpp sourceCpp
NULL
#' Identify candidate alpha/beta pairs.
#'
#' \code{bagpipe()} uses the alphabetr resampling procedure on sequencing data
#' to identify candidate alpha/beta pairs. The procedure takes a subsample of
#' the data without replacement, calculates association scores with
#' \code{\link{chain_scores}}, and then for each well, uses the Hungarian
#' algorithm to determine the most likely pairings for the chains found in the
#' well. Each time this is done is a replicate, and the number of replicates is
#' specified as an option. A threshold is then used to filter the candidate
#' pairs that appear in proportion of the replicates larger than the threshold,
#' resulting in the final list of candidate pairs. Bagpipe is an acronym for
#' \strong{b}ootstrapping \strong{a}lphabetr \strong{g}enerated
#' \strong{p}a\strong{i}rs \strong{p}rocedur\strong{e} (based on older versions
#' that utilized bootstrapping)
#'
#' @param alpha Matrix recording which alpha chains appear in each well of the
#' data. See \code{\link{create_data}}.
#' @param beta Matrix recording which beta chains appear in the each well of the
#' data. See \code{\link{create_data}}.
#' @param replicates The number of times the resampling procedure is repeated,
#' i.e. the number of replicates. At least 100 replicates is recommended.
#' @param frac The fraction of the wells resampled in each replicate. Default
#' is 75\% of the wells
#' @param bootstrap Legacy option. Calls a bootstrapping strategy (which
#' resamples with replacement) instead of sampling a subset without
#' replacement.
#'
#' @return A n x 5 matrix where n is the number of clones determined by
#' \code{bagpipe()}. Each row represents the chains of the clone.
#' Columns 1 and 2 represent the beta chain indices of the clone.
#' Columns 3 and 4 represent the alpha chain indices of the clone.
#' Column 5 represents the number of replicates that this clone was found in.
#' Column 5 is used to filter our the clones that have not be determined by
#' a certain "threshold" proportion of replicates.
#'
#' Note that column 1 == column 2 and column 3 == column 4 since
#' \code{bagpipe()} does not try to determine dual-alpha or dual-beta clones.
#'
#' @examples
#' # see the help for create_clones() and create_data()
#' clones <- create_clones(numb_beta = 1000,
#' dual_alpha = .3,
#' dual_beta = .06,
#' alpha_sharing = c(0.80, 0.15, 0.05),
#' beta_sharing = c(0.75, 0.20, 0.05))
#' dat <- create_data(clones$TCR, plate = 5,
#' error_drop = c(.15, .01),
#' error_seq = c(.05, .001),
#' error_mode = c("lognormal", "lognormal"),
#' skewed = 10,
#' prop_top = 0.6,
#' dist = "linear",
#' numb_cells = matrix(c(50, 480), ncol = 2))
#'
#' \dontrun{
#' # normally want to set replicates to 100 or more
#' pairs <- bagpipe(alpha = dat$alpha,
#' beta = dat$beta,
#' replicates = 5,
#' frac = 0.75,
#' bootstrap = FALSE)
#'
#' # using a threshold of 0.3 of replicates
#' pairs <- pairs[pairs[, 5] > 0.3, ]
#' }
#' @export
bagpipe <- function(alpha, beta, replicates = 100, frac = 0.75, bootstrap = FALSE) {
# If user asks for bootstrapping, then the resampled size must be equal
# to the original sample size
if (bootstrap == TRUE) frac <- 1.0
# Ensure that there are the same number of wells in the alpha and beta data
if (nrow(alpha) != nrow(beta))
stop("Different number of wells in the alpha and beta data inputs.")
if (replicates < 1) stop("The replicates arguments need to be at least 1.")
numb_plates <- nrow(alpha) / 96
size_rep <- floor(frac * nrow(alpha)) # number of wells to sample
# for CRAN checks
list_pair1 <- list_pair2 <- NULL
for (plat in 1:replicates) {
# choose a random subset of the wells of the data without replacement
# (unless the user wants to bootstrap and use replacement)
ind <- sample(1:(96 * numb_plates), size_rep, replace = bootstrap)
data_alph <- alpha[ind, ]
data_beta <- beta[ind, ]
# calculate association scores between
pref <- chain_scores(data_a = data_alph, data_b = data_beta)
# number of alpha/beta chains
numb_alph <- ncol(data_alph) # columns of data_alph is the number of alpha chains
numb_beta <- ncol(data_beta) # columns of data_beta is the number of beta chains
# matrices of scores
score_alph <- pref$ascores # matrices of scores that alpha (row i) has for beta (col j)
score_beta <- pref$bscores # matrices of scores that beta (row i) has for beta (col j)
rm(pref)
# This section loops through each well, looks at the alpha and beta chains
# in the well and use the Hungarian algorithm to choose the set of one-to-
# one beta-alpha pairings that maximizes the sum of the association scores
# Entry (i, j) of track_pairs keeps track the number of wells beta_i and
# alpha_j are chosen in; a threshold is then calculated to filter out the
# beta_i/alpha_j pairs that are most likely not true pairs
track_pairs <- matrix(0, nrow = numb_beta, ncol = numb_alph)
#track_assign <- list(length = nrow(data_beta))
score_mat <- score_beta + t(score_alph)
for (well in 1:nrow(data_beta)) {
# find the alpha and beta chain indices in the well
ind_alph <- which(data_alph[well, ] == 1)
ind_beta <- which(data_beta[well, ] == 1)
# Hungarian algorithm is posed differently depending on whether there are
# more alpha or beta chains in the well
if (length(ind_beta) <= length(ind_alph)) {
# use the hungarian algorithm to obtain the beta_i/alpha_j pairings that
# maximize the sum of the scores for the well
assign <- clue::solve_LSAP(score_mat[ind_beta, ind_alph, drop = FALSE],
maximum = TRUE)
assign <- matrix(c(ind_beta, ind_alph[assign]), ncol = 2)
#track_assign[[well]] <- assign
track_pairs[assign] <- track_pairs[assign] + 1
} else {
assign <- clue::solve_LSAP(t(score_mat[ind_beta, ind_alph, drop=FALSE]),
maximum = TRUE)
assign <- matrix(c(ind_beta[assign], ind_alph), ncol = 2)
#track_assign[[well]] <- assign
track_pairs[assign] <- track_pairs[assign] + 1
}
}
# Determine a threshold for the # number of wells that a candidate pair must
# be picked in in order to make the cut
threshold <- mean(track_pairs[track_pairs > 0])
# arr.ind = TRUE will return a n by 2 matrix, which col 1 is the beta index
# and col 2 is the alpha index; finding the beta, alpha pairs that are above
# the threshold
app <- which(track_pairs > threshold, arr.ind = TRUE)
app <- app[order(app[, 1]), ]
# Sorting the resulting candidate pairs into a list pair where the ith
# element of the list contains the alpha indices pairing with beta_i
list_pair <- list(length = numb_beta)
for (i in 1:numb_beta) {
# find the candidate pairs with the beta index and save those alpha
# indices to the ith element of the list
ind_beta <- which(app[, 1] == i)
list_pair[[i]] <- unique(as.vector(app[ind_beta, 2]))
} # end for i
# recording the beta_i\alpha_j pairs from the output of the replicate to the
# variable "list_pairX" where X is the X_th replicate
result_name <- paste("list_pair", plat, sep = "")
assign(result_name, list_pair)
} # end for - plat
# combine all the results of the replicates into one master list
# The vector of the i_th element of list_master represents the indices of the
# alpha chains paired with beta_i
list_master <- mapply(c, list_pair1, list_pair2, SIMPLIFY = FALSE)
rm(list_pair1, list_pair2)
for (i in 3:replicates) {
list_name <- paste("list_pair", i, sep = "")
list_master <- mapply(c, list_master, eval(parse(text = list_name)),
SIMPLIFY = FALSE)
rm(list = list_name)
}
# determine the number of candidate pairs by adding up all the number of
# unique alpha chain indices paired with each beta chain
cand_pairs <- sum(sapply(lapply(list_master, unique), length))
jack_results <- matrix(0, nrow = cand_pairs, ncol = 3)
ind <- 1
for (ind_beta in 1:length(list_master)) {
if (length(list_master[[ind_beta]]) > 0) {
alphas <- table(list_master[[ind_beta]])
indices <- as.numeric(names(alphas))
prop_replicates <- as.vector(alphas) / replicates
i1 <- ind
i2 <- ind + length(unique(indices)) - 1
jack_results[i1:i2, 1] <- ind_beta
jack_results[i1:i2, 2] <- indices
jack_results[i1:i2, 3] <- prop_replicates
ind <- ind + length(unique(indices))
}
}
jack_results <- jack_results[, c(1, 1, 2, 2, 3), drop = FALSE]
colnames(jack_results) <- c("beta1", "beta2", "alpha1", "alpha2", "prop_replicates")
jack_results
} # end function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.