#' Calculate absolute number or fraction of unique 2-way overlap.
#'
#' \code{overlap.2way} quantifies the amount of unique overlap between two input vectors.
#'
#' @param vec1 a vector of non-zero length. Vector can contain non-unique entries.
#' E.g.: sequence reads, clonal assignment numbers.
#' @param vec2 a vector of non-zero length. Vector can contain non-unique entries.
#' @param returnType whether to return the absolute number or the fraction of overlap.
#' Abbreviations accepted.
#' @return A whole number or a fraction quantifying the amount of unique 2-way
#' overlap between \code{vec1} and \code{vec2}. \code{NA} if either of
#' the input vectors has length zero.
#' @details For \code{returnType="frac"}, the denominator used is the smaller
#' number of the two counts of unique objects in \code{vec1} and
#' \code{vec2}.
#' @examples
#' # vec1 has 5 unique entries; vec2 has 2
#' # absolute number of unique overlapping entries is 2
#' overlap.2way(vec1=1:5, vec2=c(2,4,4), returnType="a")
#'
#' # fraction of unique overlapping entries is 1 (2/2)
#' # the number of unique entries in vec2 is used as the denominator
#' overlap.2way(vec1=1:5, vec2=c(2,4,4), returnType="f")
#' @export
overlap.2way = function(vec1, vec2, returnType=c("abs", "frac")) {
returnType = match.arg(returnType)
if (length(vec1)==0 | length(vec2)==0) {
return(NA)
}
n.uniq.1 = length(unique(vec1))
n.uniq.2 = length(unique(vec2))
n.uniq.union = length(unique(c(vec1, vec2)))
n.id = n.uniq.1 + n.uniq.2 - n.uniq.union
if (returnType=="frac") {
# weighted avg; weights = n.uniq.i/(n.uniq.1+n.uniq.2)
# frac = n.id*2 / (n.uniq.1+n.uniq.2)
# use smaller n.uniq.i as denominator
frac = n.id / min(n.uniq.1, n.uniq.2)
return(frac)
} else if (returnType=="abs") {
return(n.id)
}
}
#' Calculate within-subject 2-way and 3-way clonal overlap.
#'
#' \code{overlap.clonal} calculates the overlap in terms of clonal assignment between
#' samples from the same subject.
#'
#' @param df \code{data.frame} containing columns specified by \code{col.clone}
#' and \code{col.group}.
#' @param col.clone a \code{character}. Name of column containing clonal assignments.
#' @param col.group a \code{character}. Name of column containing sample information.
#' @return A \code{list} consisting of:
#' \itemize{
#' \item \code{mtx.all}: a \code{data.frame} in which each row tabulates the number
#' of sequences from each sample assigned to the clone represented by
#' that row. Row names correspond to clonal assignments. Column names
#' correspond to names of samples.
#' \item \code{count.2way}: a vector indicating the counts of 2-way clonal
#' overlap. \code{NULL} if there are fewer than 2 unique samples in column
#' specified by \code{col.group}.
#' \item \code{mtx.2way}: a list of \code{data.frame}s; each matrix corresponds to a
#' different sample-sample combination and includes clones to which both
#' samples are assigned. \code{NULL} if there are fewer than 2 unique samples
#' in column specified by \code{col.group}.
#' \item \code{count.3way}: a vector indicating the counts of 3-way clonal
#' overlap. \code{NULL} if there are fewer than 3 unique samples in column
#' specified by \code{col.group}.
#' \item \code{mtx.3way}: a list of \code{data.frame}s; each matrix corresponds to a
#' different sample-sample-sample combination and includes clones to which
#' all three samples are assigned. \code{NULL} if there are fewer than 3
#' unique samples in column specified by \code{col.group}.
#' \item \code{count.uniq}: a vector indicating the counts of unique clones
#' in each sample.
#' }
#' @details The same results can be expected from
#' \itemize{
#' \item \code{overlap.clonal(df, "CLONE", "SAMPLE")$count.2way["A-D"]}
#' \item \code{overlap.2way(df$CLONE[df$SAMPLE=="D"], df$CLONE[df$SAMPLE=="A"], returnType="abs")}.
#' }
#' @examples
#' # TBA
#' @export
overlap.clonal = function(df, col.clone, col.group) {
stopifnot( (col.clone %in% colnames(df)) &
(col.group %in% colnames(df)) )
# get a matrix documenting distribution of sequences from a clone
# into different groups
# row: unique clone
# column: groups
clones = sort(unique(df[, col.clone]))
groups = sort(unique(df[, col.group]))
mtx = matrix(NA, nrow=length(clones), ncol=length(groups),
dimnames=list(clones, groups))
for (i in 1:length(clones)) {
cur.tab = table(df[df[, col.clone]==clones[i], col.group])
mtx[i, match(names(cur.tab), colnames(mtx))] = cur.tab
}
if (length(groups)>=2) {
# get a vector counting 2-way overlap; and
# extract into a list rows from mtx corresponding to said overlaps
count.vec.2 = vector(mode="integer")
ov.list.2 = vector(mode="list")
combo.2 = utils::combn(x=groups, m=2)
for (i in 1:ncol(combo.2)) {
# name for current overlap
cur.name = paste0(combo.2[, i], collapse="-")
# for each row/clone in mtx, count # NAs across columns involving current groups
cur.na.count = apply(mtx[, combo.2[, i]], 1, function(x){sum(is.na(x))})
# 0 NA in a row means clone is in both groups
count.vec.2 = c(count.vec.2, sum(cur.na.count==0))
names(count.vec.2)[length(count.vec.2)] = cur.name
# store these clones
ov.list.2 = c(ov.list.2, list(data.frame(mtx[cur.na.count==0, ])))
names(ov.list.2)[length(ov.list.2)] = cur.name
}
} else {
count.vec.2 = NULL
ov.list.2 = NULL
}
if (length(groups)>=3) {
# get a vector counting 3-way overlap; and
# extract into a list rows from mtx corresponding to said overlaps
count.vec.3 = vector(mode="integer")
ov.list.3 = vector(mode="list")
combo.3 = utils::combn(x=groups, m=3)
for (i in 1:ncol(combo.3)) {
# name for current overlap
cur.name = paste0(combo.3[, i], collapse="-")
# for each row/clone in mtx, count # NAs across columns involving current groups
cur.na.count = apply(mtx[, combo.3[, i]], 1, function(x){sum(is.na(x))})
# 0 NA in a row means clone is in all three groups
count.vec.3 = c(count.vec.3, sum(cur.na.count==0))
names(count.vec.3)[length(count.vec.3)] = cur.name
# store these clones
ov.list.3 = c(ov.list.3, list(data.frame(mtx[cur.na.count==0, ])))
names(ov.list.3)[length(ov.list.3)] = cur.name
}
} else {
count.vec.3 = NULL
ov.list.3 = NULL
}
return(list(mtx.all=data.frame(mtx),
count.2way=count.vec.2, mtx.2way=ov.list.2,
count.3way=count.vec.3, mtx.3way=ov.list.3,
count.uniq=sapply(groups, function(g){
length(unique(df[df[, col.group]==g, col.clone]))
}) ))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.