##' @rdname binSet
##' @title Apply bins to peak tables, creating arrays of AFLP data
##'
##' @description Functions for converting peak tables to AFLP data arrays,
##' and combining multiple arrays for analysis.
##'
##' @details \code{binSet} takes a peak table, applies the
##' given bins, and returns a 3D aflp data array.
##'
##' \code{aflpCombine} takes two aflp data arrays, as produced from
##' \code{binSet}, and combines them into a single array
##'
##' @param pt A peak table, as generated by \code{fsa2PeakTab}
##'
##' @param bins A bin table, as produced by \code{fsaRGbin}
##'
##' @param pref A prefix to append to bin names, to allow for later
##' combining data from different primer pairs
##'
##' @param hthresh The height threshold to score peaks as 'present'. You
##' can revisit this decision later in the process.
##'
##' @param aflp1 aflp data arrays, as produced by \code{binSet}
##' @param aflp2 aflp data arrays, as produced by \code{binSet}
##'
##' @return \code{binSet} returns a 3D array. The rows are
##' samples, the columns are fragments, and the third dimension
##' splits the array into allele calls ("alleles"), peak heights
##' ("heights") and fragment size in basepairs ("sizes"). Thus,
##' to retrieve the presence/absence matrix of allele calls, use:
##' \code{aflp[ , , "alleles"]}, where \code{aflp} is the object
##' returned by \code{binSet}.
##'
##' Other data summaries are readily available by creative slicing and
##' dicing of the 3D array.
##'
##' @author Tyler Smith
##'
##' @export
##'
binSet <- function(pt, bins, pref, hthresh = 100){
pt <- pt[order(pt$bp), ]
bins <- bins[order(bins[,1]), ]
sizes <- pt$bp
samples <- levels(pt$sample.name)
size.mat <- matrix(NA, ncol = nrow(bins), nrow = length(samples))
height.mat <- size.mat
row.names(size.mat) <- samples
row.names(height.mat) <- samples
colnames(size.mat) <- bins[,1]
colnames(height.mat) <- bins[,1]
bin <- 1
peak <- 1
while(bin <= nrow(bins) & peak <= length(sizes)) {
if(sizes[peak] < bins[bin, 1]){ # peak is too small, move to next peak
peak <- peak + 1
}
else if (sizes[peak] >= bins[bin, 2]){ # peak is bigger than bin, move to next bin
bin <- bin + 1
}
else if (sizes[peak] >= bins[bin, 1] & sizes[peak] < bins[bin, 2]) { # count it!
name <- pt$sample.name[peak]
height <- pt$height[peak]
size <- pt$bp[peak]
size.mat[name, bin] <- size
height.mat[name, bin] <- height
peak <- peak + 1
}
}
res <- array(dim = c(dim(height.mat), 3),
dimnames = list(sample = rownames(height.mat),
fragment = colnames(height.mat),
values = c("alleles", "heights", "sizes")))
res[,,2] <- height.mat
res[,,3] <- size.mat
alleles <- res[, , "heights"]
alleles[is.na(alleles)] <- 0
alleles[alleles >= hthresh] <- 1
alleles[alleles > 1] <- 0
res[, , 1] <- alleles
##print(paste(pref, round(colMeans(res[,,"sizes"], na.rm = TRUE), 1), sep = "."))
colnames(res) <- paste(pref, round(colMeans(res[,,"sizes"], na.rm = TRUE), 1), sep = ".")
return(res)
}
#' @rdname binSet
#' @export
#'
aflpCombine <- function(aflp1, aflp2) {
common <- intersect(rownames(a), rownames(b))
return(abind::abind(a[common,,], b[common,,], along = 2))
}
## unbin <- function(gm){
## levs <- levels(as.factor(row.names(gm)))
## res <- data.frame(sample.name = factor(levels = levs),
## height = numeric(), Size = numeric(), bin = numeric())
## for (i in 1:ncol(gm)) {
## targets <- complete.cases(gm[ , i, "sizes"])
## samples <- row.names(gm)[targets]
## heights <- gm[targets, i, "heights"]
## sizes <- gm[targets, i, "sizes"]
## bin <- rep(i, sum(targets))
## tmpdf <- data.frame(sample.name = factor(samples, levels = levs), height = heights,
## Size = sizes, bin = bin)
## tmpdf <- tmpdf[order(tmpdf$bp), ]
## res <- rbind(res, tmpdf)
## }
## rownames(res) <- NULL
## return(res)
## }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.