#' Gathers abundance informations from an alignment
#'
#' Given an alignment table (indices of matched peaks across several samples)
#' such as that within a \code{progressiveAlignment} or
#' \code{multipleAlignment} object, this routines goes through the raw data and
#' collects the abundance of each fragment peak, as well as the retention times
#' across the samples.
#'
#' This procedure loops through the the table of matched peaks and gathers the
#'
#' @param pD a \code{peaksDataset} object, to get the abundance data from
#' @param obj either a \code{multipleAlignment} or \code{progressiveAlignment}
#' object
#' @param newind list giving the
#' @param method method used to gather abundance information, only \code{apex}
#' implemented currently.
#' @param findmzind logical, whether to take a subset of all m/z indices
#' @param useTIC logical, whether to use total ion current for abundance
#' summaries
#' @param top only use the top \code{top} peaks
#' @param intensity.cut percentage of the maximum intensity
#' @return Returns a list (of lists) for each row in the alignment table. Each
#' list has 3 elements: \item{mz}{a numerical vector of the m/z fragments used}
#' \item{rt}{a numerical vector for the exact retention time of each peak
#' across all samples} \item{data}{matrix of fragment intensities. If
#' \code{useTIC = TRUE}, this matrix will have a single row}
#' @author Mark Robinson
#' @seealso \code{\link{imputePeaks}}
#' @references Mark D Robinson (2008). Methods for the analysis of gas
#' chromatography - mass spectrometry data \emph{PhD dissertation} University
#' of Melbourne.
#' @keywords manip
#' @examples
#'
#' require(gcspikelite)
#'
#' ## paths and files
#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/")
#' cdfFiles <- dir(gcmsPath, "CDF", full = TRUE)
#' eluFiles <- dir(gcmsPath, "ELU", full = TRUE)
#'
#' ## read data, peak detection results
#' pd <- peaksDataset(cdfFiles[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
#' pd <- addAMDISPeaks(pd, eluFiles[1:2])
#'
#' ## multiple alignment
#' ma <- multipleAlignment(pd, c(1,1), wn.gap = 0.5, wn.D = 0.05, bw.gap = 0.6,
#' bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50,
#' verbose = TRUE, metric = 1, type = 1)
#'
#' ## gather apex intensities
#' d <- gatherInfo(pd, ma)
#'
#' ## table of retention times
#' nm <- list(paste("MP", 1:length(d), sep = ""), c("S1", "S2"))
#' rts <- matrix(unlist(sapply(d, .subset, "rt")), byrow = TRUE, nc = 2,
#' dimnames = nm)
#'
#' @export gatherInfo
gatherInfo <- function(pD, obj, newind=NULL, method=c("apex"),
findmzind=TRUE, useTIC=FALSE, top=NULL,
intensity.cut=.05){
## ## correlationAlignment() data implementation
## ## start
## if(class(obj) == 'correlationAlignment')
## {
## ## slot rt
## l.rt <- lapply(1:nrow(obj@Alignment), function(z){
## sapply(seq(along=obj@Alignment), function(x, y){
## n <- names(obj@Alignment)[x] #!
## r <- pD@peaksrt[[n]][obj@Alignment[y,x]]
## }, y=1:nrow(obj@Alignment))[z,]
## })
## names(l.rt) <- sapply(l.rt, function(x){names(x) <- 'rt'})
## ## slot data
## l.d <- lapply(1:nrow(obj@Alignment), function(y){
## rx <- lapply(1:ncol(obj@Alignment), function(x){
## nam <- names(obj@Alignment)[x]
## idx <- obj@Alignment[y,x]
## if(is.na(idx) == FALSE){
## ## dat <- pD@peaksdata[nam][1][[1]][,idx]
## ## must match
## ## the column name not the clolumn position
## dat <- pD@peaksdata[nam][1][[1]][,which(colnames(pD@peaksdata[nam][1][[1]]) == idx)]
## }else{
## dat <- rep(NA, length(pD@peaksdata[nam][1][[1]][,idx]))
## }
## return(dat)
## })
## dd <- do.call(cbind, rx)
## return(dd)
## })
## names(l.d) <- sapply(l.d, function(x){names(x) <- 'data'})
## ## trace back raw id spectra
## l.Raw <- lapply(1:nrow(obj@Alignment), function(x){
## cbind.data.frame(Sample=colnames(obj@Alignment),
## rawID=as.numeric(obj@Alignment[x,]))
## })
## names(l.Raw) <- sapply(l.Raw, function(x){names(x) <- 'traceRaw'})
## res <- lapply(1:length(l.d), function(x){
## c(l.Raw[x], l.rt[x], l.d[x])
## })
## names(res) <- paste("feat", seq(along=res), sep='_')
## return(res)
## }
## ## end
## pind - indices of peaks
## rind - indices of runs
## newind ... comes from an imputation step, if at all.
method <- match.arg(method)
if(is(obj, "multipleAlignment"))
{
pind <- obj@betweenAlignment@ind
rind <- obj@betweenAlignment@runs
groups <- obj@betweenAlignment@groups
}
else
if(is(obj, "progressiveAlignment"))
{
n <- length(obj@merges)
pind <- obj@merges[[n]]$ind # for example, if obj=ma$pas[1]
rind <- obj@merges[[n]]$runs
groups <- rep(names(obj),length(rind))
}
## collect rt
out <- matrix(NA, nrow(pind), ncol(pind))
for(j in 1:ncol(pind)){
data <- pD@peaksrt[[rind[j]]]
keep <- which(!is.na(pind[,j]))
out[keep,j] <- data[pind[keep,j]]
}
colnames(out) <- paste(groups,rind,sep=".")
rownames(out) <- 1:nrow(out)
## collect mz and intensity
peaks <- vector("list", nrow(pind))
for(i in 1:length(peaks)){
if(findmzind)
{
mzind <- rep(0, length(pD@mz))
for(j in 1:ncol(pind)){
data <- pD@peaksdata[[rind[j]]]
if(!is.na(pind[i,j]))
mzind <- mzind + data[,pind[i,j]]
}
## mzind <- which(mzind > 0)
mzind <- which(mzind > intensity.cut * max(mzind)) # intensity filter
}
else
{
mzind <- 1:length(pD@mz)
}
if(useTIC)
this.d <- matrix(, nrow=1, ncol=ncol(pind))
else
this.d <- matrix(, nrow=length(mzind), ncol=ncol(pind))
for(j in 1:ncol(pind)){
r <- rind[j]
overlap <- NULL
if(!is.na(pind[i,j]))
if(method == "apex")
{
if(useTIC)
{
this.d[1,j] <- sum(pD@rawdata[[r]][,pD@peaksind[[r]][pind[i,j]]])
} ##
else
{
## this.d[,j] <- pD@rawdata[[r]][mzind,pD@peaksind[[r]][pind[i,j]]] # ?
this.d[,j] <- pD@peaksdata[[r]][mzind,pind[i,j]]
}
}
else
{
## method = "area"
## nn <- length(pD@peaksind.start[[r]])
## w <- which(round(pD@peaksind.start[[r]][-1]) < round(pD@peaksind.end[[r]][-nn]))
## overlap <- cbind(w,w+1)
## v <- which(pind[i,j] == overlap, arr.ind=TRUE)
## if(nrow(v) > 0) {
## ## do mixture thing.
## pks <- overlap[v[1,1],]
## cat("overlap -- run",r,"-- peaks",pks,"--",v[1,2],"\n")
## if(r == 12 & pks[1] == 4)
## return(list(mzind=mzind))
## this.d[,j] <- partitionWithNormalMixture2(pD,r,pks[1],pks[2],mzind,useTIC=useTIC)[,v[1,2]]
## if(j == ncol(pind)) cat("\n")
## }else{
## ## just simple sum of intensity
## st <- pD@peaksind.start[[r]][pind[i,j]]
## en <- pD@peaksind.end[[r]][pind[i,j]]
## if (useTIC)
## this.d[1,j] <- sum(pD@rawdata[[r]][,st:en]) ##
## else
## this.d[,j] <- rowSums(pD@rawdata[[r]][mzind,st:en])
## }
}
if(!is.null(newind))
if(!is.na(newind$apex[i,j])){
if(method == "apex"){
if(useTIC)
this.d[1,j] <- sum(pD@rawdata[[r]][,newind$apex[i,j]]) ##
else
this.d[,j] <- pD@rawdata[[r]][mzind,newind$apex[i,j]]
out[i,j] <- pD@rawrt[[r]][newind$apex[i,j]]
}else{ ## method="area"
## st <- newind$start[i,j]
## en <- newind$end[i,j]
## cat(r,i,j,st,en,"\n")
## if (useTIC)
## this.d[1,j] <- sum(pD@rawdata[[r]][,st:en]) ##
## else
## this.d[,j] <- rowSums(pD@rawdata[[r]][mzind,st:en])
}
}
}
colnames(this.d) <- paste(groups, rind, sep=".")
peaks[[i]] <- list(rt=out[i,], mz=pD@mz[mzind], data=this.d)
}
if(!is.null(top)){
for(i in seq(along=peaks)){
o <- sort(order(-rowSums(peaks[[i]]@data, na.rm=TRUE))[1:top])
peaks[[i]]@mz <- peaks[[i]]@mz[o]
peaks[[i]]@data <- peaks[[i]]@data[o,]
}
}
return(peaks)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.