MS Data

Documented in betweenAlignment

#' Data Structure for "between" alignment of many GCMS samples
#'
#' This function creates a "between" alignment (i.e. comparing merged peaks)
#'
#' \code{betweenAlignment} objects gives the data structure which stores the
#' result of an alignment across several "pseudo" datasets. These pseudo
#' datasets are constructed by merging the "within" alignments.
#'
#' @aliases betweenAlignment betweenAlignment-class betweenAlignment-show show,
#' betweenAlignment-method
#' @param pD a \code{peaksDataset} object
#' @param cAList \code{list} of \code{clusterAlignment} objects, one for each
#' experimental group
#' @param pAList \code{list} of \code{progressiveAlignment} objects, one for
#' each experimental group
#' @param impList \code{list} of imputation lists
#' @param filterMin minimum number of peaks within a merged peak to be kept in
#' the analysis
#' @param gap gap parameter
#' @param D retention time penalty parameter
#' @param usePeaks logical, whether to use peaks (if \code{TRUE}) or the full
#' 2D profile alignment (if \code{FALSE})
#' @param df distance from diagonal to calculate similarity
#' @param verbose logical, whether to print information
#' @param metric numeric, different algorithm to calculate the similarity
#' matrix between two mass spectrum. \code{metric=1} call
#' \code{normDotProduct()}; \code{metric=2} call \code{ndpRT()};
#' \code{metric=3} call \code{corPrt()}
#' @param type numeric, two different type of alignment function
#' @param penality penalization applied to the matching between two mass
#' spectra if \code{(t1-t2)>D}
#' @param compress logical whether to compress the similarity matrix into a
#' sparse format.
#' @return \code{betweenAlignment} object
#' @author Mark Robinson
#' @seealso \code{\link{multipleAlignment}}
#' @references Mark D Robinson (2008).  Methods for the analysis of gas
#' chromatography - mass spectrometry data \emph{PhD dissertation} University
#' of Melbourne.
#' @keywords classes
#' @examples
#'
#' 	require(gcspikelite)
#' 	## see 'multipleAlignment'
#' @importFrom stats median
#' @export betweenAlignment
betweenAlignment <- function(pD, cAList, pAList, impList, filterMin = 1,
                             gap = 0.7, D = 10, usePeaks = TRUE, df = 30,
                             verbose = TRUE, metric = 2,  type = 2,
                             penality = 0.2, compress = FALSE){
    n <- length(pAList)
    if(length(filterMin) == 1)
    {
        filterMin <- rep(filterMin, n)
    }
    pkd <- vector("list", n)
    pkr <- vector("list", n)
    filtind <- vector("list", n)  # list of filtered indices
    newind <- vector("list", n)
    for(g in 1:n){
        pa <- pAList[[g]]
        nm <- length(pa@merges)
        runs <- pa@merges[[nm]]$runs
        ind <- pa@merges[[nm]]$ind
        keep <- rowSums(!is.na(ind)) >= filterMin[g] ## RR
        if (verbose)
        {
            cat("[betweenAlignment]", names(pAList)[g], ": Removing", nrow(ind) - sum(keep),"peaks.\n")
            ind <- ind[keep,]
        }
        newind[[g]] <- lapply(impList[[g]], FUN = function(u,k) u[k,], k = keep)
        filtind[[g]] <- ind
        rt <- matrix(NA, nrow(ind), ncol(ind))
        mz <- pD@mz
        peaksdata <- matrix(0, nrow = length(pD@mz), ncol = nrow(ind))
        for(j in 1:ncol(ind)){
            if(usePeaks)
            {
                cur.ds <- pD@peaksdata[[runs[j]]]
                cur.rt <- pD@peaksrt
            }
            else
            {
                cur.ds <- pD@rawdata[[runs[j]]]
                cur.rt <- pD@rawrt
            }
            for(i in 1:nrow(ind)){
                if(!is.na(ind[i, j]))
                {
                    peaksdata[,i] <- peaksdata[,i] + cur.ds[,ind[i,j]]
                    rt[i,j] <- cur.rt[[runs[j]]][ind[i,j]]
                }
            }
        }
      pkd[[g]] <- peaksdata
	if(!usePeaks)
  {
      pkd[[g]] <- pkd[[g]] / outer(rep(1, nrow(peaksdata)), rowSums(!is.na(ind)))
  } # average over the number of samples
        pkr[[g]] <- apply(rt, 1, median, na.rm = TRUE)
    }
    # wRA <- new("peaksDataset", mz = mz, rawdata = NULL, files = names(cAList), 
    #             rawrt = NULL, peaksdata = pkd, peaksrt = pkr, filtind = filtind)
    wRA <- new("peaksDataset",
               mz = mz, rawdata = list(NULL), files = names(cAList),
               rawrt = list(NULL), peaksdata = pkd, peaksrt = pkr)
    ## return(wRA)
    ## filtind <-
    ## RR
    cA <- clusterAlignment(wRA, runs = 1:n, gap = gap, D = D, df = df,
                           metric = metric, type = type,
                           compress = compress, penality = penality) ## bug here with filterMin > 1
    pA <- progressiveAlignment(wRA, cA, gap = gap, D = D, df = df,
                               compress = compress, type = type)
    ##
    ## cA <- clusterAlignment(wRA, 1:n, gap = gap, D = D, df = df, verbose = verbose)
    ## pA <- progressiveAlignment(wRA, cA, gap = gap, D = D, df = df, verbose = verbose)

    nm <- length(pA@merges)
    ind <- pA@merges[[nm]]$ind
    groups <- pA@merges[[nm]]$runs
    full.runs <- NULL
    full.groups <- NULL
    full.ind <- matrix(NA, nrow(ind), length(pD@rawdata))
    full.newind <- vector("list", 3)
    for(i in 1:length(full.newind)){
        full.newind[[i]] <- matrix(NA, nrow(ind), length(pD@rawdata))
    }
    names(full.newind) <- c("apex", "start", "end")

    col <- 1
    for(i in 1:length(groups)){
        g <- groups[i]
        n <- length(pAList[[g]]@merges)
        full.runs <- c(full.runs, pAList[[g]]@merges[[n]]$runs)
        nr <- length(pAList[[g]]@merges[[n]]$runs)
        full.groups <- c(full.groups, rep(wRA@files[g],nr))
        this.ind <- filtind[[g]]
        rw <- which(!is.na(ind[,i]))
        cl <- col:(col + ncol(this.ind) - 1)
        full.ind[rw, cl] <- this.ind
        if(!is.null(newind[[g]]$apex))
        {
            for(j in 1:length(full.newind)){
                full.newind[[j]] <- newind[[g]][[j]]
            }
        }
        col <- col + ncol(this.ind)
    }

    new("betweenAlignment",
        mergedPeaksDataset = wRA,
        ind = full.ind,
        imputeind = full.newind,
        runs = full.runs,
        groups = full.groups,
        cA = cA,
        pA = pA,
        filtind = filtind,
        newind = newind
        )
}


#' Show method for "between" alignment of many GCMS samples
#'
#' This function show the results of a "between" alignment
#'
#' \code{betweenAlignment} objects gives the data structure which stores the
#' result of an alignment across several "pseudo" datasets. These pseudo
#' datasets are constructed by merging the "within" alignments.
#' @title betweenAlignment-show
#' @param object 
#' @return \code{betweenAlignment} object
#' @author Mark Robinson
#' @export
#' @noRd
setMethod("show","betweenAlignment",
          function(object) {
              cat("An object of class \"", class(object), "\"\n", sep = "")
              cat(length(object@mergedPeaksDataset@peaksrt), "groups:",
                  sapply(object@mergedPeaksDataset@peaksrt, length),
                  "merged peaks\n"
                  )
          }
          )