R/detailedLook.R

#' Assessing the Significance of Recurrent DNA Copy Number Aberrations
#'
#' @param x An n by m numeric matrix containing DNA copy number data from n subjects at m markers.
#'
#' @param marker.data A dataframe containing marker position data for markers in the autosomes.
#'   Column 1 contains the chromosome number for each marker, and column 2 contains the position
#'   (in base pairs) of each marker.  Additional columns, if present, represent information about
#'   the markers (e.g. probe names).
#'
#' @param annot.file A cytoband annotation dataframe.  Each row corresponds to a distinct cytoband,
#'   and column 1 contains the chromosome number, column 2 contains the start position (in base pairs),
#'   column 3 contains the end position (in base pairs), and column 4 contains the cytoband name
#'   (e.g. p21.3).  Additional columns may be present, but they are not used.
#'
#' @param num.perms A positive integer that represents the number of cyclic shifts used to create the
#'   empirical null distribution.
#'
#' @param num.iters A positive integer that represents the number of distinct gain (loss) loci that
#'   will be assessed.  A value of 0 yields a matrix with zero rows.
#'
#' @param gain.loss A character string that indicates whether recurrent gains (\code{gain.loss = "gain"})
#'   or recurrent losses (\code{gain.loss = "loss"}) are assessed.  Any other value is an error.
#'
#' @param reformat.annot A logical value that indicates whether annot.file needs to be reformatted
#'   (default = FALSE).  See the "note" section of \code{\link{makeCytoband}} for additional information.
#'
#' @param random.seed An optional random seed (default = NULL).
#'
#' @return A matrix with \code{num.iters} rows.  The entries of each row correspond to the marker that is
#'   being assessed.  More specifically, the entries are (1) the chromosome number, (2) the marker position
#'   (in base pairs), (3) additional marker information present in \code{marker.data}, (4) the marker number,
#'   (5) the p-value obtained from the null distribution, and (6) the endpoints of the peak interval (in base
#'   pairs), as described in Bioinformatics (2011) 27(5) 678 - 685.  The columns are named after the columns
#'   of \code{marker.data}, followed by \code{"Marker"}, \code{"p-Val"}, \code{"Peak Int. (L)"} and
#'   \code{"Peak Int. (R)"}.
#'
#' @details This function applies the \emph{Detailed Look} version of DiNAMIC's cyclic shift procedure to assess
#'   the statistical significance of recurrent DNA copy number aberrations.  Either recurrent gains
#'   (\code{gain.loss = "gain"}) or recurrent losses (\code{gain.loss = "loss"}) are assessed using a null
#'   distribution based on \code{num.perms} cyclic shifts of \code{x}.  Iterative calls to DiNAMIC's
#'   \emph{peeling} procedure (implemented here in the \code{\link{peeling}} function) allow users to assess
#'   the statistical significance of \code{num.iters} distinct gains (losses).  As noted in Bioinformatics (2011)
#'   27(5) 678 - 685, the Detailed Look procedure recalculates the null distribution after each iteration of the
#'   peeling procedure.  While this approach is more computationally intensive, simulations suggest that it
#'   provides more power to detect recurrent gains (losses).
#'
#' @examples
#' detailedLook(wilms.data, wilms.markers, annot.file, 100, 3)
#'
#' @export
detailedLook <- function(x, marker.data, annot.file, num.perms, num.iters,
                         gain.loss = "gain", reformat.annot = FALSE,
                         random.seed = NULL) {
  # Only "gain" and "loss" are meaningful.  Without this check any other value
  # produced a multiplier of 0, silently zeroing the data and yielding
  # meaningless output instead of an error.
  gain.loss <- match.arg(gain.loss, c("gain", "loss"))
  sign.flip <- if (gain.loss == "gain") 1 else -1

  # Only the first two columns (chromosome, position) are passed to
  # makeCytoband(); the full marker matrix is kept for reporting.
  small.marker.data <- as.matrix(marker.data[, 1:2])
  cytoband <- makeCytoband(small.marker.data, annot.file, reformat.annot)
  marker.matrix <- as.matrix(marker.data)

  # Losses are handled by negating the data so that every iteration can look
  # for the maximum column sum.
  data.matrix <- x * sign.flip

  out.names <- c(colnames(marker.data), "Marker", "p-Val",
                 "Peak Int. (L)", "Peak Int. (R)")

  # seq_len() (rather than 1:num.iters) so that num.iters = 0 runs no
  # iterations; rows are collected in a preallocated list and bound once.
  iter.idx <- seq_len(num.iters)
  rows <- vector("list", length(iter.idx))

  for (i in iter.idx) {
    # The null distribution is recomputed from the current (peeled) data on
    # every iteration.  The same random.seed is forwarded each time; how
    # findNull() uses it is not visible from this function.
    null.dist <- findNull(data.matrix, num.perms, random.seed)

    col.sums <- colSums(data.matrix)
    obs.max <- max(col.sums)
    k <- which.max(col.sums)

    # Fraction of null values strictly above the observed maximum, plus
    # 1 / num.perms, capped at 1.
    p.val <- min(mean(obs.max < null.dist) + 1 / num.perms, 1)

    # peeling() returns a list: element 1 is used as the data for the next
    # iteration, element 2 as the pair of marker indices bounding the peak.
    peeling.data <- peeling(data.matrix, marker.matrix, cytoband, k)
    data.matrix <- peeling.data[[1]]
    interval <- peeling.data[[2]]

    rows[[i]] <- c(marker.matrix[k, ], k, p.val,
                   marker.matrix[interval[1], 2],
                   marker.matrix[interval[2], 2])
  }

  if (length(rows) == 0L) {
    # No iterations requested: return a well-formed matrix with zero rows.
    empty <- if (is.character(marker.matrix)) character(0) else numeric(0)
    output.matrix <- matrix(empty, nrow = 0L, ncol = ncol(marker.matrix) + 4L)
  } else {
    output.matrix <- do.call(rbind, rows)
  }

  colnames(output.matrix) <- out.names
  output.matrix
}

Try the dinamic package in your browser

Any scripts or data that you put into this service are public.

dinamic documentation built on May 29, 2024, 8:45 a.m.