R/parameters.estimation.R

Defines functions estimate.parameters iter.update.abs.cna update.abs.cna segs.to.cna.matrix null.seg.matrix call.sig.abs.segments max.mode compute.gauss.kernel cna.matrix.to.segs iter.extract.single.samp.params cancel.recurrent.breaks all.samp.param.estimation extract.parameter.at.kws single.samp.param.estimation.abs single.samp.param.estimation calc.single.scale.roughness gen.perm.profiles shuffle.cna

# Copyright 2015 Netherlands Cancer Institute
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
# http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


#' Shuffle in a slot-machine way the log ratios per sample.
#' 
#' @param cna.matrix A SxP \code{matrix} containg log ratios as returned by \code{extract.matrix}.
#' 
#' @return A CNA matrix with shuffled log ratios (twice).
#' @noRd
shuffle.cna <- function(cna.matrix, random.vector=NULL) {
  # TODO: set seed for random number generator if wanted.
  #       For comparison with matlab might be better to use
  #       a vector of precomputed random numbers previously generated
  #       using the random.vector parameter
  #       set.seed(x)
  s.num <- nrow(cna.matrix)
  p.num <- ncol(cna.matrix)
  # In case a shuffle order has been precomputed it uses
  # the random.vector to shuffle cna.matrix in the wanted order
  # otherwise it computes a new one
  if (isempty(random.vector)) {
    random.vector <- randi(p.num, s.num, 1)
  } else {
    if (length(random.vector) < s.num) {
      stop(paste('The random.vector must be at least', s.num, 'intergers long.'))
    }
  }
  
  random.vector <- as.vector(random.vector)
  
  for (i in seq_len(s.num)) {
    cna.matrix[i,] <- cna.matrix[i, mod((random.vector[i]:(random.vector[i] + p.num - 1)) - 1, p.num) + 1]
  }
  
  cna.matrix
}


#' Generates \code{num.iter} permutations of the map.loc data.matrix and returns aggregated log ratios.
#' 
#' @param cna.matrix The \code{matrix} containg log ratios as returned by \code{extract.matrix}.
#' @param num.iter The total number of permutations to generate.
#' 
#' @return The \code{list} containg aggregated log ratios for all permutations across samples and
#'         their cumulative sum.
#' @noRd
###
# MATLAB MAPPING
# Generates random permutations inside.
# This function returns agrProfiles as generated by genPermProfiles.
# nrow(test.env$random.matrix) == num.iter
gen.perm.profiles <- function(cna.matrix, num.iter, test.env=NULL) {
  # NOTE: The shuffled CNA matrix will have the columns repeated twice
  if(!is.null(test.env$random.matrix) && nrow(test.env$random.matrix) < num.iter) {
    stop(paste('The random.matrix in test.env must have at least', num.iter, 'rows.'))
  }
  agr.profiles <- zeros(num.iter, ncol(cna.matrix))
  for (i in seq_len(num.iter)) {
      agr.profiles[i,] <- colSums(shuffle.cna(cna.matrix, test.env$random.matrix[1,]))
      test.env$random.matrix <- test.env$random.matrix[-1,,drop=F]
  }
  agr.profiles <- cbind(agr.profiles, agr.profiles)
  agr.cum.profiles <- cbind(rep(0, num.iter), t(apply(agr.profiles, 1, cumsum)))
  
  list(agr.profiles=agr.profiles, agr.cum.profiles=agr.cum.profiles)
}


#' Generate parameters based on the current permutation
#'
#' @param matrix The cumulative sum of the matrix generated by \code{gen.perm.profiles}.
#' @param kw
#' @param kw.break
#' @param p.num The number of probes per sample
#' 
#' @return Returns a list containing various parameters to be inserted in the \code{per.sample} matrix
#' @noRd
###
# MATLAB MAPPING INFO
# matrix = params.agrCumIter as returned by genPermProfiles
calc.single.scale.roughness <- function(matrix, kw, kw.break, p.num) {
  kw.n <- kw.break;
  kw.p <- kw - kw.break;
  
  lr.sn <- -(matrix[,(kw.n + 1):(p.num + kw.n)] - matrix[,1:p.num]) / kw.n
  if (kw.p != 0) {
    lr.sp <- (matrix[,(kw.p + kw.n + 1):(p.num + kw.p + kw.n)] - matrix[,(kw.n + 1):(p.num + kw.n)]) / kw.p
    lr.s <- lr.sp + lr.sn
  } else {
    lr.s <- lr.sn
  }
  
  lr.s.diff <- t(apply(lr.s, 1, diff))
  p.s.diff <- ncol(lr.s.diff)
  lr.s.vars <- apply(lr.s, 1, var)
  lr.s.diff.vars <- apply(lr.s.diff, 1, var)
  lr.s.covs <- rowSums(lr.s[,1:(ncol(lr.s)-1)] * lr.s.diff) / (p.s.diff - 1);
  # Assamble all the pieces together and return a list
  list(mus=vector('numeric', length(lr.s.vars)), vars=lr.s.vars, diff.vars=lr.s.diff.vars, covs=lr.s.covs)
}

#'
#' @param cum.perm.profiles The cumulative sum of the matrix generated by \code{gen.perm.profiles}.
#' @param max.chrom.len The maximum number of probes per chromosome as returned by \code{max.chrom.length}.
#' @param p.num The number of probes per sample as returned by \code{probes.per.sample}.
#' @noRd
# MATLAB MAPPING INFO
# cum.perm.profiles = params.agrCumIter as returned by genPermProfiles
# [~, p.num] = size(data.cna)
single.samp.param.estimation <- function(cum.perm.profiles, max.chrom.len, p.num, kw.ratio=1/10, min.kw=1, kw.fact=2) {
  max.kw <- min(max.chrom.len, round(kw.ratio * p.num))
  num.kw <- floor(log2(max.kw / min.kw) / log2(kw.fact) + 1)
  kws <- unique(c(round(min.kw * kw.fact ^ (0:(num.kw-1))), max.kw))
  num.kws <- length(kws)
  # FIXME: should the next line be num.kws.half <- tail(which(kws <= max.kw/2), n=1)?
  num.kws.half <- sum(kws <= max.kw/2)
  per.sample <- matrix(list(), nrow=num.kws, ncol=2 * num.kws.half)
  
  for (kw.i in 2:num.kws) {
    # FIXME: should the next line be num.kws.half <- tail(which(kws <= kws[kw.i]/2), n=1)?
    num.kws.half <- sum(kws <= kws[kw.i]/2)
    for (kw.j in seq_len(num.kws.half)) {
      per.sample[[kw.i, kw.j]] <- calc.single.scale.roughness(cum.perm.profiles, kws[kw.i], kws[kw.j], p.num)
      per.sample[[kw.i, 2 * num.kws.half - kw.j + 1]] <- calc.single.scale.roughness(cum.perm.profiles, kws[kw.i], kws[kw.i] - kws[kw.j], p.num);
    }
  }
  
  list(kws=kws, per.sample=per.sample)
}

###
# MATLAB MAPPING
# max.chrom.len = getMaxChromLength(data.chrom)
# p.num = ncol(data.cna)
# agr.p.cum.iter = params.agrPCumIter
# agr.n.cum.iter = params.agrPCumIter
single.samp.param.estimation.abs <- function(max.chrom.len, p.num, agr.p.cum.iter, agr.n.cum.iter, t.sign, kw.ratio=1/10, min.kw=1, kw.fact=2) {
  max.kw <- min(max.chrom.len, round(kw.ratio * p.num))
  num.kw <- floor(log2(max.kw/min.kw)/log2(kw.fact) + 1)
  kws <- unique(c(round(min.kw * kw.fact ^ (0:(num.kw-1))), max.kw))
  num.kws <- length(kws)
  num.kws.half <- tail(which(kws <= max.kw/2), n=1)
  per.sample <- matrix(list(), nrow=num.kws, ncol=2 * num.kws.half)

  for (kw.i in 2:num.kws) {
    num.kws.half = tail(which(kws <= kws[kw.i]/2), n=1)
    for (kw.j in seq_len(num.kws.half)) {
      if (t.sign == 1) {
        per.sample[[kw.i, kw.j]] = calc.single.scale.roughness(agr.p.cum.iter, kws[kw.i], kws[kw.j], p.num)
        per.sample[[kw.i, 2*num.kws.half-kw.j+1]] = calc.single.scale.roughness(agr.p.cum.iter, kws[kw.i], kws[kw.i] - kws[kw.j], p.num)
      } else {
        per.sample[[kw.i, kw.j]] = calc.single.scale.roughness(agr.n.cum.iter, kws[kw.i], kws[kw.j], p.num)
        per.sample[[kw.i, 2*num.kws.half-kw.j+1]] = calc.single.scale.roughness(agr.n.cum.iter, kws[kw.i], kws[kw.i] - kws[kw.j], p.num)
      }
    }
  }
  
  list(kws=kws, per.sample=per.sample)
}


#' Extract sample parameter from the \code{per.sample} list matrix structure
#' @noRd
extract.parameter.at.kws <- function(per.sample, kw.i, kw.j, p.s) {
  lr.s.mus <- per.sample[[kw.i, kw.j]]$mus;
  lr.s.vars <- per.sample[[kw.i, kw.j]]$vars;
  lr.s.diff.vars <- per.sample[[kw.i, kw.j]]$diff.vars;
  lr.s.covs <- per.sample[[kw.i, kw.j]]$covs;
  
  lr.s.mu <- mean(lr.s.mus);
  lr.s.var <- mean(lr.s.vars);
  lr.s.diff.var <- mean(lr.s.diff.vars);
  lr.s.cov <- mean(lr.s.covs);
  
  denom <- sqrt(max(lr.s.var * lr.s.diff.var - lr.s.cov^2, 0))
  int.sqrt.v <- (p.s - 1) * (atan((lr.s.diff.var + lr.s.cov) / denom) - atan(lr.s.cov / denom))
  
  list(mu=lr.s.mu, var=lr.s.var, int.sqrt.v=int.sqrt.v)
}


all.samp.param.estimation <- function(kws, per.sample, p.num, min.var=1e-5) {
  num.kws <- length(kws)
  num.kws.half <- sum(kws <= max(kws)/2)
  
  mus <- matrix(0, nrow=num.kws, ncol=2 * num.kws.half)
  vars <- matrix(0, nrow=num.kws, ncol=2 * num.kws.half)
  int.sqrt.vs <- matrix(0, nrow=num.kws, ncol=2 * num.kws.half)
  
  for (kw.i in 1:num.kws) {
    p.s <- p.num - kws[kw.i] + 1
    num.kws.half = sum(kws <= kws[kw.i]/2)
    for (kw.j in seq_len(num.kws.half)) {
      param.list <- extract.parameter.at.kws(per.sample, kw.i, kw.j, p.s)
      mus[kw.i, kw.j] <- param.list$mu
      vars[kw.i, kw.j] <- param.list$var
      int.sqrt.vs[kw.i, kw.j] <- param.list$int.sqrt.v
      
      param.list <- extract.parameter.at.kws(per.sample, kw.i, 2 * num.kws.half - kw.j + 1, p.s)
      mus[kw.i,  2 * num.kws.half - kw.j + 1] <- param.list$mu
      vars[kw.i,  2 * num.kws.half - kw.j + 1] <- param.list$var
      int.sqrt.vs[kw.i,  2 * num.kws.half - kw.j + 1] <- param.list$int.sqrt.v
    }
  }
  
  list(kws=kws, mus=mus, vars=vars, int.sqrt.vs=int.sqrt.vs)
}


cancel.recurrent.breaks <- function(cna.matrix, modules) {
  num.mods <- length(modules)
  s.num <- nrow(cna.matrix)
  cna.diff <- t(apply(cbind(rep(0, s.num), cna.matrix), 1, diff))
  f.var <- zeros(s.num, num.mods)
  breaks <- rep(0, num.mods - 1)
  
  for (mod.i in seq_len(num.mods)) {
    f.var[, mod.i] <- rowMeans(cna.matrix[, modules[[mod.i]]$I:(modules[[mod.i]]$I + modules[[mod.i]]$kw - 1), drop=F])
    if (mod.i < num.mods) {
      breaks[mod.i] <- modules[[mod.i]]$I + modules[[mod.i]]$kw
    }
  }
  
  f.diff <- t(apply(f.var, 1, diff))
  cna.diff[, breaks] <- cna.diff[, breaks] - f.diff
  
  t(apply(cna.diff, 1, cumsum))
}


###
# MATLAB MAPPING
# Generates random permutations inside.
iter.extract.single.samp.params <- function(cna.matrix, map.loc.agr, max.chrom.len, fdr,
                                            samps.use=vector('numeric'),
                                            max.iter=5, num.perm=10,
                                            test.env=NULL, quiet=T) {
  
  if (fdr <= 0) {
    stop('The FDR must be > 0')
  }
  
  p.num <- ncol(cna.matrix)
  
  agr.cum.profiles <- gen.perm.profiles(cna.matrix, num.perm, test.env)$agr.cum.profiles

  single.sample.params <- single.samp.param.estimation(agr.cum.profiles, max.chrom.len, p.num)
  params <- all.samp.param.estimation(single.sample.params$kws,
                                      single.sample.params$per.sample,
                                      p.num)
  
  num.breaks.prev <- 0
  canc.prev.cna.matrix <- cna.matrix
  sig.mods.prev <- list()
  
  iter <- 1
  
  repeat {
    if (!quiet)
      print(paste('iter:', iter))
    merge.results <- merge.modules.v2(cna.matrix, map.loc.agr, p.num,
                                      samps.use, matrix(nrow=0,ncol=0), sig.mods.prev,
                                      agr.cum.profiles, params, -log10(fdr))
    modules <- merge.results$modules
    break.info <- merge.results$break.info
    sig.mods <- merge.results$sig.mods
    num.breaks <- length(break.info)
    
    if ((num.breaks <= num.breaks.prev) | (iter > max.iter)) {
      canc.cna.matrix <- canc.prev.cna.matrix
      break
    }
    
    sig.mods.prev <- sig.mods
    num.breaks.prev <- num.breaks
    canc.cna.matrix <- cancel.recurrent.breaks(cna.matrix, modules)
    
    agr.cum.profiles <- gen.perm.profiles(canc.cna.matrix, num.perm, test.env)$agr.cum.profiles
    single.samp.params <- single.samp.param.estimation(agr.cum.profiles, max.chrom.len, p.num)
    params <- all.samp.param.estimation(single.samp.params$kws,
                                        single.samp.params$per.sample,
                                        p.num)
    
    if (max(params$vars) < 1e-9) {
      agr.cum.profiles <- gen.perm.profiles(canc.prev.cna.matrix, num.perm, test.env)$agr.cum.profiles
      single.samp.params <- single.samp.param.estimation(agr.cum.profiles, max.chrom.len, p.num)
      params <- all.samp.param.estimation(single.samp.params$kws,
                                          single.samp.params$per.sample,
                                          p.num)
      break
    }
    
    canc.prev.cna.matrix <- canc.cna.matrix
    iter <- iter + 1
  }
  
  list(params=params, canc.cna.matrix=canc.cna.matrix, modules=modules)
}


cna.matrix.to.segs <- function(cna.matrix, segments) {
  num.segs <- length(segments)
  sample.num <- nrow(cna.matrix)
  seg.matrix <- zeros(sample.num, num.segs)
  for (i in seq_len(num.segs)) {
    seg.start <- segments[[i]]$I
    seg.end <- segments[[i]]$I + segments[[i]]$kw - 1
    seg.matrix[, i] <- rowMeans(cna.matrix[,seg.start:seg.end, drop=F])
  }
  seg.matrix
}


compute.gauss.kernel <- function(num.stds, kw) {
  x <- -round(kw * num.stds):round(kw * num.stds)
  g.k <- exp(-x^2 / (2 * kw^2))
  g.k <- g.k/sum(g.k)
  g.k
}


max.mode <- function(profile, filter.std) {
  if (filter.std == 0) {
    return(list(x=0, y.orig=0, z=0, peak=0))
  }
  
  hist.res <- filter.std / 100
  num.bins <- (max(profile) - min(profile)) / hist.res
  if (num.bins == 0) {
    return(list(x=0, y.orig=0, z=0, peak=0))
  }
  
  hist.results <- hist(profile, seq(min(profile), max(profile), length.out=floor(num.bins)+1), plot=FALSE)
  y <- hist.results$counts
  x <- hist.results$mids
  
  filter.std.n <- round(filter.std / median(diff(x)))
  num.stds <- 3
  wind.ext <- num.stds * filter.std.n
  
  y.l <- y[seq.int(wind.ext, 1, -1)]
  y.r <- y[seq.int(length(y), (length(y) - wind.ext), -1)]
  
  y.orig <- y
  y <- c(y.l, y, y.r)
  
  gauss.kernel <- compute.gauss.kernel(num.stds, filter.std.n)
  
  z <- conv(y, gauss.kernel)
  z <- z[(2 * wind.ext):(2 * wind.ext + length(x) - 1)]
  
  peak.i <- which(z == max(z))[1]
  peak <- x[peak.i]
  
  list(x=x, y.orig=y.orig, z=z, peak=peak)
}


###
# MATLAB MAPPING
# Generates random permutations inside.
gen.perm.abs.profiles <- function (cna.matrix, cna.matrix.agr, num.iter, amp.level, amp.sign, test.env=NULL) {
  if(!is.null(test.env$random.matrix) && nrow(test.env$random.matrix) < num.iter) {
    stop(paste('The random.matrix in test.env must have at least', num.iter, 'rows.'))
  }
  
  cna.matrix.abs <- cna.matrix
  if (amp.sign == +1) {
    cna.matrix.abs[cna.matrix.abs < amp.level] <- 0
  } else {
    cna.matrix.abs[cna.matrix.abs > amp.level] <- 0
  }
  
  perm.mu <- sum(rowMeans(cna.matrix.abs))
  perm.std <- sqrt(sum(apply(cna.matrix.abs, 1, var)))
  mode.level <- max.mode(cna.matrix.agr, perm.std)$peak
  
  if (amp.sign == +1) {
    amp.mu <- max(perm.mu, mode.level)
  } else {
    amp.mu <- min(perm.mu, mode.level)
  }
  
  mu.adjust = amp.mu - perm.mu
  
  p.num <- ncol(cna.matrix)
  agr.profiles <- zeros(num.iter, p.num)
  for (i in seq_len(num.iter)) {
    agr.profiles[i,] <- colSums(shuffle.cna(cna.matrix.abs, test.env$random.matrix[1,]))
    test.env$random.matrix <- test.env$random.matrix[-1,,drop=F]
  }
  
  agr.profiles <- cbind(agr.profiles, agr.profiles)
  agr.profiles <- agr.profiles + mu.adjust
  agr.cum.profiles <- cbind(rep(0, num.iter), t(apply(agr.profiles, 1, cumsum)))

  list(agr.profiles=agr.profiles, agr.cum.profiles=agr.cum.profiles)
}


#'
#' @param cna.matrix.abs.agr The vector
#' @param agr.cum.perms The matrix
#' @param segments The list of modules
#' @noRd
call.sig.abs.segments <- function(cna.matrix.abs.agr, agr.cum.perms, segments, fdr, amp.sign) {
  num.segs <- length(segments)
  sig.is <- rep(F, num.segs)
  p.num <- length(cna.matrix.abs.agr)
  num.perm <- nrow(agr.cum.perms)
  
  for (i in seq_len(num.segs)) {
    kw <- segments[[i]]$kw
    seg.start <- segments[[i]]$I
    seg.end <- segments[[i]]$I + segments[[i]]$kw - 1
    seg.agr <- mean(cna.matrix.abs.agr[seg.start:seg.end])
    cna.perm.s <- (agr.cum.perms[,(kw+1):(p.num+kw)] - agr.cum.perms[,1:p.num]) / kw
    
    if (amp.sign == +1) {
      e.emp <- sum(t(colSums(t(apply(cna.perm.s >= seg.agr, 1, diff)) > 0)))  + sum(cna.perm.s[,1] >= seg.agr)
    } else {
      e.emp <- sum(t(colSums(t(apply(cna.perm.s <= seg.agr, 1, diff)) > 0)))  + sum(cna.perm.s[,1] <= seg.agr)
    }
    
    e.emp <- e.emp / num.perm

    if (e.emp <= fdr/2) {
      sig.is[i] = T
    }

  }

  sig.is
}


null.seg.matrix <- function(seg.matrix, segments, sig.i) {
  seg.matrix.null <- seg.matrix
  s.num <- nrow(seg.matrix)
  null.is = which(!sig.i)
  
  for (i in seq_len(length(segments))) {
    if (!sig.i[i]) {
      next
    }
  
    null.l.i = null.is[tail(which(null.is < i), n=1)]
    null.r.i = null.is[which(null.is > i)[1]]
    
    if (is.na(null.l.i) || isempty(null.l.i)) {
      kw.l <- 1
      mu.l <- rep(0, s.num)
    } else {
      kw.l <- segments[[null.l.i]]$kw
      mu.l <- seg.matrix[, null.l.i]
    }
  
    if (is.na(null.r.i) || isempty(null.r.i)) {
      kw.r <- 1
      mu.r <- rep(0, s.num)
    } else {
      kw.r <- segments[[null.r.i]]$kw
      mu.r <- seg.matrix[, null.r.i]
    }
  
    mu.seg <- (mu.l * kw.l + mu.r * kw.r) / (kw.l + kw.r)
    
    seg.matrix.null[, i] <- mu.seg
  }
  
  seg.matrix.null
}


segs.to.cna.matrix <- function(seg.matrix, segments, p.num) {
  num.segs <- length(segments)
  s.num <- nrow(seg.matrix)
  cna.matrix <- zeros(s.num, p.num)
  
  for (i in seq_len(num.segs)) {
    seg.start <- segments[[i]]$I
    seg.end <- segments[[i]]$I + segments[[i]]$kw - 1
    seg.mus <- seg.matrix[, i]
    cna.matrix[, seg.start:seg.end] <- matrix(seg.mus, nrow=length(seg.mus), ncol=segments[[i]]$kw)
  }
  
  cna.matrix
}


###
# MATLAB MAPPING
# Generates random permutations inside.
update.abs.cna <- function(data.cna.matrix, amp.level, del.level,
                           seg.matrix, cna.matrix,
                           cna.matrix.p.agr, cna.matrix.n.agr,
                           segments, fdr, p.num,
                           num.iter=10, test.env=NULL) {

  agr.p.cum.perms <- gen.perm.abs.profiles(cna.matrix, cna.matrix.p.agr, num.iter, amp.level, +1,
                                           test.env=test.env)$agr.cum.profiles
    
  agr.n.cum.perms <- gen.perm.abs.profiles(cna.matrix, cna.matrix.n.agr, num.iter, del.level, -1,
                                           test.env=test.env)$agr.cum.profiles

  sig.p.is <- call.sig.abs.segments(cna.matrix.p.agr, agr.p.cum.perms, segments, fdr, +1)
  sig.n.is <- call.sig.abs.segments(cna.matrix.n.agr, agr.n.cum.perms, segments, fdr, -1)
  num.sig.segs <- sum(sig.p.is | sig.n.is)
  
  seg.matrix.null <- null.seg.matrix(seg.matrix, segments, sig.p.is | sig.n.is)
  seg.matrix.diff <- seg.matrix.null - seg.matrix
  cna.matrix.diff <- segs.to.cna.matrix(seg.matrix.diff, segments, p.num)
  cna.matrix <- data.cna.matrix + cna.matrix.diff
  
  list(cna.matrix=cna.matrix, num.sig.segs=num.sig.segs)
}


###
# MATLAB MAPPING
# Generates random permutations inside.
iter.update.abs.cna <- function(cna.matrix, amp.level, del.level, segments, fdr,
                                max.iter=5, num.perm=10, test.env=NULL) {
  p.num <- ncol(cna.matrix)
  cna.matrix.p <- cna.matrix
  cna.matrix.p[cna.matrix.p < amp.level] <- 0
  cna.matrix.p.agr <- colSums(cna.matrix.p)

  cna.matrix.n <- cna.matrix
  cna.matrix.n[cna.matrix.n > del.level] <- 0
  cna.matrix.n.agr <- colSums(cna.matrix.n)

  seg.matrix <- cna.matrix.to.segs(cna.matrix, segments)
  
  num.sig.segs.prev <- 0

  update.results <- update.abs.cna(cna.matrix, amp.level, del.level,
                                   seg.matrix, cna.matrix,
                                   cna.matrix.p.agr, cna.matrix.n.agr,
                                   segments, fdr, p.num,
                                   test.env=test.env)
  cna.matrix.new <- update.results$cna.matrix
  num.sig.segs <- update.results$num.sig.segs
  
  
  iter <- 1
  while (num.sig.segs > num.sig.segs.prev) {
    
    if (iter > max.iter) {
      break
    }

    num.sig.segs.prev <- num.sig.segs
    
    update.results <- update.abs.cna(cna.matrix, amp.level, del.level,
                                     seg.matrix, cna.matrix.new,
                                     cna.matrix.p.agr, cna.matrix.n.agr,
                                     segments, fdr, p.num,
                                     test.env=test.env)
    cna.matrix.new <- update.results$cna.matrix
    num.sig.segs <- update.results$num.sig.segs
    
    iter <- iter + 1
  }
  
  p.profiles.results <- gen.perm.abs.profiles(cna.matrix.new, cna.matrix.p.agr, num.perm, amp.level, +1,
                                              test.env=test.env)
  agr.p.profiles <- p.profiles.results$agr.profiles
  agr.p.cum.profiles <- p.profiles.results$agr.cum.profiles
  
  n.profiles.results <- gen.perm.abs.profiles(cna.matrix.new, cna.matrix.n.agr, num.perm, del.level, -1,
                                              test.env=test.env)
  agr.n.profiles <- n.profiles.results$agr.profiles
  agr.n.cum.profiles <- n.profiles.results$agr.cum.profiles
  
  list(cna.matrix=cna.matrix.new,
       agr.p.perms=agr.p.profiles, agr.p.cum.perms=agr.p.cum.profiles,
       agr.n.perms=agr.n.profiles, agr.n.cum.perms=agr.n.cum.profiles)
}


#'
#' @param map.loc The \code{data.table} containg segmented location mapping as returned by \code{ensure.min.probes}.
#' @param amp.level Threshold used for calling amplifications. All copy number segments with amplitudes below 
#'                  this value are set to zero.
#' @param del.level Threshold used for calling deletions. All copy number segments with amplitudes above 
#'                  this value are set to zero.
#' @param fdr Event based false descovery rate. This value corresponds to the expected proportion of called
#'            regions that are false positives (recurrences that occur by chance and contain no driver genes)
#' @noRd
estimate.parameters <- function(cna.matrix, map.loc.agr, max.chrom.len, amp.level, del.level, fdr,
                                samps.use=vector('numeric'), max.iter=5, num.perm=10,
                                test.env=NULL, quiet=T) {

  p.num <- ncol(cna.matrix)
  single.samp.params <- iter.extract.single.samp.params(cna.matrix, map.loc.agr, max.chrom.len, fdr,
                                                        samps.use=samps.use, max.iter=max.iter, num.perm=num.perm,
                                                        test.env=test.env, quiet=quiet)
  params <- single.samp.params$params
  modules.base <- single.samp.params$modules
  
  abs.params <- iter.update.abs.cna(cna.matrix, amp.level, del.level, modules.base, fdr,
                                    max.iter=max.iter, num.perm=num.perm, test.env=test.env)
  agr.p.cum.iter <- abs.params$agr.p.cum.perms
  agr.n.cum.iter <- abs.params$agr.n.cum.perms
  
  single.params.p <- single.samp.param.estimation.abs(max.chrom.len, p.num, agr.p.cum.iter, agr.n.cum.iter, +1)
  params.p <- all.samp.param.estimation(single.params.p$kws,
                                        single.params.p$per.sample,
                                        p.num)
  # NOTE: it might not be necessary to save the per.sample structure
  params.p$per.sample <- single.params.p$per.sample
  params.p$agr.cum.iter <- agr.p.cum.iter
  
  single.params.n <- single.samp.param.estimation.abs(max.chrom.len, p.num, agr.p.cum.iter, agr.n.cum.iter, -1)
  params.n <- all.samp.param.estimation(single.params.n$kws,
                                        single.params.n$per.sample,
                                        p.num)
  # NOTE: it might not be necessary to save the per.sample structure
  params.n$per.sample <- single.params.n$per.sample
  params.n$agr.cum.iter <- agr.n.cum.iter
  
  list(params.p=params.p, params.n=params.n)
}
NKI-CCB/RUBIC documentation built on Dec. 17, 2021, 5:17 a.m.