R/SynEx-internal.R

Defines functions .getSegment .chr.tosegment .tosegment .larsCBS .bic.rss

.bic.rss <-
  function(RSS, n, Cn, edf, S)
  {
    ll <- - (log(RSS/n) + log(n)*edf*Cn/n)
    new.r <- ((ll[length(ll)] - ll[-1])/(ll[length(ll)] -
                                           ll[2])) * (length(ll) - 1) + 1
    diff2 <- diff(new.r, diff = 2) > S
    if (!any(diff2))
      return(0)
    maxll = max(which(diff2)) + 1
    return(maxll)
  }

.larsCBS <-
  function(z, selection = .selection.default(), collapse.k = 0, variation.control = TRUE, rss = FALSE, S = 0.1, ...)
  {
    suppressPackageStartupMessages(require(lars))
    make.CNA <- function(data){
      chrom <- rep(1, length(data))
      maploc <- c(1:length(data))
      return(CNA(data, chrom, maploc))
    }
    if(variation.control == TRUE){
      aa <- rnorm(round(length(z)/5), mean=0.5-round(median(z), 1), sd=0.5*sd(z))
      bb <- rnorm(round(length(z)/5), mean=0.5-round(median(z), 1), sd=0.5*sd(z))
      pseudoz <- c(aa, z, bb)
      y <- pseudoz
    }
    else{
      y <- z
    }
    if(collapse.k!=0){
      coly <- collapse(y, collapse.k)
      oCBS <- segment(make.CNA(coly))
      psi <- c(oCBS$segRows[, 1])[-1]*collapse.k
    }
    else{
      oCBS <- segment(make.CNA(y), verbose = 0, ...)
      psi <- c(oCBS$segRows[, 1])[-1]
    }
    if (length(psi)==0){
      # cat("No estimated breakpoint in the given sequence")
      result <- list(id.entry = NULL, psiCBS = NULL, psiLARS = NULL,
                     n.psi = 0, means = round( mean(y), 4), medians = round(median(y), 4))
    }
    else{
      n <- length(y)
      x <- 1:n
      k <- length(psi)
      Z <- matrix(rep(x, k), nrow = n)
      PSI <- matrix(rep(psi, rep(nrow(Z), ncol(Z))), ncol = ncol(Z))
      V <- ifelse((Z > PSI), 1, 0)
      ### Modified from jumpoints() in package cumSeg ###
      psi0 <- psi
      display1 <- selection$display
      edf.psi <- selection$edf.psi
      type <- selection$type
      Cn <- eval(parse(text = selection$Cn))
      S <- S
      tipoAlg <- selection$alg

      olars <- lars(abs(V), y = y, type = tipoAlg, normalize = FALSE,
                    intercept = TRUE, trace = display1)
      id.var.entry <- (1:ncol(V))[order(olars$entry)]
      if(edf.psi)
        edf <- (olars$df - 1) * 2 + 1
      else edf <- olars$df

      RSS <- olars$RSS
      if(rss == TRUE)     min.r <- .bic.rss(RSS, n, Cn, edf, S)
      else      min.r <- which.min(log(RSS/n) + log(n) * edf * Cn/n)

      id <- sort(c(0, id.var.entry)[1:min.r])
      id <- id[-1]
      psi1 <- psi[id]

      if(variation.control == TRUE){
        psi1 <- psi1[psi1 < (round(length(z)/5)+length(z))-20]-round(length(z)/5)-5
        psi <- psi[psi < (round(length(z)/5)+length(z))-20]-round(length(z)/5)-5
        psi1 <- psi1[psi1>20]
        psi <- psi[psi>20]
        if(collapse.k!=0){
          psi1 <- psi1[psi1 > collapse.k*2 & psi1 < (collapse.k*2+length(z))]
          psi <- psi[psi > collapse.k*2 & psi < (collapse.k*2+length(z))]
        }
      }
      start <- c(0, psi1)
      end <- c(psi1, length(z))
      k <- length(start)
      ave <- tapply(1:k, 1:k, function(i){ mean(z[start[i]:end[i]]) })
      med <- tapply(1:k, 1:k, function(i){ median(z[start[i]:end[i]]) })
      result <- list(id.entry = id.var.entry, psiCBS = psi, psiLARS = psi1,
                     n.psi = length(psi1), means = round(ave, 4), medians = round(med, 4))
      result$data <- z
    }
    class(result) <- "larsCBSsegmented"
    return(result)
  }

.selection.default <-
  function (display = FALSE, Cn = "log(log(n))", alg = c("stepwise", "lasso"), edf.psi = TRUE) {
    ### Modified from sel.control() in package cumSeg ###
    alg <- match.arg(alg)
    list(display = display, Cn = Cn, edf.psi = edf.psi, alg = alg)
  }


.tosegment <- function(y){
  diffy <- diff(y)
  changePoints <- c(1:length(y))[diffy != 0]
  changePoints <- changePoints[changePoints!=1 & changePoints != length(y)]
  z <- data.frame(c(1, changePoints), c(changePoints, length(y)), y[c(changePoints, length(y))])
  colnames(z) <- c("start", "end", "value")
  return(z)
}

.chr.tosegment <- function(y, ratioData){
  chrs <- unique(ratioData[, 1])
  allsegments <- NULL
  for(i in 1:length(chrs)){
    suby <- y[ratioData[, 1]%in%chrs[i]]
    subRatio <- ratioData[ratioData[, 1] %in% chrs[i], ]
    segmentSuby <- .tosegment(suby)
    segmentRes <- data.frame(subRatio[segmentSuby[, 1], 1], subRatio[segmentSuby[, 1], 2], subRatio[segmentSuby[, 2], 2], segmentSuby[, 3])
    colnames(segmentRes) <- c("chr", "start", "end", "ratio")
    allsegments <- rbind(allsegments, segmentRes)
  }
  return(allsegments)
}

.getSegment <- function(df, value_field = c("ratio", "normalized"), segmentMethod = c("CBS", "SomaticaEx", "TrendFiltering")) {

  subRatio <- df[!is.na(df[, value_field]) & is.finite(df[, value_field]) &
                   df[, value_field] > 0.1, c("chr", "start", "end", value_field)]
  bin.size <- df$end[1] - df$start[1] + 1
  if(segmentMethod == "CBS"){
    suppressPackageStartupMessages(require(DNAcopy))
    seg <- segment(CNA(log2(subRatio[, value_field]+0.0001), subRatio[, "chr"], subRatio[, "start"]), verbose = 0)
    segRes <- seg$output[, c(2:4, 6)]
    segRes[, 2] <- segRes[, 2] - 1
    segRes[, 3] <- segRes[, 3] + bin.size - 1
  }

  if(segmentMethod == "SomaticaEx"){
    segRes <- larsCBSsegment(cbind(subRatio[, c("chr", "start")], log2(subRatio[, value_field]+0.0001)), verbose = F, k = 10)
    segRes[, 4] <- round(as.numeric(as.character(segRes[, 4])), 4)
    segRes[, 2:3] <- apply(segRes[, 2:3], 2, as.numeric)
    segRes[, 2] <- segRes[, 2] - 1
    segRes[, 3] <- segRes[, 3] - 1
    segRes[, 1] <- as.character(segRes[, 1])
  }

  if(segmentMethod == "TrendFiltering"){
    suppressPackageStartupMessages(require(flsa))
    lambda2 <- 0:60/10
    ratioDiff <- diff(subRatio[, value_field])
    ratioDiff <- ratioDiff[!is.nan(ratioDiff) & is.finite(ratioDiff)]
    noiseLevel <- sqrt(sum(abs(ratioDiff)^2)/length(ratioDiff))
    flsaRes <- flsa(log2(subRatio[, value_field]+0.0001), lambda2 = lambda2)
    sdList <- apply(flsaRes, 1, function(x){
      return(sd(x - log2(subRatio[, value_field]+0.0001)))
    })
    best.num <- which.min(abs(sdList - noiseLevel))
    denoised.ratio <- flsaRes[best.num, ]
    segRes <- .chr.tosegment(round(denoised.ratio, 2), subRatio)
    segRes[, 2] <- segRes[, 2] - 1
    segRes[, 3] <- segRes[, 3] - 1
  }
  segRes <- data.frame(segRes)

  colnames(segRes) <-  c("chr", "start", "end", "log2ratio")
  segRes <- segRes[segRes[, "end"] != 0, ]
  segRes <- segRes[segRes[, "end"] > segRes[, "start"], ]
  return(segRes)
}
ChenMengjie/SynthEx documentation built on Oct. 1, 2020, 8:45 a.m.