R/CNAnorm-methods.R
In CNAnorm: A normalization method for Copy Number Aberration in cancer samples

#### "helper" functions that will be called by function (called by methods) or
# other functions not exported in the environment, and inaccessible to user


.discreteNorm <- function(object, normBy = object) {
    toNorm = object
    if (length(valid.peaks(normBy)) == 0) {
        normBy <- validation(normBy)
    } 
    ploidy <- valid.ploidy(normBy)
    peaks <- valid.peaks(normBy)

    if (length(ploidy) != length(peaks)) {
        stop("Peaks and Ploidy vector have different length")
    } else if (length(ploidy) == 1){
        shift = 0
        scale = 1/peaks[1]
    } else {
        MQR <- getMQR(ploidy, peaks)
        interpDiploidXloc <- c(0,2) * MQR[1] + MQR[2] # y = mx + q
        shift = interpDiploidXloc[1]
        scale = 1 / ( interpDiploidXloc[2] - interpDiploidXloc[1] )
    }
    
    toNorm@DerivData@ratio.n <- (ratio(toNorm) - shift) * scale
    toNorm@Res@validated.ratioMedian <- (normBy@Res@ratioMedian - shift) * scale
    toNorm@Res@validated.closestPeak <- (normBy@Res@closestPeak - shift) * scale
    if ( length(ratio.s(toNorm)) > 0 ) {
        toNorm@DerivData@ratio.s.n <- (ratio.s(toNorm) - shift) * scale
    } 
    if ( length(segMean(toNorm)) > 0 ) {
        toNorm@DerivData@segMean.n <- ( 2^segMean(toNorm) - shift ) * scale
    }

    
    toNorm@Res@shifting = shift
    toNorm@Res@scaling = scale
    return (toNorm)
}

.validation <- function (object, peaks = sugg.peaks(object), ploidy = sugg.ploidy(object)) {
    if (length(peaks) != length(ploidy)) {
        stop ("Peaks and ploidy must have same length\n" )
    } else if (any(round(ploidy) != ploidy)) {
        stop ("Ploidy must be integer\n")
    } else {
        object@Res@validated.peaks = peaks
        object@Res@validated.ploidy = ploidy
        MQR <- getMQR(ploidy, peaks)
        interpDiploidXloc <- c(0,2) * MQR[1] + MQR[2]
        shift = interpDiploidXloc[1]
        scale = 1 / ( interpDiploidXloc[2] - interpDiploidXloc[1] )
        if (length(interpDiploidXloc) == 1) {
            content = "Unknown"
        } else if (length( interpDiploidXloc) == 2){
            content = as.character(
                signif(100*(1-interpDiploidXloc[1]/interpDiploidXloc[2]), 4))
        } else {
            stop("interpDiploidXloc has more than 2 elements!")
        }

        object@Res@validated.tumContent <- content
        object@Res@validated.ratioMedian <- 
            (object@Res@ratioMedian - shift) * scale
        object@Res@validated.closestPeak <- 
            (object@Res@closestPeak - shift) * scale
        object@Res@validated.R           <- signif(MQR[3], 4) 
    }

    return (object)
}

cumGenPosition <- function (object){
    Chrs <- chrs(object)
    Pos <- pos(object, show = "start")
    numWin <- length(Pos)
    toAdd <- 0
    uChrs <- unique(Chrs)
    numUChrs <- length(uChrs)
    newPos <- rep(NA, numWin)
    for (i in 1:numUChrs){
        isThisChr <- Chrs == uChrs[i]
        newPos[isThisChr] <- Pos[isThisChr] + toAdd
        toAdd <- toAdd + max(Pos[isThisChr], na.rm = TRUE)
    } 
    return (newPos)
}

.plotGenome <- function (object, maxRatio = 8, minRatio = -1, 
    superimpose = character(0), gPar = NULL, numHorLables = 10, 
    colorful = FALSE, show.centromeres = TRUE, idiogram = NULL,
    fixVAxes = FALSE, supLineColor = character(0), 
    supLineCex = character(0), dot.cex = .2, ...) {
    # superimpose can be one of the following: "DNACopy" or "smooth"    

    if (is.null(gPar)){ # load default values
        gPar <- makeDefaultGraphParamteres()
    }

    
    if (length(supLineColor) == 3){
        warning("Parameter supLineColor is deprecated, see ?gPar")
        gPar$genome$colors$segLine <- supLineColor[1]
        gPar$genome$colors$smoothLine <- supLineColor[2]
        gPar$genome$colors$neutral.dot <- supLineColor[3]
    } 
    if (length(supLineCex) == 2){
        warning("Parameter supLineCex is deprecated, see ?gPar")
        gPar$genome$lwd$segLine <- supLineCex[1]
        gPar$genome$lwd$smoothLine <- supLineCex[2]
    }

    if (dot.cex != .2){
        # the user probably set it
        warning("Parameter dot.cex is deprecated, see ?gPar")
        gPar$genome$cex$neutral.dot <- dot.cex
    }

    # temporary
    # gPar <- data(gPar)
    tPar <- gPar$genome

    if (length(ratio.n(object)) == 0){
        stop("Object does not contains normalized ratio. See ?discreteNorm\n")
    } else if (sum(!is.na(ratio.n(object))) == 0 ){
        warning("Object does not contains valid ratio\n")
#         labelsList <- makeNumericLabels(axTicks(1), 3)
#         xAxLab <- paste (tPar$lab$xAxes.low, " (", labelsList$unit , ") of ",
#             unique(chrs(object)), sep = "") 
        plot(c(0,1), c(0,1),  xaxt="n", yaxt="n", ylab="", xaxs = "i", 
            type = 'n', xlab = '')
        noPointText <- paste("No valid values to plot for", 
            paste(unique(chrs(object)), collapse = ', '))
        text(0.5, 0.5, noPointText, pos = 1)
        return(1)

    }

    par(mar = tPar$mar)
    
    bolT <- ratio.n(object) < maxRatio & ratio.n(object) > minRatio
    
    where <- which(bolT)
    outOfPlotPlus <- which(ratio.n(object) > maxRatio)
    outOfPlotMinus <- which(ratio.n(object) < minRatio)
#     xAxes <- 1:length(ratio.n(object))
    xAxes <- cumGenPosition(object)
    numDots <- length(xAxes)

    yAxesoutOfPlotPlus <- rep(maxRatio, length(outOfPlotPlus))
    yAxesoutOfPlotMinus <- rep(minRatio, length(outOfPlotMinus))
    xRange <- range(xAxes[where], na.rm = TRUE)
    yRange <- range(ratio.n(object)[where], na.rm = TRUE)
    yRange[1] <- yRange[1] - 0.1
    yRange[2] <- yRange[2] + 0.1

    if (fixVAxes){
        plot(xRange, c(minRatio - 0.1, maxRatio + 0.1), type = 'n', xlab = "",
            xaxt="n", yaxt="n", ylab="", xaxs = "i", ...) 
        } else {
        plot(xRange, yRange, type = 'n', xlab = "", xaxt="n", yaxt="n", ylab="", 
        xaxs = "i", ...) 
    }
    if (colorful){
        AllX <- xAxes[where]
        AllY <- ratio.n(object)[where]
        onlyLossX <- AllX[AllY < object@Res@validated.closestPeak]
        onlyLossY <- AllY[AllY < object@Res@validated.closestPeak]
        onlyGainX <- AllX[AllY >= object@Res@validated.closestPeak]
        onlyGainY <- AllY[AllY >= object@Res@validated.closestPeak]
        points(onlyLossX, onlyLossY, cex = tPar$cex$loss.dot, pch = tPar$pch$loss.dot, 
            col = tPar$colors$loss.dot)
        points(onlyGainX, onlyGainY, cex = tPar$cex$gain.dot, pch = tPar$pch$gain.dot, 
            col = tPar$colors$gain.dot)
    } else {
        points(xAxes[where], ratio.n(object)[where], cex = tPar$cex$neutral.dot, 
            pch = tPar$pch$neutral.dot, col = tPar$colors$neutral.dot)
    }
    if (length(c(outOfPlotPlus, outOfPlotMinus)) > 0 ) {
        points(xAxes[outOfPlotPlus], yAxesoutOfPlotPlus + 0.1, pch = tPar$pch$over.dot, 
            cex = tPar$cex$over.dot, col = tPar$color$over.dot)
        points(xAxes[outOfPlotMinus], yAxesoutOfPlotMinus - 0.1, pch = tPar$pch$below.dot, 
            cex = tPar$cex$below.dot, col = tPar$color$below.dot)
    }
    # numDots <- length(where)
    xTickWidth <- numDots/(numHorLables + 1)
#    breakPoints <- seq(from = 1, to = max(xAxes, na.rm = TRUE), length.out =
#        numHorLables + 1) 
    breakPoints <- seq(from = 1, to = max(xAxes, na.rm = TRUE), 
        length.out = numHorLables + 1)
    middlePoints = round(breakPoints[1:length(breakPoints)-1] + 
        breakPoints[2:length(breakPoints)])/2
       
    if (length(unique(chrs(object))) > 1){
        lableNames <- chrs(object)
        tickNames <- pos2chr(chrs(object), xAxes, middlePoints)
        tickLocation = rep(NA, length(tickNames))
        for (i in 1: length(tickNames)) {
#             tickLocation[i] = round(mean(which(chrs(object) %in% tickNames[i])) )
            thisChrPos <- which(chrs(object) %in% tickNames[i])
            tickLocation[i] <- mean(xAxes[thisChrPos])
        }
        axis(1, at = tickLocation, labels = tickNames)
        xAxLab <- tPar$lab$xAxes.low
    } else {
        labelsList <- makeNumericLabels(axTicks(1), 3)
        tickNames <- labelsList$labels
# 
        tickLocation <-  axTicks(1)
        axis(1, at = tickLocation, labels = tickNames)
        xAxLab <- paste (tPar$lab$xAxes.low, " (", labelsList$unit , ") of ",
            unique(chrs(object)), sep = "") 
    }

    title(xlab = xAxLab)

    if (fixVAxes){
        yMax = maxRatio
    } else {
        yMax = floor(max(ratio.n(object)[where], na.rm = TRUE))
    }
    axis(2, at = seq(0, yMax, by = .5), labels=2*(seq(0, yMax, by = .5)), 
        col.axis = 'blue')
    mtext(tPar$lab$yAxes.left, side = 2, line = 3, cex = tPar$cex$yAxes.left,
        col= tPar$colors$yAxes.left)
    roundPloidy <- signif(object@Res@validated.closestPeak, digits=2)
    axis(4, at = seq(0, yMax, by = roundPloidy), 
        labels=signif(seq(0, yMax, by = roundPloidy)/roundPloidy, digits = 2))
    mtext(tPar$lab$yAxes.right, side=4, line=3, cex = tPar$cex$yAxes.right, 
        col = tPar$colors$yAxes.right)

    abline(h = seq(0, yMax, by = .5), col = tPar$colors$grid, lwd = tPar$lwd$grid, 
        lty = tPar$lty$grid)
    
   
    # plot vertical lines on the edge between chromosomes
    # allChr <- Res$data$Chr[where]
    allChr <- chrs(object)
    wChrChange <- which(allChr[1:length(allChr)-1] != allChr[2:length(allChr)])
    chrChange <- c(1, xAxes[wChrChange], max(xAxes, na.rm = TRUE))
    abline(v = chrChange, col = tPar$colors$chrVertLines, lty = tPar$lty$chrVertLines,
        lwd = tPar$lwd$chrVertLines)
    
    abline(h = object@Res@validated.closestPeak, col = tPar$colors$closestPeak, 
        lwd = tPar$lwd$closestPeak, lty = tPar$lty$closestPeak)
    abline(h = object@Res@validated.ratioMedian, col = tPar$colors$ratioMedian,
        lwd = tPar$lwd$ratioMedian, lty = tPar$lty$ratioMedian )

    # show the centromere
    if (show.centromeres){
        if (is.null(idiogram)){
            hg19_hs_ideogr <- NULL # this is to avoid a NOTE in CMD CHECK
            rm(hg19_hs_ideogr)# this is to avoid a NOTE in CMD CHECK
            data(hg19_hs_ideogr)
            idiogram <- hg19_hs_ideogr
        }
        uChrs <- unique(chrs(object))
        centrm.location <- rep(NA, length(uChrs))
        for (i in 1:length(uChrs)){
            wCentr <- which(uChrs[i] == as.character(idiogram$chrom) & 
                as.character(idiogram$gieStain) == "acen")
            centrm.location[i] <- idiogram$chromEnd[wCentr[1]] + 
                min(xAxes[chrs(object) == uChrs[i]]) - min(xAxes[chrs(object) == uChrs[1]])
        }
        abline(v = centrm.location, col = tPar$colors$centrm, lwd = tPar$lwd$centrm,
            lty = tPar$lty$centrm)
    }


    # superimpose DNAcopy and/or smooth
    if (any(superimpose == 'smooth', na.rm = TRUE)){
        lines(xAxes[where], ratio.s.n(object)[where], lwd = tPar$lwd$smoothLine, 
            col = tPar$colors$smoothLine) #, type = 'l', lw = thisLw, col = thisCol)
    }
    if (any(superimpose == 'DNACopy', na.rm = TRUE)){
        if (length(segMean.n(object)) != length(object)) {
            stop ("No DNAcopy segmentation values. Please use 'addDNACopy' (before normalisation step)")
        } 

        myX <- xAxes[where]
        myY <- segMean.n(object)[where]
        transitions <- .startStop(myY)
        for (i in 1:length(transitions$start)){
            SEx <- c(myX[transitions$start[i]], myX[transitions$end[i]])
            SEy <- c(myY[transitions$start[i]], myY[transitions$end[i]])
            lines(SEx, SEy, lwd = tPar$lwd$segLine, col = tPar$colors$segLine) # type = 'l', lw = thisLw, col = thisCol)
        }
    }

    if (!all(superimpose %in% c('smooth', 'DNACopy'))) {
        warning("'superimpose' must be either 'smooth' or 'DNACopy'. skipping...\n")
    }
}

pos2chr <- function (chrName, cumPosition, look4Pos){
    look4chr <- rep(NA, length(look4Pos))
    for (i in 1:length(look4Pos)){
        wMin <- which(abs(look4Pos[i] - cumPosition) == min(abs(look4Pos[i] - cumPosition)))
        look4chr[i] <- chrName[wMin[1]] # wMin could be a two elements array if look4Pos[i] in half way between two points
    }
    return (look4chr)
}

.pos <- function(object, show = "start"){
    show.choices <- c("start", "end")
    if (match.arg(tolower(show), show.choices) == "start"){
        return (object@InData@Pos)
    } else if (match.arg(tolower(show), show.choices) == "end") {
        mws <- medianWinSize(object)
        return (object@InData@Pos + mws)
    }
}

.arms <- function (object, banding_df) {
    # banding_df is a data frame. see hg19_hs_ideogr for info
    arms <- paste(banding_df$chrom, substr(banding_df$name, 1, 1), sep="")
    uArms <- unique(arms)
    newNames <- rep(NA, length(object))
    # first fill those not in banding_df
    notInBanding <- ! (chrs(object) %in% unique(banding_df$chrom))
    newNames[notInBanding] <- chrs(object)[notInBanding]
    for (i in 1:length(uArms)){
        wArm <- arms == uArms[i]
        chrName <- as.character(unique(banding_df$chrom[wArm]))
        startArm <- min(banding_df$chromStart[wArm], na.rm = TRUE)
        endArm <- max(banding_df$chromEnd[wArm], na.rm = TRUE)
        whereThisArm <- chrs(object) == chrName & pos(object) > startArm & pos(object) <= endArm 
        newNames[whereThisArm] <- uArms[i]
    }
    return (newNames)
}

medianWinSize <- function(object){
    uChrs <- unique(chrs(object))
    numChr <- length(uChrs)
    medians <- rep(NA, numChr)
    for (i in 1:numChr){
        subSet <- chrs(object) %in% uChrs[i]
        medians[i] <- median(diff(pos(object[subSet]), na.rm = TRUE))
    }
    globalMedian <- median(medians, na.rm = TRUE)
    return (globalMedian)
}

makeNumericLabels <- function(x, sDigits){
    # get an array of genomic positions. It figures out the best displey (Gbp,
    # Mbp, Kbp, bp) and the number of significative digits
    hX <- max(x, na.rm = TRUE)
    lX <- min(x, na.rm = TRUE)
    sD2use <- round(log10(hX) - log10(hX - lX)) + sDigits
    if(log10(hX) > 10){ # better report in Gbp
        textX <- as.character(signif(x/1000000000, sD2use))
        myUnit <- "Gbp"
    } else if (log10(hX) > 7){ # better reprot in Mbp
        textX <- as.character(signif(x/1000000, sD2use))
        myUnit <- "Mbp"
    } else if (log10(hX) > 4) { # better report in Kbp
        textX <- as.character(signif(x/1000, sD2use))
        myUnit <- "kbp"
    } else { # veeery zoomed, just report in bp
        textX <- as.character(signif(x, sD2use))
        myUnit <- "bp"
    }
    results <- list (labels = textX, unit = myUnit)
    return (results)
}

.startStop <- function (x){
    # look for consecutive identical values in x and return its position of
    # start and end.
    # NAs produce breaks. A new start and stop will be set.
    transitionBool <- x[1:length(x)-1] != x[-1]

    transitions <- which(transitionBool | is.na(transitionBool))
#     notNApostions <- which(!is.na(x))
#     start <- c(notNApostions[1], transitions + 1)
#     end <- c(transitions, notNApostions[length(notNApostions)])
    start <- c(1, transitions + 1)
    end <- c(transitions, length(x))
    results <- list (start = start, end = end)
    return (results)
}


.peakPloidy <- function(object, method = 'mixture', exclude = character(0),
    ploidyToTest = 12, sd = 5, dThresh = 0.01, n = 2048, adjust = 0.9, 
    # force.smooth = TRUE, reg=FALSE, ds=1.5, zero.cont=FALSE, ...) {
    force.smooth = TRUE, reg=FALSE, ds=1.5, zero.cont=FALSE, ...) {
    
    densityBasedMethods <- c('density', 'median', 'mode', 'closest')
    whatM <- checkMethod(method, c('mixture', densityBasedMethods))
    # check if we have smoothed signal...
#     if (length(ratio.s(object)) == 0 & force.smooth){
#         message("smoothing ratio...")
#         object <- addSmooth(object)
#     }
    
    chrName <- chrs(object)
    ratio <- ratio2use(object)
    isOutlier <- findOutliers(ratio, sd, dThresh, n = n, adjust = adjust)
    ok4density <- (! chrName %in% exclude) & (! isOutlier) & (! is.na(ratio))
    if (whatM %in% densityBasedMethods){
        object <- .guessPeaksAndPloidy(object, ok4density = ok4density, n = n, 
            adjust = adjust, method = whatM)
        object@Params@method = whatM
    } else if (whatM == 'mixture') {
        options(show.error.messages = FALSE)
        sk <- try(suggest.k(ratio[ok4density], chrName[ok4density], np = seq(3, ploidyToTest, by = 1), 
            n = n, adjust = adjust, reg=reg, ds=ds, zero.cont=zero.cont, ...))
        if (class(sk) == "try-error"){
            message ("method 'mixture' failed with the following message:")
            message (sk[1])
            message ("falling back to method 'density'...")
            object <- .peakPloidy(object, method = 'density', exclude = exclude,
                ploidyToTest = ploidyToTest, sd = sd, dThresh = dThresh, n = n,
                adjust = adjust, reg = reg, ds = ds, zero.cont = zero.cont, ...)
            return(object)
        }
        options(show.error.messages = TRUE)
#         sk <- suggest.k(ratio[ok4density], chrName[ok4density], np = seq(3, ploidyToTest, by = 1), 
#             n = n, adjust = adjust, reg=reg, ds=ds, zero.cont=zero.cont, ...)   
        gn <- global.norm(ratio[ok4density], chrName[ok4density], k = sk[[1]][1], reg=reg,
	    ds=ds, zero.cont=zero.cont, ...)
        object@Params@method <- 'mixture'
        pl <- gn$pl[[1]]
        peaks <- gn$muest1[[1]]
        object@Res@suggested.ploidy <- pl
        object@Res@suggested.peaks <- peaks
        MQR <- getMQR(pl, peaks)
        interpDiploidXloc <- c(0,2) * MQR[1] + MQR[2]
        shift = interpDiploidXloc[1]
        scale = 1 / ( interpDiploidXloc[2] - interpDiploidXloc[1] )

        if (length(interpDiploidXloc) == 1) {
            content = "Unknown"
        } else if (length( interpDiploidXloc) == 2){
            content = as.character(
                signif(100*(1-interpDiploidXloc[1]/interpDiploidXloc[2]), 4))
        } else {
            stop("interpDiploidXloc has more than 2 elements!")
        }
        object@Res@suggested.tumContent <- content
        object@Res@suggested.R <- signif(MQR[3], 4)
        ratioMedian <- median(ratio[ok4density], na.rm = TRUE)
        object@Res@ratioMedian <- ratioMedian
        medianPeaksIndex <- which(abs(peaks-ratioMedian)  ==
            min(abs(peaks-ratioMedian), na.rm = TRUE))
        object@Res@closestPeak <- peaks[medianPeaksIndex]
        

    } else {
        stop('method must be either "mixture" or "density"\n')
    }
    
    object@Params@gp.excludeFromDensity <- exclude
    object@DerivData@ok4density         <- ok4density
    object@Params@density.n             <- n
    object@Params@density.adjust        <- adjust

    return(object) 


}

findOutliers <- function(ratio, sd, dThresh, ...){
    s <- median(ratio, na.rm = TRUE) + sd * sd(ratio, na.rm = TRUE)
    
    d <- density(ratio, na.rm = TRUE, ...)
    selected <- which(d$y >= dThresh*max(d$y))
    dT <- max(d$x[selected], ra.rm=T)

    isOutlier <- ratio > dT & ratio > s
    isOutlier[is.na(isOutlier)] <- FALSE
    return(isOutlier)
}


checkMethod <- function(method, avaiables){
    if (length(method) != 1 | !is.character(method)){
        stop ("method must be one element character vector\n")
    }
    where <- grep(method, avaiables, ignore.case = TRUE)
    if (length(where) == 0){
        stop (paste("method '", method, "' not recognized\n", sep = ''))
    } else if (length(where) > 1) {
        stop (paste("method '", method, "' matches multiple formal options\n", sep = ''))
    } else {
        return(avaiables[where])
    }
}




userOrDefault <- function(user, default, elNum) {
    if (elNum > length(user) | is.na(user[elNum])) {
        return (default[elNum])
    } else {
        return (user[elNum])
    }
}



smoothSignal <- function (signal, chrName, lambda = 7, ...) {
    ratio.s <- rep (NA, length(signal))
    uniqChrNames <- unique(chrName)
    for (n in 1:length(uniqChrNames)){
        isThisChr <- chrName %in% uniqChrNames[n]
        if (length(which(!is.na(signal[isThisChr]))) < 5) {
            ratio.s[isThisChr] <- rep(mean(signal[isThisChr], na.rm = TRUE), length(which(isThisChr)))     
        } else {
            ratio.s[isThisChr] <- smooth.it(signal[isThisChr], chrName[isThisChr], lambda = lambda, ...)
        }   
    } 
    return(ratio.s)
}

smooth.it <- function(a, ch, lambda = 7, ...){
    if(sum(is.na(a))> 0 | sum(is.infinite(a))> 0 | sum(is.nan(a))>0){
        a[is.infinite(a)] <- NA
        a[is.na(a)] <- NA
        a[a==0] <- 0.0001
        temp <- smoothseg(a,chrom=ch, lambda = lambda, impute.only=TRUE)
        y <- smoothseg(temp, chrom=ch, lambda = lambda, ...)
        return(y)
    } else {
        a[a==0] <- 0.0001
        y <- smoothseg(a, chrom=ch, lambda = lambda, ...)
        return(y)
    }
} # end of function

gcNormalize <- function (gcC, ratio, points2use = NULL, maxNumPoints = 10000) {
    # perform GC content normalization. Returns normalizing vector
    # gcC: vector with gc content (x)
    # ratio: vector with number of reads (y)
    # points2use: vector (TRUE/FALSE) with gcC and ratio to use. If NULL, all
    #   gcC and ratio will be used
    # maxNumPoints: maximal number of points to use to get the vector. Owing to
    #   computational time, and possible huge size of dataset, only maxNumPoints
    #   randomly selected are used to estimate the normalization curve
    # 
    # Returns a vector of the value of Loess Curve for all points in gcC. It
    #   keeps the same order as gcC. NA will be present where gcC is NA and
    #   where points2use is FALSE
    validX <- which(!is.na(gcC) & !is.na(ratio))
    # check if all points are good or only some of them
    if (is.null(points2use)) {
        validGc <- validX
    } else {
        validGc <- which(!is.na(gcC) & !is.na(ratio) & points2use)
    }   
    # if there are too many points, randomly select (maxNumPoints)
    if (length(validGc) > maxNumPoints ) { 
        validGc <- sample(validGc, maxNumPoints)
    }   
    # make a data fram with relevant names (important!)
    gcDist <- data.frame(gc = gcC[validGc], ratio = ratio[validGc])
    # fit loess with ratio as function of gc (names must correspond to those in
    # the data frame)
    gcDist.lo <- loess(ratio ~ gc, gcDist)
    # initialize vector with prediciton
    normVector <- rep(NA, length(ratio))
    # predict the expected number of reads as function of the observed GC
    # content
    normVector[validX] <- predict(gcDist.lo, data.frame(gc = gcC[validX]))

    medianNorm <- median(normVector, na.rm = TRUE)
    vectorRatio <- normVector/medianNorm
    ratio.gcC <- ratio/vectorRatio
    return(ratio.gcC)
} # end of function

specialDensity <- function (object, special, adjust, n) {
    if (length(special) != 0){
        specialPlot <- chrs(object) %in% special
        if (length(which(specialPlot)) < 2 | length(which(!is.na(ratio(object)[specialPlot]))) < 2) {
            message(paste("    ", special,  "does not have enough values to estimate density. Skipping."))
            X <- NA
            Y <- NA
            Dn <- list(x = X, y = Y)
            return (Dn)
        }
        Dn <- myDensity(ratio(object)[specialPlot], adjust = adjust)
        X <- Dn$x
        Y <- Dn$y/length(object)*length(which(specialPlot))
    } else {
        X <- NA
        Y <- NA
    }
    Dn <- list(x = X, y = Y)
    return (Dn)
}

selectGoodGuess <- function (Density, MQRDS, spacingTolerance, interceptRatio, 
    possiblePloidy, peakRatio, peakSpan){

    
    isApeak <- myPeaks(Density$y, span = peakSpan)
    peakPos <- which(isApeak)
    peakValues <- Density$y[peakPos]
    highPeaks <- isApeak & (Density$y >= max(peakValues/peakRatio))

    # get the highest of the first three peaks
    first3validPeaks <- peakValues[peakValues %in% Density$y[highPeaks]][seq(1,3)]
    highestPeakIndex <- which(first3validPeaks == max(first3validPeaks, na.rm = TRUE))

    # select max R, positive intercept, highest peak is on ploidy == 2
    # which possible ploidy has max R
    maxR <- MQRDS[,1] >= max(MQRDS[,1])*spacingTolerance
    # which possible ploidy would have a positive intercept?
    positiveIntercept <- MQRDS[,2] >= interceptRatio
    # which possible ploidy has the highest peak either on 2?
    highestPeakOn2 <- possiblePloidy[,highestPeakIndex]==2
    highest1orMore <- possiblePloidy[,highestPeakIndex]>=1
    highest2orMore <- possiblePloidy[,highestPeakIndex]>=2

    # first get only possible solutions (no negative counts for P = 0 =>
    # intercept must be >= 0)
    goodGuess <- positiveIntercept
    if (length(which(positiveIntercept)) > 1) {
    # next look for the best/very-good alignments of points
        goodGuess <- maxR & positiveIntercept
    }

    # next look for the soltion that have a peak at ploidy = 1 or more
    if (length(which(goodGuess)) > 1) {
        goodGuess <- goodGuess & highest2orMore
    }

    # here, in same rare cases, it happens that no solutions are
    # available. We need to re-run using a smaller threshold to allow some
    # "elasticity". Recursive function.
    if (length(which(goodGuess)) == 0) {
        return (logical(0))
#             warning("No possible solutions. deacreasing parameter 'spacingTolerance'")
#             spacingTolerance = spacingTolerance * .95
#             cnaList <- guessPeaksAndPloidy(cnaList, smooth = smooth, 
#                 excludeFromDensity = excludeFromDensity, 
#                 peakRatio = peakRatio, ploidyToTest = ploidyToTest, 
#                 peakSpan = peakSpan  , adjust = adjust,
#                 spacingTolerance = spacingTolerance , interceptRatio = interceptRatio) 
#             return (cnaList)
    }



    # if still more than one option, check wich one has at least a peak for
    # ploidy >=1 (avoid genome with 1 and 0 copy)
    if ( length(which(goodGuess)) > 1 ) {
        okPloidy <- possiblePloidy[,dim(possiblePloidy)[2]] >= 2
        goodGuess <- goodGuess & okPloidy
    }

    # if still more than one option get the one(s) that require smallest
    # range of ploidy (less gaps)
    if ((length(which(goodGuess))) > 1 ) {
        ploidyDiff <- possiblePloidy[goodGuess,dim(possiblePloidy)[2]] -  possiblePloidy[goodGuess,1]
        lessGap <- which(ploidyDiff==min(ploidyDiff))
        goodGuessTmp <- rep(FALSE, length(goodGuess))
        goodGuessTmp[which(goodGuess)[lessGap]] <- TRUE
        goodGuess <- goodGuessTmp
    }

    # if still there is more than than one options, chose the one with the
    # smallest ploidy
    if (length(which(goodGuess)) > 1 ) {
        maxPloidy <- apply(possiblePloidy, 1, max)
        minMaxPloidy <- which(goodGuess)[which(maxPloidy[goodGuess] == min(maxPloidy[goodGuess]))]
        goodGuess[] <- FALSE
        goodGuess[minMaxPloidy] <- TRUE
    }

    # if still more than a solution (?) warn and pick the first.
    if (length(which(goodGuess)) > 1 ) {
        warning("Two or more solution equally possible. Picking one first available.")
        stillGood <- which(goodGuess)
        goodGuess[] <- FALSE
        goodGuess[stillGood[1]] <- TRUE
    }

    return (goodGuess)
    
}

MQRDS <- function(MQR){
    MQRDS <- data.frame(matrix(NA, length(MQR[,2]), 3))        
    colnames(MQRDS) <- c("R", "shift", "scale")

    for (n in 1:length(MQR[,1])) {
        M <- MQR[n,1]
        Q <- MQR[n,2]
        R <- MQR[n,3]
        y0 <- Q
        y2 <- 2 * M + Q
        shift <- MQR[n,2]
        scale <- 1 / (y2 - y0)
        MQRDS[n,] <- c(R, shift, scale)
    }
    return(MQRDS)
}

MQR <- function(possibleMatrix, ploidyToTest, peakToUse){
    MQR <- data.frame(matrix(NA, length(possibleMatrix[,2]), 3))
    colnames(MQR) <- c("M", "Q", "R")
    for (n in 1:length(possibleMatrix[,1])) {
        valToUse <- which(possibleMatrix[n,]==1)
        MQR[n,] <- getMQR(seq(0,ploidyToTest)[valToUse], peakToUse)
    }
    return(MQR)
}

possibleBinNumbers <- function (numDigits, numOnes) {
# produce a binary matrix of all combination on how to sort numOnes of 1s with
# numDigits digits (0 or 1)
    numOfnums = 2^numDigits 
    m <- matrix(nrow=numOfnums, ncol=numDigits)
    for (n in 1:numDigits) {
        # cat (paste("Cicle", n, "of", numDigits, "\n"))
        m[,n] <- c(rep(1, numOfnums/(2^n)), rep(0, numOfnums/(2^n)))

    }
    sumByRow <- apply(m,1,sum)
    m <- m[sumByRow == numOnes,]
    return(m)
} # end of function

getMQR <- function (xs, ys) {
# fit line across x and y, return coefficient (m) and correlation coefficient
# (R)
    mod.summary <- summary(lm(ys ~ xs))
    M <- mod.summary$coefficients[2]
    Q <- mod.summary$coefficients[1]
    R2 <- mod.summary$r.squared
    return (c(M, Q, R2))
}

myPeaks <- function (series, span = 3) {
# found here 
# http://finzi.psych.upenn.edu/R/Rhelp02a/archive/33097.html
# and adapted
    z <- embed(series, span)
    s <- span%/%2
    v<- max.col(z, "first") == 1 + s
    result <- c(rep(FALSE,s),v,rep(FALSE,s))
    return(result)
}

myDensity <- function (signal, span = 3, adjust = 1) {
    # we don't want outliers. Get only values that are smaller than the max of:
    # span times the median (up to pentaploid?)
    # Median plus 3 times the SD (if it is really aneuploid with many "peaks")
    Median <- median(signal, na.rm = TRUE)
    SD <- sd(signal, na.rm = TRUE)
    bigY <- max(c(span*Median, Median + span * SD))
    OKsignal <- signif(signal[signal <= bigY & !is.na(signal)], 5)
    Density <- density(OKsignal, adjust = adjust, na.rm = TRUE)
    return(Density)
}

ratio2use <- function(object){
    # is there is smoothed ratio, use it, otherwise use ratio
    if (length(ratio.s(object)) == length(object)){
        ratio <- ratio.s(object)
    } else {
        ratio <- ratio(object)
    }
    return (ratio)
}


# findOutliers <- function(ratio, quantile, sd){
#     q <- quantile(ratio, quantile/100, na.rm = TRUE)
#     s <- median(ratio, na.rm = TRUE) + sd * sd(ratio, na.rm = TRUE)
#     isOutlier <- ratio > q & ratio > s
#     return(isOutlier)
# }

# .peaksAndPloidy <- function (object, method = "mixture", exclude = character(0),
#     peakRatio = 50, ploidyToTest = 14, spacingTolerance = .999,
#     interceptRatio = -0.1, quantile = 99.5, sd = 5, ...){
#     
#     }
# 

## function called by the Method (setMethod)

.guessPeaksAndPloidy <- function (object, ok4density = rep(TRUE, length(object)),
    peakRatio = 50, ploidyToTest = 14, spacingTolerance = .99,         
    interceptRatio = -0.1, adjust = 0.9, n = 2048, method = 'density') {
  
    # a variable that should not be changed
    peakSpan = 3

    # get data in handy variables
    ratio <- ratio2use(object)
    chrName <- chrs(object)
    
    interpDiploidXloc = NA
    Density <- density(ratio[ok4density], adjust = adjust, n = n)
    isApeak <- myPeaks(Density$y, span = peakSpan)
    peakPos <- which(isApeak)
    peakValues <- Density$y[peakPos]
    highPeaks <- isApeak & (Density$y >= max(peakValues/peakRatio))

    # peak closest to the median
    ratioMedian <- median(ratio, na.rm = TRUE)
    peakX <- Density$x[highPeaks]
    peakBol <- (Density$x %in% peakX) & highPeaks
    medianPeaksIndex <- which(abs(Density$x[peakBol]-ratioMedian)  ==  
        min(abs(Density$x[peakBol]-ratioMedian), na.rm = TRUE))
    peakClosestToMedian <- Density$x[peakBol][medianPeaksIndex] 


    if (method == 'mode'){
        maxPeak <- Density$x[Density$y == max(Density$y, na.rm = TRUE)]
        scaling <- 1/maxPeak
        interpDiploidXloc <- maxPeak
        MQR <- data.frame(M = maxPeak/2, Q = 0, R = 1)
        suggPeak <- maxPeak
        PP <- 2
        content = "Unknown"
    } else if (method == 'median'){
        thisMedian <- median (ratio[ok4density], na.rm = TRUE)
        scaling <- 1/thisMedian
        interpDiploidXloc <- thisMedian
        MQR <- data.frame(M = thisMedian/2, Q = 0, R = 1)
        suggPeak <- thisMedian
        PP <- 2
        content = "Unknown"
    } else if (method == 'closest'){
        scaling <- 1/peakClosestToMedian
        interpDiploidXloc <- peakClosestToMedian
        MQR <- data.frame(M = peakClosestToMedian/2, Q = 0, R = 1)
        suggPeak <- peakClosestToMedian
        PP <- 2
        content = "Unknown"
    } else if (method == 'density'){
        if (length(which(highPeaks)) == 1) { # only one peak, scale but don't shift (impossible to calculate )
            scaling <- 1/Density$x[highPeaks]
            interpDiploidXloc <- Density$x[highPeaks]
            MQR <- data.frame(M = Density$x[highPeaks]/2, Q = 0, R = 1)
            suggPeak <- peakClosestToMedian
            PP <- 2
            content = "Unknown"
        } else {
            possibleMatrix <- possibleBinNumbers(ploidyToTest+1, length(which(highPeaks)))
            # make table with ploidy
            possiblePloidy <- matrix(NA, length(possibleMatrix[,1]), length(which(highPeaks)))
            for (n in 1:length(possibleMatrix[,1]) ) {
                possiblePloidy[n,] <- which(possibleMatrix[n,]==1) - 1
            }
            peakToUse <- Density$x[highPeaks]
            MQR <- MQR(possibleMatrix, ploidyToTest, peakToUse)
            
            # MQRDS actually contains R shift and scale
            MQRDS <- MQRDS(MQR)
            goodGuess <- selectGoodGuess(Density, MQRDS, spacingTolerance, 
                interceptRatio, possiblePloidy, peakRatio, peakSpan)

            # here, in same rare cases, it happens that no solutions are
            # available. Rerun using a smaller threshold to allow some
            # "elasticity". Recursive function.
            if (length(goodGuess) == 0) {
                warning("No possible solutions. deacreasing parameter 'spacingTolerance'")
                spacingTolerance = spacingTolerance * .95
                object <- .guessPeaksAndPloidy(object, ok4density = ok4density, 
                    peakRatio = peakRatio, ploidyToTest = ploidyToTest,
                    spacingTolerance = spacingTolerance , 
                    interceptRatio = interceptRatio, adjust = adjust, n = n)
                    
                return(object)
            }
    
            solutions <- MQRDS[goodGuess,]
            # which one is the highest valid peak?
            scaling <- solutions$scale
            shifting <- solutions$shift
            
            thisMQR <- MQR[goodGuess,]
            interpDiploidXloc <- c(0,2) * thisMQR$M + thisMQR$Q # y = mx + q
            suggPeak <- Density$x[highPeaks]
        }

        if (exists("possiblePloidy")) {
            PP <- possiblePloidy[goodGuess,]
        } else {
            PP <- 2
        }
        if (length(interpDiploidXloc) == 1) {
            content = "Unknown"
        } else if (length( interpDiploidXloc) == 2){
            content = as.character(
                signif(100*(1-interpDiploidXloc[1]/interpDiploidXloc[2]), 3))
        } else {
            stop("interpDiploidXloc has more than 2 elements!")
        }
    } else {
        stop ("Wrong 'method'. Acceptables values: 'median' or 'mode'")
    }
    if (PP[Density$x[highPeaks] == peakClosestToMedian] == 0 
            & PP[length(PP)] == ploidyToTest){
        PP <- PP + 2
    } else if (PP[Density$x[highPeaks] == peakClosestToMedian] == 1
            & PP[length(PP)] == ploidyToTest) {
        PP <- PP + 1
    }

    object@Res@suggested.ploidy        <- PP
    object@Res@suggested.peaks         <- suggPeak
    object@Res@suggested.tumContent    <- content
    object@Res@interpDiploidXloc       <- interpDiploidXloc
    object@Res@notExcluded.density     <- Density
    object@Res@notExcluded.isAPeak     <- highPeaks
    object@Res@ratioMedian             <- ratioMedian
    object@Res@closestPeak             <- peakClosestToMedian
    if (exists ("thisMQR")) {
        object@Res@MQR                 <- thisMQR
        object@Res@suggested.R         <- signif(thisMQR$R, 4)
    } else {
        object@Res@MQR <- MQR
    }
    
    # object@Params@gp.excludeFromDensity <- exclude
    return(object)
}

.validateParams <- function(funName, validParamNames, newPairs){
    newPNames <- names(newPairs)
    okNames <- newPNames %in% validParamNames
    if (all(okNames)){
        # all parameters have valid names
        return (newPairs)
    } else {
        nonValid <- paste(newPNames[!okNames], collapse = ', ')
        warning(paste("Non valid parameters to pass to function", funName, "are ignored:", nonValid))
        return (newPairs[okNames])
    }
}

.getWeightByChr <- function(method, x, chrs, stdvs){
    uCh <- unique (chrs)
    whichChr <- lapply(uCh, function(oneChr, allChr, x){x[allChr == oneChr]}, chrs, x)
    Wlist <- lapply(whichChr, .getWeight, method, stdvs)
    return(unlist(Wlist))
}

.getWeight <- function(x, method, stdvs){
    avMeths <- c('poisson', 'gaussian', 'twoTailQuantile', 'oneTailQuantile')
    isMatch <- pmatch(tolower(method), avMeths)
    if (is.na(isMatch) | length(isMatch) > 1){
        stop(paste("Method passed '", method, "' does not match valid one (", 
            paste(avMeths, collapse = ', '),")", sep = ''))
    }
    met2use <- avMeths[isMatch]
    switch(met2use, 
        poisson =  .poissonWeights(x, stdvs),
        gaussian = .gaussianWeights(x, stdvs),
        twoTailQuantile = .TTquantileWeights(x, stdvs),
        oneTailQuantile = .OTquantileWeights(x, stdvs)
    )
}

.poissonWeights <- function(x, stdvs){
    # this is symmetric around the mean
    # we need to remove x = 0 or all the centromeres would be there...
    Lambda <- mean(x[x > 0], na.rm = TRUE)
    # std = sqrt(Lambda). We use stdvs standar deviations
    std = stdvs * sqrt(Lambda)
    Distance <- abs(x - Lambda)
    return(.calcWeight(Distance, std))
}

.gaussianWeights <- function(x, stdvs){
    if (length(x) < 10){
        return(rep(1, length(x)))
    }
    # this is symmetric, around the MEDIAN
    # we need to remove x = 0 or all the centromeres would be there...
    Median <- median(x[x > 0], na.rm = TRUE)
    # std = sqrt(Lambda). We use stdvs standar deviations
    std = stdvs * sd(x[x > 0], na.rm = TRUE)
    Distance <- abs(x - Median)
    return(.calcWeight(Distance, std))
}

.OTquantileWeights <- function(x, stdvs){
    # this is a-symmetric, around the MEDIAN
    # we need to remove x = 0 or all the centromeres would be there...
    Median <- median(x[x > 0], na.rm = TRUE)
    Q <- quantile (x[x > 0], c(0.05, 0.5, 0.95))
    D1 <- Q[2] - Q[1]
    Median <- Q[2]
    D2 <- Q[3] - Q[2]
    W <- rep (NA, length(x))
    Distance <- abs(x - Median)
    W[x <= Median] <- .calcWeight(Distance[x <= Median], D1)
    W[x > Median] <- 1
    return(W)
}

.TTquantileWeights <- function(x, stdvs){
    # this is a-symmetric, around the MEDIAN
    # we need to remove x = 0 or all the centromeres would be there...
    Median <- median(x[x > 0], na.rm = TRUE)
    Q <- quantile (x[x > 0], c(0.05, 0.5, 0.95))
    D1 <- Q[2] - Q[1]
    Median <- Q[2]
    D2 <- Q[3] - Q[2]
    W <- rep (NA, length(x))
    Distance <- abs(x - Median)
    W[x <= Median] <- .calcWeight(Distance[x <= Median], D1)
    W[x > Median] <- .calcWeight(Distance[x > Median], D2)
    return(W)
}

.calcWeight <- function(x, d){
    # x is the variable, d is the range (usually fixed)
    W <- rep(NA, length(x))
    # where x is larger than d
    oR <- x > d
    W[!oR] <- 1
    W[oR] <- 1 - ((x[oR] - d) / x[oR])
    return (W)
}

.addDNACopy <- function (object, independent.arms = FALSE, ideograms = NULL, 
    DNAcopy.smooth = list(), DNAcopy.weight = character(), DNAcopy.segment = list()) {
    
    # not exposed parameter
    SDV <- 2
    
    # object is CNAnorm  
    # some input checking
    if (independent.arms){
        if (! is.data.frame(ideograms) ){
            stop("Ideograms must be a data frame when independent.arms = TRUE")
        }
        if (!all(c('chrom', 'chromStart', 'chromEnd', 'name') %in% colnames(ideograms))){
            stop("Ideograms seems invalid. Wrong colnames")
        }
    }
    
    toSegment <- object@DerivData@ratio

    toSegment[toSegment <= 0] <- .05
    # using DNA copy, segment the data
    if (independent.arms & is.data.frame(ideograms)){
        chrs2use <- arms(object, ideograms)
    } else {
        chrs2use <- chrs(object)
    }

    CNA.object <- CNA(log2(toSegment), chrs2use, pos(object), 
        data.type = 'logratio', presorted = TRUE)

    # check the user defined parameters to pass to smoott.CNA are valid    
    validNamesSmooth <- c("smooth.region", "outlier.SD.scale", "smooth.SD.scale", "trim")
    DNAcopy.smooth <- .validateParams('smooth.CNA', validNamesSmooth, DNAcopy.smooth)
    # call smooth.DNA using do.call
    smoothed.CNA.object <- do.call(smooth.CNA, c(list(CNA.object), DNAcopy.smooth))

    # check the user defined parameters to pass to smoott.CNA are valid    
    validNamesSegment <- c("alpha", "nperm", "p.method", "min.width", "kmax", "nmin", 
        "eta", "sbdry", "trim", "undo.splits", "undo.prune", "undo.SD")
    DNAcopy.segment <- .validateParams('segment', validNamesSegment, DNAcopy.segment)
    # call segment using do.call
    if (length(DNAcopy.weight) == 0){
        segObj <- do.call(segment, c(list(smoothed.CNA.object,
            verbose = 0), DNAcopy.segment))
    } else {
        WW <- .getWeightByChr(DNAcopy.weight, object@InData@Norm, chrs(object) ,SDV)
        segObj <- do.call(segment, c(list(smoothed.CNA.object,
            verbose = 0, weights = WW), DNAcopy.segment))
    }


    segID <- rep(NA, length(object))
    segMean <- rep(NA, length(object))

    for (segNum in 1:length(segObj$output$chrom)) {
        thisMean <- segObj$output$seg.mean[segNum]
        if (!is.na(thisMean)){
            thisChr <- as.character(segObj$output$chrom[segNum])
            thisStart <- segObj$output$loc.start[segNum]
            thisEnd <- segObj$output$loc.end[segNum]
            # pointInThisSeg <- cnaList$data$Chr == thisChr & cnaList$data$Pos >= thisStart & cnaList$data$Pos < thisEnd
            # pointInThisSeg <- (chrs(object) == thisChr & pos(object) >= thisStart &
            pointInThisSeg <- (chrs2use == thisChr & pos(object) >= thisStart &
                pos(object) <= thisEnd)
            segID[pointInThisSeg] <- segNum
            segMean[pointInThisSeg] <- thisMean
        }
    }
    object@DerivData@segID <- segID
    object@DerivData@segMean <- segMean
    return(object)
}

# version that requires DNA copy
# .addSmooth <- function (object, ...) {
#     # obj is CNAnorm  
#     if (length(segID(object)) != length(object)){
#         object <- .addDNACopy(object)
#     }
#     chrNames <- segID(object)
#     notNAIndex <- which(!is.na(chrNames))
#     tmpChr <- chrNames[notNAIndex]
#     tmpRatio <- ratio(object)[notNAIndex]
#     tmpRatio.s <- smoothSignal(tmpRatio, tmpChr, ...)
#     ratio.s <- rep(NA, length(object))
#     ratio.s[notNAIndex] <- tmpRatio.s 
#     object@DerivData@ratio.s <- ratio.s
#     return (object)
# }


# smooth chromosomes, intemediate vlaues across "jumps"
.addSmooth <- function (object, lambda = 7, ...) {
    # obj is CNAnorm  
    ratio <- ratio(object)
    chrNames <- as.numeric(as.factor(chrs(object)))
    
    # notNAIndex <- which(!is.na(chrNames))
    notNAIndex <- which(!is.na(ratio))
    tmpChr <- chrNames[notNAIndex]
    tmpRatio <- ratio[notNAIndex]
    tmpRatio.s <- smoothSignal(tmpRatio, tmpChr, lambda = lambda, ...)
    ratio.s <- rep(NA, length(object))
    ratio.s[notNAIndex] <- tmpRatio.s 
    object@DerivData@ratio.s <- ratio.s
    return (object)
}




.gcNorm <- function(object, exclude = character(0), maxNumPoints = 10000){
    if (length(object@InData@GC) == 0){
        stop("Impossible to perform GC content normalization: no GC content available\n")
    } 
    if (length(exclude) == 0){
        points2use = NULL
    } else {
        points2use <- ! object@InData@Chr %in% exclude
        if (length(which(points2use)) < 2) {
            warning ("Not enough points to perform GC normalization.\n")
            return (object)
        }
    }
    object@DerivData@ratio <- gcNormalize(gcC(object), object@InData@ratio,
        points2use = points2use, maxNumPoints = maxNumPoints)
    # now save parameters used
    object@Params@gc.excludeFromGCNorm = exclude
    object@Params@gc.maxNumPoints = maxNumPoints

    return (object)    
}

makeTextLabels <- function (xLoc, Ploidy) {
    if (length(xLoc) != length(Ploidy)) {
        stop ("xLoc and Ploidy must have the same length")
    }
    textLab <- rep(NA, length(xLoc))
    for (i in 1:length(textLab)) {
#         textLab[i] <- paste("x = ", as.character(signif(xLoc[i],2)), 
#         ", Pl = ", as.character(Ploidy[i]), sep = '')
        textLab[i] <- paste(as.character(signif(xLoc[i],2)), " -> ", as.character(Ploidy[i]), sep = '')
    }
    return(textLab)
}

plotPeaksMixture <- function (object, special1 = character(0), special2 = character(0),
    show ='suggested', bins = 200, ...) {
    
    adjust = get.adjust(object)
    n = get.n(object)
    
    ratioMedian <- median(ratio(object), na.rm = TRUE)
#     D1 <- specialDensity(object, special1, adjust, n)
#     D2 <- specialDensity(object, special2, adjust, n)
    
    # xRange <- c(0, max(1.2*ratio.s(object), na.rm = TRUE))
    R.s <- ratio.s(object)
    hist(R.s, n = bins, xlab = 'Ratio Test/Control' )

    # initialize legend variables
    if (length(object@Params@gp.excludeFromDensity) > 0) {
        notExclLab <- paste("Everything but", 
            paste(object@Params@gp.excludeFromDensity, collapse = ', '))
    } else {
        notExclLab <- "Everything"
    }
    textLegend <- as.character(c(notExclLab))
    legCol = c('black')
    legLty = c(1)
    legLwd = c(3)
 
    if (show == 'suggested') {
        peakX <- object@Res@suggested.peaks
        ploidy <- object@Res@suggested.ploidy/2
        tumContent <- object@Res@suggested.tumContent
    } else if (show == 'validated') {
        peakX <- object@Res@validated.peaks
        ploidy <- object@Res@validated.ploidy/2
        tumContent <- object@Res@validated.tumContent
    } else {
        stop ("`show` must be either `suggested` or `validated`")
    }

    abline(v = ratioMedian, col = 'red', lty = 2, lwd = 2)
    abline(v = peakX, col = 'cyan', lwd = 2)


}

.plotPeaks <- function (object, special1 = character(0), special2 = character(0),
    show ='suggested', ...) {
    # as input provide the output of guessPeaksAndPloidy
    # Normalize by number of reads in ok4density
    adjust = get.adjust(object)
    n = get.n(object)
    bins = 200
    DD <- density(ratio2use(object)[object@DerivData@ok4density], adjust = adjust, n = n)
    X <- DD$x
    Y <- DD$y/length(object)*length(which(object@DerivData@ok4density))
    ratioMedian <- median(ratio(object), na.rm = TRUE)
    D1 <- specialDensity(object, special1, adjust, n)
    D2 <- specialDensity(object, special2, adjust, n)
   
    xRange <- c(0, max(X*1.2, na.rm = TRUE))
    yRange <- c(0, max(Y*1.2, na.rm = TRUE))

#     xRange <- c(0, max(c(X, D1$x, D2$x)*1.2, na.rm = TRUE))
#     yRange <- c(0, max(c(Y, D1$y, D2$y)*1.2, na.rm = TRUE))
    vShift = yRange[2]/20 
    # create an empty plot space
    plot(xRange, yRange, type = 'n', xlab = 'Ratio Test/Control', 
        ylab = 'Density', ...)

    # plot density of "non special" genome
    lines (X, Y, type = 'l', lw = 3)

    # initialize legend variables
    if (length(object@Params@gp.excludeFromDensity) > 0) {
        notExclLab <- paste("Everything but", 
            paste(object@Params@gp.excludeFromDensity, collapse = ', '))
    } else {
        notExclLab <- "Everything"
    }
    textLegend <- as.character(c(notExclLab))
    legCol = c('black')
    legLty = c(1)
    legLwd = c(3)
    

   
    # plot special 1
    if (!all(is.na(D1$x))) {
        lines (D1$x, D1$y, type = 'l', lty = 1, lw = 3, col = 'orchid4')
        specialLab1 <- paste(special1, collapse = ', ')
        textLegend <- c(textLegend, specialLab1)
        legCol = c(legCol, 'orchid4')
        legLty = c(legLty, 1)
        legLwd = c(legLwd, 3)
    }
    # plot special 2
    if (!all(is.na(D2$x))) {
        lines (D2$x, D2$y, type = 'l', lw = 3, lty = 1, col = 'lightblue')
        specialLab2 <- paste(special2, collapse = ', ')
        textLegend <- c(textLegend, specialLab2)
        legCol = c(legCol, 'lightblue')
        legLty = c(legLty, 1)
        legLwd = c(legLwd, 3)
    }
    
    # plot annotation of "non special" genome
    if (show == 'suggested') {
        peakX <- object@Res@suggested.peaks
        ploidy <- object@Res@suggested.ploidy/2
        tumContent <- object@Res@suggested.tumContent
    } else if (show == 'validated') {
        peakX <- object@Res@validated.peaks
        ploidy <- object@Res@validated.ploidy/2
        tumContent <- object@Res@validated.tumContent
    } else {
        stop ("`show` must be either `suggested` or `validated`")
    }


    abline(v = ratioMedian, col = 'red', lty = 2)
    
    mth <- object@Params@method
    ##### THIS IS WHERE THE FUNCTION DOES NOT WORK ANYMORE FOR MIXTURE MODEL...
    ### check into peakBol
    # browser()
    if (mth == 'density'){
        isAPeak <- mapPeaks(X, peakX)
        peakBol <- isAPeak & object@Res@notExcluded.isAPeak
        ww <- which(peakBol)    
    } else {
        isAPeak <- mapPeaks(X, peakX)
        ww <- which(isAPeak)
        for (i in 1:length(ww)){
            lines ( c(X[ww[i]], X[ww[i]]), c(0, Y[ww[i]]))
        }
    }
    
#     medianPeaksIndex <- which(abs(X[peakBol]-ratioMedian)  ==  min(abs(X[peakBol]-ratioMedian), na.rm = TRUE))
#     # actual plotting
#     lines (X[peakBol], Y[peakBol], type = 'p', pch=19, col='blue')
#     lines (X[peakBol][medianPeaksIndex], Y[peakBol][medianPeaksIndex], type = 'p', cex = 1.8, col='red')
#     textLab <- makeTextLabels(X[peakBol], ploidy)
#     text(X[peakBol], Y[peakBol] + vShift, textLab, cex = .8, col = 'blue')
#     
    medianPeaksIndex <- which(abs(X[ww]-ratioMedian)  ==  min(abs(X[ww]-ratioMedian), na.rm = TRUE))
    # actual plotting
    lines (X[ww], Y[ww], type = 'p', pch=19, col='blue')
    lines (X[ww][medianPeaksIndex], Y[ww][medianPeaksIndex], type = 'p', cex = 1.8, col='red')
    textLab <- makeTextLabels(X[ww], ploidy)
    text(X[ww], Y[ww] + vShift, textLab, cex = .8, col = 'blue')
   
    # add tumour content label
    legCol = c(legCol, 'black')
    legLty = c(legLty, 0)
    legLwd = c(legLwd, 0)
    textLegend <- c(textLegend,  paste("Est Tumour Content: ", tumContent, "%", sep = '') )
    

    legend("topright", inset=0.01, textLegend, lty = legLty, 
        lwd = legLwd, col = legCol, cex = .8)
}

mapPeaks <- function(map, peaks) {
    # map is a vector (X) where the peak will be mapped.
    # returns A logic vector of values of X closest to peaks
    isAPeak <- rep(FALSE, length(map))
    for (i in 1: length(peaks)){
        thisPeak <- peaks[i]
        ww <- which(abs(map - thisPeak) == (min(abs(map - thisPeak), na.rm = TRUE)))
        if (length(ww) > 1){
            ww <- ww[1]
        }
        isAPeak[ww] <- TRUE
    }
    return (isAPeak)
}



.plotPeaks_old <- function (object, special1 = character(0), special2 = character(0),
    show ='suggested', adjust = .9, n = 2048, ...) {
    # as input provide the output of guessPeaksAndPloidy
    # Normalize by number of reads in ok4density
    DD <- object@Res@notExcluded.density
    X <- DD$x
    Y <- DD$y/length(object)*length(which(object@DerivData@ok4density))
    ratioMedian <- median(ratio(object), na.rm = TRUE)
    D1 <- specialDensity(object, special1, adjust, n)
    D2 <- specialDensity(object, special2, adjust, n)
   
    xRange <- c(0, max(X*1.2, na.rm = TRUE))
    yRange <- c(0, max(Y*1.2, na.rm = TRUE))

#     xRange <- c(0, max(c(X, D1$x, D2$x)*1.2, na.rm = TRUE))
#     yRange <- c(0, max(c(Y, D1$y, D2$y)*1.2, na.rm = TRUE))
    vShift = yRange[2]/20 
    # create an empty plot space
    plot(xRange, yRange, type = 'n', xlab = 'Ratio Test/Control', 
        ylab = 'Density', ...)

    # plot density of "non special" genome
    lines (X, Y, type = 'l', lw = 3)

    # initialize legend variables
    if (length(object@Params@gp.excludeFromDensity) > 0) {
        notExclLab <- paste("Everything but", 
            paste(object@Params@gp.excludeFromDensity, collapse = ', '))
    } else {
        notExclLab <- "Everything"
    }
    textLegend <- as.character(c(notExclLab))
    legCol = c('black')
    legLty = c(1)
    legLwd = c(3)
    

   
    # plot special 1
    if (!all(is.na(D1$x))) {
        lines (D1$x, D1$y, type = 'l', lty = 1, lw = 3, col = 'orchid4')
        specialLab1 <- paste(special1, collapse = ', ')
        textLegend <- c(textLegend, specialLab1)
        legCol = c(legCol, 'orchid4')
        legLty = c(legLty, 1)
        legLwd = c(legLwd, 3)
    }
    # plot special 2
    if (!all(is.na(D2$x))) {
        lines (D2$x, D2$y, type = 'l', lw = 3, lty = 1, col = 'lightblue')
        specialLab2 <- paste(special2, collapse = ', ')
        textLegend <- c(textLegend, specialLab2)
        legCol = c(legCol, 'lightblue')
        legLty = c(legLty, 1)
        legLwd = c(legLwd, 3)
    }
    
    # plot annotation of "non special" genome
    if (show == 'suggested') {
        peakX <- object@Res@suggested.peaks
        ploidy <- object@Res@suggested.ploidy/2
        tumContent <- object@Res@suggested.tumContent
    } else if (show == 'validated') {
        peakX <- object@Res@validated.peaks
        ploidy <- object@Res@validated.ploidy/2
        tumContent <- object@Res@validated.tumContent
    } else {
        stop ("`show` must be either `suggested` or `validated`")
    }
    peakBol <- (X %in% peakX) & object@Res@notExcluded.isAPeak
    medianPeaksIndex <- which(abs(X[peakBol]-ratioMedian)  ==  min(abs(X[peakBol]-ratioMedian), na.rm = TRUE))
    # actual plotting
    abline(v = ratioMedian, col = 'red', lty = 2)
    lines (X[peakBol], Y[peakBol], type = 'p', pch=19, col='blue')
    lines (X[peakBol][medianPeaksIndex], Y[peakBol][medianPeaksIndex], type = 'p', cex = 1.8, col='red')
    textLab <- makeTextLabels(X[peakBol], ploidy)
    text(X[peakBol], Y[peakBol] + vShift, textLab, cex = .8, col = 'blue')
    
    # add tumour content label
    legCol = c(legCol, 'black')
    legLty = c(legLty, 0)
    legLwd = c(legLwd, 0)
    textLegend <- c(textLegend,  paste("Est Tumour Content: ", tumContent, "%", sep = '') )
    

    legend("topright", inset=0.01, textLegend, lty = legLty, 
        lwd = legLwd, col = legCol, cex = .8)
}
.subSet <- function (x, i, j, drop) {
    if (!missing(j)) {
        stop("Error: incorrect number of dimensions\n")
    } else {
        Chr <- chrs(x)[i]
        Pos <- pos(x)[i]
        Test <- if(length(x@InData@Test) > 0){x@InData@Test[i]}else{numeric()}
        Norm <- if(length(x@InData@Norm) > 0){x@InData@Norm[i]}else{numeric()}
        ratio <- if(length(x@InData@ratio) > 0){x@InData@ratio[i]}else{numeric()}
        GC <- if(length(x@InData@GC) > 0){x@InData@GC[i]}else{numeric()}
        newInData <- new("InData", Chr = Chr, Pos = Pos, Test = Test, Norm = Norm, 
            ratio = ratio, GC = GC)
        
        ratio <- if(length(ratio(x)) > 0){ratio(x)[i]}else{numeric()}
        ratio.n <- if(length(ratio.n(x)) > 0){ratio.n(x)[i]}else{numeric()}
        ratio.s <- if(length(ratio.s(x)) > 0){ratio.s(x)[i]}else{numeric()}
        ratio.s.n <- if(length(ratio.s.n(x)) > 0){ratio.s.n(x)[i]}else{numeric()}
        segID <- if(length(x@DerivData@segID[i]) > 0){x@DerivData@segID[i]}else{numeric()}
        segMean <- if(length(segMean(x)) > 0){segMean(x)[i]}else{numeric()}
        segMean.n <- if(length(segMean.n(x)) > 0){segMean.n(x)[i]}else{numeric()}
        ok4density <- if(length(x@DerivData@ok4density) > 0){x@DerivData@ok4density[i]}else{logical()}
        
        newDerivData <- new("DerivData", ratio = ratio, ratio.n = ratio.n, ratio.s = ratio.s,
            ratio.s.n = ratio.s.n, segID = segID, segMean = segMean, segMean.n = segMean.n,
            ok4density = ok4density)
        newRes <- x@Res
        newParams <- x@Params

        
        newObject <- new("CNAnorm", InData = newInData, DerivData = newDerivData, 
            Res = newRes, Params = newParams)
    }
    return(newObject)
}

.exportTable <- function (object, file = "CNAnorm_table.tab", show = 'ratio', sep = "\t", row.names = FALSE, ...){
    roundPloidy <- object@Res@validated.closestPeak
    regExp <- paste("^", show, sep = '')
    showOptions <- c('ratio', 'ploidy', 'center')
    ww <- grep(regExp, showOptions, ignore.case = TRUE)
    if (length(ww) > 1){
        warning("ambigous value for 'show', using 'ratio'...\n")
        ww <- 1
    } else if (length(ww) == 0){
        warning("'show' not matching available options, using 'ratio'...\n")
        ww <- 1
    }
    if (ww == 1){
        mulP <- 1
    } else if (ww == 2){
        mulP <- 2
    } else {
        mulP <- 1/roundPloidy
    }


    numEl <- length(chrs(object))
    if (length(segMean(object)) == numEl){
        SegMean <- 2^segMean(object)
        SegMean.n <- segMean.n(object) * mulP
    } else {
        SegMean <- rep(NA, length(ratio.s.n(object)))
        SegMean.n <- rep(NA, length(ratio.s.n(object)))
    }
    if (length(ratio.s.n(object)) == numEl){
        Ratio.s.n = ratio.s.n(object) * mulP
    } else {
        Ratio.s.n = rep(NA, numEl)
    }

    df <- data.frame(
        Chr = chrs(object),
        Pos = pos(object),
        Ratio = ratio(object),
        Ratio.n = ratio.n(object) * mulP,
        Ratio.s.n = Ratio.s.n,
        SegMean = SegMean,
        SegMean.n = SegMean.n
    )
#     return (df)
    write.table(df, file = file, sep = sep, row.names = row.names, ...)
}

setMethod(f = "gcNorm", signature = "CNAnorm", definition = .gcNorm)
setMethod(f = "pos", signature = "CNAnorm", definition = .pos)
setMethod(f = "arms", signature = "CNAnorm", definition = .arms)
setMethod(f = "addSmooth", signature = "CNAnorm", definition = .addSmooth)
setMethod(f = "addDNACopy", signature = "CNAnorm", definition = .addDNACopy)
setMethod(f = "peakPloidy", signature = "CNAnorm", definition = .peakPloidy)
setMethod(f = "validation", signature = "CNAnorm", definition = .validation)
setMethod(f = "discreteNorm", signature = "CNAnorm", definition = .discreteNorm)
setMethod(f = "plotGenome", signature = "CNAnorm", definition = .plotGenome)
setMethod(f = "plotPeaks", signature = "CNAnorm", definition = .plotPeaks)
setMethod(f = "[", signature = "CNAnorm", definition = .subSet)
setMethod(f = "exportTable", signature = "CNAnorm", definition = .exportTable)
Any scripts or data that you put into this service are public.
CNAnorm documentation built on Nov. 8, 2020, 5:29 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
CNAnorm
A normalization method for Copy Number Aberration in cancer samples

R/CNAnorm-methods.R
In CNAnorm: A normalization method for Copy Number Aberration in cancer samples

Defines functions mapPeaks .gcNorm .calcWeight .TTquantileWeights .OTquantileWeights .gaussianWeights .poissonWeights .getWeight .getWeightByChr .validateParams ratio2use MQR MQRDS smooth.it userOrDefault checkMethod findOutliers .peakPloidy makeNumericLabels medianWinSize .pos .discreteNorm

Try the CNAnorm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CNAnorm A normalization method for Copy Number Aberration in cancer samples

R/CNAnorm-methods.R In CNAnorm: A normalization method for Copy Number Aberration in cancer samples

Defines functions mapPeaks .gcNorm .calcWeight .TTquantileWeights .OTquantileWeights .gaussianWeights .poissonWeights .getWeight .getWeightByChr .validateParams ratio2use MQR MQRDS smooth.it userOrDefault checkMethod findOutliers .peakPloidy makeNumericLabels medianWinSize .pos .discreteNorm

Try the CNAnorm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CNAnorm
A normalization method for Copy Number Aberration in cancer samples

R/CNAnorm-methods.R
In CNAnorm: A normalization method for Copy Number Aberration in cancer samples