R/dye_bias.R
In sesame: Tools For Analyzing Illumina Infinium DNA Methylation Arrays

Documented in dyeBiasCorr dyeBiasCorrMostBalanced dyeBiasCorrTypeINorm getNormCtls

#' get normalization control signal
#' 
#' get normalization control signal from SigSet. 
#' The function optionally takes mean for each channel.
#' 
#' @param   sset a SigSet
#' @param   average whether to average
#' @return a data frame of normalization control signals
#' @examples 
#' sset <- readIDATpair(file.path(system.file(
#'     'extdata','',package='sesameData'), '4207113116_B'))
#' 
#' df.ctl <- getNormCtls(sset)
#' 
#' @export
getNormCtls <- function(sset, average = FALSE) {
    df <- ctl(sset)
    df <- df[grep('norm(_|\\.)', tolower(rownames(df))),]
    if (sset@platform == 'HM27') {
        df$channel <- ifelse(grepl(
            'norm\\.green', tolower(rownames(df))), 'G', 'R')
    } else {
        df$channel <- ifelse(grepl(
            'norm_(c|g)', tolower(rownames(df))), 'G', 'R')
    }
    
    if (average) {
        c(
            G=mean(df[df$channel=='G','G'], na.rm=TRUE), 
            R=mean(df[df$channel=='R','R'], na.rm=TRUE))
    } else {
        df
    }
}

#' Correct dye bias in by linear scaling.
#'
#' The function takes a \code{SigSet} as input and scale both the Grn and Red
#' signal to a reference (ref) level. If the reference level is not given, it
#' is set to the mean intensity of all the in-band signals. The function
#' returns a \code{SigSet} with dye bias corrected.
#' 
#' @param sset a \code{SigSet}
#' @param ref reference signal level
#' @return a normalized \code{SigSet}
#' @examples
#' sset <- sesameDataGet('EPIC.1.LNCaP')$sset
#' sset.db <- dyeBiasCorr(sset)
#' @export
dyeBiasCorr <- function(sset, ref=NULL) {

    stopifnot(is(sset, "SigSet"))
    if (is.null(ref)) {
        ref <- meanIntensity(sset)
    }
    
    normctl <- getNormCtls(sset, average=TRUE)
    fR <- ref/normctl['R']
    fG <- ref/normctl['G']

    IG(sset) <- matrix(
        c(fG*IG(sset)[,'M'], fG*IG(sset)[,'U']),
        nrow=nrow(IG(sset)), ncol=ncol(IG(sset)), dimnames=dimnames(IG(sset)))
    
    IR(sset) <- matrix(
        c(fR*IR(sset)[,'M'], fR*IR(sset)[,'U']),
        nrow=nrow(IR(sset)), ncol=ncol(IR(sset)), dimnames=dimnames(IR(sset)))
    
    II(sset) <- matrix(
        c(fG*II(sset)[,'M'], fR*II(sset)[,'U']),
        nrow=nrow(II(sset)), ncol=ncol(II(sset)), dimnames=dimnames(II(sset)))
    
    ctl(sset)$G <- fG*ctl(sset)$G
    ctl(sset)$R <- fR*ctl(sset)$R
    oobG(sset) <- fG*oobG(sset)
    oobR(sset) <- fR*oobR(sset)
    sset
}

#' Correct dye bias using most balanced sample as the reference
#'
#' The function chose the reference signal level from a list of \code{SigSet}.
#' The chosen sample has the smallest difference in Grn and Red signal
#' intensity as measured using the normalization control probes. In practice,
#' it doesn't matter which sample is chosen as long as the reference level
#' does not deviate much. The function returns a list of \code{SigSet}s with
#' dye bias corrected.
#' 
#' @param ssets a list of normalized \code{SigSet}s
#' @return a list of normalized \code{SigSet}s
#' @examples
#' ssets <- sesameDataGet('HM450.10.TCGA.BLCA.normal')
#' ssets.db <- dyeBiasCorrMostBalanced(ssets)
#' @export
dyeBiasCorrMostBalanced <- function(ssets) {

    normctls <- vapply(ssets, getNormCtls, numeric(2), average=TRUE)
    most.balanced <- which.min(abs(normctls['G',] / normctls['R',] - 1))
    ref <- mean(normctls[,most.balanced], na.rm=TRUE)
    lapply(ssets, function(sset) dyeBiasCorr(sset, ref))
}

#' Dye bias correction by matching green and red to mid point
#'
#' This function compares the Type-I Red probes and Type-I Grn probes and
#' generates and mapping to correct signal of the two channels to the middle.
#' The function takes one single \code{SigSet} and returns a \code{SigSet}
#' with dye bias corrected.
#' 
#' @param sset a \code{SigSet}
#' @return a \code{SigSet} after dye bias correction.
#' @importFrom preprocessCore normalize.quantiles.use.target
#' @importFrom stats approx
#' @examples
#' sset <- sesameDataGet('EPIC.1.LNCaP')$sset
#' sset.db <- dyeBiasCorrTypeINorm(sset)
#' @export
dyeBiasCorrTypeINorm <- function(sset) {

    stopifnot(is(sset, "SigSet"))
    maxIG <- max(IG(sset), na.rm = TRUE)
    minIG <- min(IG(sset), na.rm = TRUE)
    maxIR <- max(IR(sset), na.rm = TRUE)
    minIR <- min(IR(sset), na.rm = TRUE)

    if (maxIG <= 0 || maxIR <= 0) {
        return(sset)
    }

    IR1 <- sort(as.numeric(IR(sset)))
    IR2 <- sort(as.vector(normalize.quantiles.use.target(
        matrix(IR1), as.vector(IG(sset)))))
    
    IRmid <- (IR1 + IR2) / 2.0
    maxIRmid <- max(IRmid)
    minIRmid <- min(IRmid)

    fitfunRed <- function(data) {
        insupp    <- data <= maxIR & data >= minIR & (!is.na(data))
        oversupp  <- data > maxIR & (!is.na(data))
        undersupp <- data < minIR & (!is.na(data))
        data[insupp] <- approx(x=IR1, y=IRmid, xout=data[insupp], ties=mean)$y
        data[oversupp]  <- data[oversupp] - maxIR + maxIRmid
        data[undersupp] <- minIRmid/ minIR * data[undersupp]
        data
    }

    IG1 <- sort(as.numeric(IG(sset)))
    IG2 <- sort(as.vector(normalize.quantiles.use.target(
        matrix(IG1), as.vector(IR(sset)))))
    
    IGmid <- (IG1 + IG2) / 2.0
    maxIGmid <- max(IGmid)
    minIGmid <- min(IGmid)

    fitfunGrn <- function(data) {
        insupp    <- data <= maxIG & data >= minIG & (!is.na(data))
        oversupp  <- data > maxIG & (!is.na(data))
        undersupp <- data < minIG & (!is.na(data))
        data[insupp] <- approx(x=IG1, y=IGmid, xout=data[insupp], ties=mean)$y
        data[oversupp]  <- data[oversupp] - maxIG + maxIGmid
        data[undersupp] <- minIGmid/ minIG * data[undersupp]
        data
    }

    ## fit type II
    II(sset)[,'U'] <- fitfunRed(II(sset)[,'U'])
    II(sset)[,'M'] <- fitfunGrn(II(sset)[,'M'])

    ## IR
    IR <- fitfunRed(IR(sset))
    ## dim(IRmid) <- dim(IR(sset))
    ## dimnames(IRmid) <- dimnames(IR(sset))
    IR(sset) <- IR
    ## IG
    IG <- fitfunGrn(IG(sset))
    ## dim(IGmid) <- dim(IG(sset))
    ## dimnames(IGmid) <- dimnames(IG(sset))
    IG(sset) <- IG

    ## fit control
    if (nrow(ctl(sset)) > 0) {
        ctl(sset)[,'R'] <- fitfunRed(ctl(sset)[,'R'])
        ctl(sset)[,'G'] <- fitfunGrn(ctl(sset)[,'G'])
    }

    ## fit oob
    oobR(sset) <- fitfunRed(oobR(sset))
    oobG(sset) <- fitfunGrn(oobG(sset))

    sset
}

## M, U, green matched to red distribution
dyeBiasCorrTypeINormG2R <- function(sset) {

    stopifnot(is(sset, "SigSet"))
    maxIG <- max(IG(sset))
    minIG <- min(IG(sset))
    maxIR <- max(IR(sset))
    minIR <- min(IR(sset))

    IG1 <- sort(as.numeric(IG(sset)))
    IG2 <- sort(as.vector(preprocessCore::normalize.quantiles.use.target(
        matrix(IG1), as.vector(IR(sset)))))
    
    fitfun <- function(xx) approx(x=IG1, y=IG2, xout=xx, ties=mean)$y

    ## fit type II
    insupp <- II(sset)[,'M'] <= maxIG & II(sset)[,'M'] >= minIG
    oversupp <- II(sset)[,'M'] > maxIG
    undersupp <- II(sset)[,'M'] < minIG

    II(sset)[insupp,'M'] <- fitfun(II(sset)[insupp,'M'])

    II(sset)[oversupp,'M'] <- maxIR +
        (II(sset)[oversupp,'M'] - maxIG) * (maxIR-minIR) / (maxIG-minIG)

    II(sset)[undersupp,'M'] <- minIR / minIG * II(sset)[undersupp,'M']

    ## fit IG
    IG.fit <- fitfun(IG(sset))
    dim(IG.fit) <- dim(IG(sset))
    dimnames(IG.fit) <- dimnames(IG(sset))
    IG(sset) <- IG.fit

    ## fit control
    insupp <- ctl(sset)[,'G'] <= maxIG &
        ctl(sset)[,'G'] >= minIG & (!is.na(ctl(sset)[,'G']))
    oversupp <- ctl(sset)[,'G'] > maxIG & (!is.na(ctl(sset)[,'G']))
    undersupp <- ctl(sset)[,'G'] < minIG & (!is.na(ctl(sset)[,'G']))
    ctl(sset)[insupp,'G'] <- fitfun(ctl(sset)[insupp,'G'])
    ctl(sset)[oversupp,'G'] <- maxIR +
        (ctl(sset)[oversupp,'G'] - maxIG) * (maxIR-minIR) / (maxIG-minIG)
    ctl(sset)[undersupp,'G'] <- minIR / minIG * ctl(sset)[undersupp,'G']

    ## fit oob
    insupp <- oobG(sset) <= maxIG & oobG(sset) >= minIG & (!is.na(oobG(sset)))
    oversupp <- oobG(sset) > maxIG & (!is.na(oobG(sset)))
    undersupp <- oobG(sset) < minIG & (!is.na(oobG(sset)))
    oobG(sset)[insupp] <- fitfun(oobG(sset)[insupp])
    oobG(sset)[oversupp] <- maxIR +
        (oobG(sset)[oversupp] - maxIG) * (maxIR-minIR) / (maxIG-minIG)
    oobG(sset)[undersupp] <- minIR / minIG * oobG(sset)[undersupp]

    sset
}


## M,U red matched to green distribution
dyeBiasCorrTypeINormR2G <- function(sset) {

    stopifnot(is(sset, "SigSet"))
    maxIG <- max(IG(sset))
    minIG <- min(IG(sset))
    maxIR <- max(IR(sset))
    minIR <- min(IR(sset))

    IR1 <- sort(as.numeric(IR(sset)))
    IR2 <- sort(as.vector(preprocessCore::normalize.quantiles.use.target(
        matrix(IR1), as.vector(IG(sset)))))
    fitfun <- function(xx) approx(x=IR1, y=IR2, xout=xx, ties=mean)$y

    ## fit type II
    insupp <- II(sset)[,'U'] <= maxIR & II(sset)[,'U'] >= minIR
    oversupp <- II(sset)[,'U'] > maxIR
    undersupp <- II(sset)[,'U'] < minIR
    II(sset)[insupp,'U'] <- fitfun(II(sset)[insupp,'U'])
    II(sset)[oversupp,'U'] <- maxIG +
        (II(sset)[oversupp,'U'] - maxIR) * (maxIG-minIG) / (maxIR-minIR)
    II(sset)[undersupp,'U'] <- minIG / minIR * II(sset)[undersupp,'U']

    ## fit IR
    IR.fit <- fitfun(IR(sset))
    dim(IR.fit) <- dim(IR(sset))
    dimnames(IR.fit) <- dimnames(IR(sset))
    IR(sset) <- IR.fit

    ## fit control
    insupp <- ctl(sset)[,'R'] <= maxIR & ctl(sset)[,'R'] >= minIR &
        (!is.na(ctl(sset)[,'R']))
    oversupp <- ctl(sset)[,'R'] > maxIR & (!is.na(ctl(sset)[,'R']))
    undersupp <- ctl(sset)[,'R'] < minIR & (!is.na(ctl(sset)[,'R']))
    ctl(sset)[insupp,'R'] <- fitfun(ctl(sset)[insupp,'R'])
    ctl(sset)[oversupp,'R'] <- maxIG +
        (ctl(sset)[oversupp,'R'] - maxIR) * (maxIG-minIG) / (maxIR-minIR)
    ctl(sset)[undersupp,'R'] <- minIG / minIR * ctl(sset)[undersupp,'R']

    ## fit oob
    insupp <- oobR(sset) <= maxIR & oobR(sset) >= minIR & (!is.na(oobR(sset)))
    oversupp <- oobR(sset) > maxIR & (!is.na(oobR(sset)))
    undersupp <- oobR(sset) < minIR & (!is.na(oobR(sset)))
    oobR(sset)[insupp] <- fitfun(oobR(sset)[insupp])
    oobR(sset)[oversupp] <- maxIG +
        (oobR(sset)[oversupp] - maxIR) * (maxIG-minIG) / (maxIR-minIR)
    oobR(sset)[undersupp] <- minIG / minIR * oobR(sset)[undersupp]

    sset
}

## M + U green matched to red distribution
dyeBiasCorrTypeINormMpU <- function(sset) {

    stopifnot(is(sset, "SigSet"))
    maxIG <- max(rowSums(IG(sset)))
    minIG <- min(rowSums(IG(sset)))
    maxIR <- max(rowSums(IR(sset)))
    minIR <- min(rowSums(IR(sset)))

    IG1 <- sort(as.numeric(rowSums(IG(sset))))
    IG2 <- sort(as.vector(preprocessCore::normalize.quantiles.use.target(
        matrix(IG1), as.vector(rowSums(IR(sset))))))
    fitfun <- function(xx) approx(x=IG1, y=IG2, xout=xx, ties=mean)$y

    ## fit type II
    insupp <- II(sset)[,'M'] <= maxIG & II(sset)[,'M'] >= minIG
    oversupp <- II(sset)[,'M'] > maxIG
    undersupp <- II(sset)[,'M'] < minIG
    II(sset)[insupp,'M'] <- fitfun(II(sset)[insupp,'M'])
    II(sset)[oversupp,'M'] <- maxIR +
        (II(sset)[oversupp,'M'] - maxIG) * (maxIR-minIR) / (maxIG-minIG)
    II(sset)[undersupp,'M'] <- minIR / minIG * II(sset)[undersupp,'M']

    ## fit IG
    IG.fit <- fitfun(IG(sset))
    dim(IG.fit) <- dim(IG(sset))
    dimnames(IG.fit) <- dimnames(IG(sset))
    IG(sset) <- IG.fit

    ## fit control
    insupp <- ctl(sset)[,'G'] <= maxIG &
        ctl(sset)[,'G'] >= minIG & (!is.na(ctl(sset)[,'G']))
    oversupp <- ctl(sset)[,'G'] > maxIG & (!is.na(ctl(sset)[,'G']))
    undersupp <- ctl(sset)[,'G'] < minIG & (!is.na(ctl(sset)[,'G']))
    ctl(sset)[insupp,'G'] <- fitfun(ctl(sset)[insupp,'G'])
    ctl(sset)[oversupp,'G'] <- maxIR +
        (ctl(sset)[oversupp,'G'] - maxIG) * (maxIR-minIR) / (maxIG-minIG)
    ctl(sset)[undersupp,'G'] <- minIR / minIG * ctl(sset)[undersupp,'G']

    ## fit oob
    insupp <- oobG(sset) <= maxIG & oobG(sset) >= minIG & (!is.na(oobG(sset)))
    oversupp <- oobG(sset) > maxIG & (!is.na(oobG(sset)))
    undersupp <- oobG(sset) < minIG & (!is.na(oobG(sset)))
    oobG(sset)[insupp] <- fitfun(oobG(sset)[insupp])
    oobG(sset)[oversupp] <- maxIR +
        (oobG(sset)[oversupp] - maxIG) * (maxIR-minIR) / (maxIG-minIG)
    oobG(sset)[undersupp] <- minIR / minIG * oobG(sset)[undersupp]

    sset
}

Any scripts or data that you put into this service are public.

sesame documentation built on Nov. 15, 2020, 2:08 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

sesame
Tools For Analyzing Illumina Infinium DNA Methylation Arrays

R/dye_bias.R
In sesame: Tools For Analyzing Illumina Infinium DNA Methylation Arrays

Defines functions dyeBiasCorrTypeINormMpU dyeBiasCorrTypeINormR2G dyeBiasCorrTypeINormG2R dyeBiasCorrTypeINorm dyeBiasCorrMostBalanced dyeBiasCorr getNormCtls

Documented in dyeBiasCorr dyeBiasCorrMostBalanced dyeBiasCorrTypeINorm getNormCtls

Try the sesame package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sesame Tools For Analyzing Illumina Infinium DNA Methylation Arrays

R/dye_bias.R In sesame: Tools For Analyzing Illumina Infinium DNA Methylation Arrays

Defines functions dyeBiasCorrTypeINormMpU dyeBiasCorrTypeINormR2G dyeBiasCorrTypeINormG2R dyeBiasCorrTypeINorm dyeBiasCorrMostBalanced dyeBiasCorr getNormCtls

Documented in dyeBiasCorr dyeBiasCorrMostBalanced dyeBiasCorrTypeINorm getNormCtls

Try the sesame package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sesame
Tools For Analyzing Illumina Infinium DNA Methylation Arrays

R/dye_bias.R
In sesame: Tools For Analyzing Illumina Infinium DNA Methylation Arrays