R/dtcr.R

Defines functions .process_oneRNA_compmerg .compare_prop dtcr

Documented in dtcr

###Function in the normalizing functions family.

#' Calculate deltaTCR.
#'
#' Performs deltaTCR (dtcr) normalization given control and treated GRanges
#' generated by comp() function.
#'
#' @param control_GR GRanges object made by comp() function from the control
#' sample.
#' @param treated_GR GRanges object made by comp() function from the treated
#' sample.
#' @param window_size if smoothing is to be performed, what should be the
#' window size?  (use only odd numbers to ensure that windows are centred on a
#' nucleotide of interest) (default: 3)
#' @param nt_offset how many nucleotides before a modification the reverse
#' transcription terminates. E.g. for HRF-Seq nt_offset=1 (default: 1)
#' @param bring_to_zero should in deltaTCR calculations negative deltaTCR's be
#' brought to 0 as was done in HRF-Seq paper (default: T)
#' @param add_to GRanges object made by other normalization function (dtcr(),
#' slograt(), swinsor(), compdata()) to which normalized values should be
#' added.
#' @return GRanges object with "dtcr" (deltaTCR) and "dtcr.p" (p.value of
#' comparing control and treated calcualted with pooled two-proportion Z-test)
#' metadata.
#' @author Lukasz Jan Kielpinski, Nikos Sidiropoulos
#' @seealso \code{\link{comp}}, \code{\link{slograt}}, \code{\link{swinsor}},
#' \code{\link{compdata}}, \code{\link{GR2norm_df}}, \code{\link{plotRNA}},
#' \code{\link{norm2bedgraph}}
#' @references Kielpinski, L.J., and Vinther, J. (2014). Massive
#' parallel-sequencing-based hydroxyl radical probing of RNA accessibility.
#' Nucleic Acids Res.
#' @examples
#'
#' dummy_euc_GR_control <- GRanges(seqnames="DummyRNA",
#'                                 IRanges(start=round(runif(100)*100),
#'                                 width=round(runif(100)*100+1)), strand="+",
#'                                 EUC=round(runif(100)*100))
#' dummy_euc_GR_treated <- GRanges(seqnames="DummyRNA",
#'                                 IRanges(start=round(runif(100)*100),
#'                                 width=round(runif(100)*100+1)), strand="+",
#'                                 EUC=round(runif(100)*100))
#' dummy_comp_GR_control <- comp(dummy_euc_GR_control)
#' dummy_comp_GR_treated <- comp(dummy_euc_GR_treated)
#' dtcr(control_GR=dummy_comp_GR_control, treated_GR=dummy_comp_GR_treated)
#'
#' @import GenomicRanges
#' @export dtcr
dtcr <- function(control_GR, treated_GR, window_size=3, nt_offset=1,
                 bring_to_zero=TRUE, add_to){

###Check conditions:
    if(nt_offset < 0)
        stop("nt_offset must be >= 0")

    if(window_size < 0)
        stop("window_size must be >= 0")

    if((window_size%%2)!=1)
        stop("window_size must be odd")

    ###Main Body:
    control <- GR2norm_df(control_GR)
    treated <- GR2norm_df(treated_GR)

    #Merge control and treated
    comp_merg <- merge(control, treated, by=c("RNAid", "Pos", "nt"), all=TRUE,
                       suffixes=c(".control",".treated"))

    #Repair ordering after merging
    comp_merg <- comp_merg[order(comp_merg$RNAid, comp_merg$Pos),]
    #Changes NA to 0 - treat no events as 0 events
    comp_merg[is.na(comp_merg)] <- 0

    ###Calculate deltaTCR as described in HRF-Seq paper:
    #Calculate probabilistic difference
    dtcr <- (comp_merg$TCR.treated -
                 comp_merg$TCR.control)/(1 - comp_merg$TCR.control)
    #If bring_to_zero=TRUE, all negative deltaTCRs bring to 0, else change -Inf
    #to NA [possible when treated=0 and control=1]
    if(bring_to_zero){
        dtcr[dtcr < 0] <- 0
    } else
        dtcr[dtcr==-Inf] <- NA

    #Import dtcr to merged data frame
    comp_merg$dtcr <- dtcr
    ###

    #Split merged data frame by RNA
    compmerg_by_RNA <- split(comp_merg, f=comp_merg$RNAid, drop=TRUE)

    #Do processing (smooth, offset, p-values)
    normalized <- do.call(rbind, lapply(compmerg_by_RNA,
                                        FUN=.process_oneRNA_compmerg,
                                        window_size, nt_offset))
    #Keep only relevant columns
    normalized <- data.frame(RNAid=normalized$RNAid, Pos=normalized$Pos,
                             nt=normalized$nt, dtcr=normalized$dtcr,
                             dtcr.p=normalized$dtcr.p)

    #Change NaN to NA for consistency [NaN often created at the 5' end of RNA]
    normalized$dtcr[is.nan(normalized$dtcr)] <- NA
    normalized$dtcr.p[is.nan(normalized$dtcr.p)] <- NA

    #If add_to specified, merge with existing normalized data frame:
    if(!missing(add_to)){
        if (!is.null(add_to)){
            add_to_df <- GR2norm_df(add_to)
            normalized <- merge(add_to_df, normalized,
                                by=c("RNAid", "Pos", "nt"),
                                suffixes=c(".old",".new"))
        }
    }
    ###

    normalized <- normalized[order(normalized$RNAid, normalized$Pos),]
    norm_df2GR(normalized)
}

###Auxiliary functions

#Function to calculate p-values for dtcr, it uses test for comparing Two
#Population Proportions: (z-test, as e.g. shown on
#http://www.socscistatistics.com/tests/ztest/
#or https://onlinecourses.science.psu.edu/stat414/node/268)

#T_ctrl - terminations control, C_ctrl - coverage control,
#T_tr - terminations treated, C_tr - coverage treated
#' @importFrom stats pnorm
.compare_prop <- function(T_ctrl, C_ctrl, T_tr, C_tr, window_size){

    window_side <- window_size/2-0.5

    #Prepare running sums (the same window as in for smoothing dtcr):
    Tc <- colSums(.construct_smoothing_matrix(T_ctrl, window_size),
                  na.rm=TRUE)[(window_side+1):(length(T_ctrl) + window_side)]
    Cc <- colSums(.construct_smoothing_matrix(C_ctrl, window_size),
                  na.rm=TRUE)[(window_side+1):(length(C_ctrl) + window_side)]
    Tt <- colSums(.construct_smoothing_matrix(T_tr, window_size),
                  na.rm=TRUE)[(window_side+1):(length(T_tr) + window_side)]
    Ct <- colSums(.construct_smoothing_matrix(C_tr, window_size),
                  na.rm=TRUE)[(window_side+1):(length(C_tr) + window_side)]

    #Calculate test statistics z:
    pp <- (Tc + Tt)/(Cc + Ct) #pooled proportion
    se <- sqrt(pp*(1-pp)*(1/Cc + 1/Ct)) #standard error
    z <- (Tc/Cc - Tt/Ct)/se

    #Transform 'z' to two-tailed p-value:
    pnorm(abs(z), lower.tail= FALSE)*2

}

#Function smoothing and offsetting dtcr and adding p-values to each nucleotide
#(dataset split by RNA):
.process_oneRNA_compmerg <- function(oneRNA_compmerg, window_size, nt_offset){
    if(prod(diff(oneRNA_compmerg$Pos)==rep(1, nrow(oneRNA_compmerg)-1))!=1)
        oneRNA_compmerg <- .correct_merged(oneRNA_compmerg)

    #Check if data is properly sorted/corrected
    if(prod(diff(oneRNA_compmerg$Pos)==rep(1, nrow(oneRNA_compmerg)-1))==1){
        oneRNA_compmerg$dtcr <-
            .moving_average(oneRNA_compmerg$dtcr, window_size)[
                (1 + nt_offset):(nrow(oneRNA_compmerg)+nt_offset)]
        oneRNA_compmerg$dtcr.p <-
            .compare_prop(T_ctrl=oneRNA_compmerg$TC.control,
                          C_ctrl=oneRNA_compmerg$Cover.control,
                          T_tr=oneRNA_compmerg$TC.treated,
                          C_tr=oneRNA_compmerg$Cover.treated,
                          window_size=window_size)[(1 + nt_offset):(nrow(
                              oneRNA_compmerg) + nt_offset)]

        oneRNA_compmerg[1:(nrow(oneRNA_compmerg)-nt_offset),]

    }
    else {
        Message <- "Check if data was properly sorted by comp() function.
                    Problem with"
        stop(paste(strwrap(Message), oneRNA_compmerg$RNAid[1]))
    }

}

Try the RNAprobR package in your browser

Any scripts or data that you put into this service are public.

RNAprobR documentation built on Nov. 8, 2020, 5:57 p.m.