R/generate_noise_matrices.R

Defines functions simulate_replicates execute_sim_replicates fill_out_count_probability_table genewise_dropouts permute_count_in_dropout_range randomizer

Documented in execute_sim_replicates fill_out_count_probability_table genewise_dropouts permute_count_in_dropout_range randomizer simulate_replicates

# R script
# David Severson
# Scope: To simulate technical replicates.

################Underlying functions############################

##actually applies the estimated noise to the counts table
randomizer<-function(count, parameters, max_cumprob){
    qnbinom<-qpois<-dnbinom<-dpois<-NULL
    count<-ceiling(count)
    NB_intercept<-as.numeric(parameters$alpha2mu.intercept)
    NB_slope<-as.numeric(parameters$alpha2mu.slope)
    Mean2SDcor_residual<-as.numeric(parameters$max.res)
    Mean2SDcor_slope<-as.numeric(parameters$mu2sigma.slope)
    Mean2SDcor_intercept<-as.numeric(parameters$mu2sigma.intercept)
    sd_inflate<-as.numeric(parameters$sd.inflate)
    p<-NB_intercept+NB_slope*log2(count)
    if (p<0) {p=0}
    if (p>1) {p=1}
    top_probable_count<-max(qnbinom(max_cumprob, size = count^2/
        ((2^(Mean2SDcor_slope*log2(count)+Mean2SDcor_intercept+
        Mean2SDcor_residual*sd_inflate))^2-count),mu = count),
        qpois(max_cumprob, count))
    min_probable_count<-min(qnbinom(max_cumprob, size = count^2/
        ((2^(Mean2SDcor_slope*log2(count)+Mean2SDcor_intercept+
        Mean2SDcor_residual*sd_inflate))^2-count), mu = count,
        lower.tail = FALSE), qpois(max_cumprob, count, lower.tail = FALSE))
    sequence<-seq(from=min_probable_count, to=top_probable_count, by=1)
    sample(sequence,size=1,prob=p*dnbinom(sequence, size = count^2/((2^(
        Mean2SDcor_slope*log2(count)+Mean2SDcor_intercept+
        Mean2SDcor_residual*sd_inflate))^2-count), mu = count)+(1-p)*dpois(
        sequence,count))
}

permute_count_in_dropout_range<-function(count, dropout_injection){
    count<-round(count)
    dropout_prob<-dropout_injection[count,]$dropouts_given_transcripts
    sample(c(0,count),1, prob=as.numeric(c(dropout_prob, 1-dropout_prob)))
}

##seperates out genes for analysis
genewise_dropouts<-function(onegene_counts, dropout_recovery,
dropout_recovery.genes){
    round_counts<-NULL
    genewise_dropout_recovery_prob<-dropout_recovery.genes[gsub("[-,(,)]",
        ".",onegene_counts[1]),]
    onegene_counts<-as.numeric(onegene_counts[-1])
    sampling<-sample(
        dropout_recovery$transcripts, sum(onegene_counts==0),
        prob=genewise_dropout_recovery_prob, replace=TRUE)
    onegene_counts[onegene_counts==0]<-dropout_recovery[match(sample(
        dropout_recovery$transcripts, sum(onegene_counts==0),
        prob=genewise_dropout_recovery_prob, replace=TRUE),
        dropout_recovery$transcripts), round_counts]
    onegene_counts
}

fill_out_count_probability_table<-function(count, dropout_injection){
    counts<-transcripts<-NULL
    dropout_injection[counts>=count,][,min:=min(transcripts),][
        transcripts==min,][,counts:=count][,min:=NULL]
}

execute_sim_replicates<-function(sim_replicate, SCEList, max_cumprob){
    metadata<-round_counts<-counts<-transcripts<-NULL
    dropouts_given_transcripts<-dif<-assay<-NULL
    print(paste("Creating a simulated replicated counts matrix: ",
        sim_replicate,".", sep=""))
    noise_parameters<-metadata(SCEList)
    dropout_recovery.genes<-t(data.frame(noise_parameters$dropout_parameters,
        row.names = "transcripts")[,3:eval(dim(
        noise_parameters$dropout_parameters)[2]-1)])
    rownames(dropout_recovery.genes)<-gsub("^X([0-9])","\\1",
        rownames(dropout_recovery.genes))
    dropout_recovery<-data.table(noise_parameters$dropout_parameters[,
        seq_len(3)])[, round_counts:=round(counts),]
    dropout_injection<-data.table(noise_parameters$dropout_parameters[,
        seq_len(3)])[, round_counts:=round(counts),][,`:=`(
        transcripts=transcripts,
        dropouts_given_transcripts=dropouts_given_transcripts,
        dif=abs(counts-round_counts), min=min(abs(counts-round_counts))),
        by=round_counts][dif==min,][,`:=`(transcripts=transcripts, dif=NULL,
        min=NULL, dropouts_given_transcripts=dropouts_given_transcripts,
        counts=round_counts, round_counts=NULL)]
    count_max<-max(dropout_injection$counts)
    all_counts<-seq(from=1,to=max(dropout_injection$counts), by=1)
    if (length(all_counts)>(length(dropout_injection$counts)-1)){
        dropout_injection<-data.frame(do.call(rbind, lapply(all_counts,
            `fill_out_count_probability_table`, dropout_injection)),
            row.names="counts")
    } else{
        dropout_injection<-data.frame(dropout_injection, row.names = "counts")
    }
    noisy_counts<-t(data.frame(assay(SCEList, "observed_expression")))
    noisy_counts.zeros<-noisy_counts[,colSums(noisy_counts==0)>0]
    noisy_counts.nozeros<-noisy_counts[,colSums(noisy_counts==0)==0]
    indropout_range<-which(noisy_counts.zeros<=count_max & noisy_counts.zeros>0)
    dropouts<-vapply(noisy_counts.zeros[indropout_range],
        `permute_count_in_dropout_range`, dropout_injection,
        FUN.VALUE = numeric(1))
    t_counts<-rbind(colnames(noisy_counts.zeros), noisy_counts.zeros)
    noisy_counts.zeros<-data.table(t_counts)[,vapply(.SD,`genewise_dropouts`,
        dropout_recovery=dropout_recovery,
        dropout_recovery.genes=dropout_recovery.genes,
        FUN.VALUE = numeric(dim(.SD)[1]-1))]
    noisy_counts<-cbind(noisy_counts.zeros, noisy_counts.nozeros)[
        rownames(noisy_counts), colnames(noisy_counts)]
    noisy_counts.zeros[indropout_range]<-dropouts
    noisy_counts[noisy_counts>0]<-vapply(noisy_counts[noisy_counts>0],
        `randomizer`, parameters=noise_parameters$spikein_parameters,
        max_cumprob=max_cumprob, FUN.VALUE = numeric(1))
    t(noisy_counts)
}

############# USER FUNCTIONS ###################################

##executes noise perturbation with estimated parameters
simulate_replicates<-function(SCEList, max_cumprob=0.9999, n=3){
    metadata<-`metadata<-`<-NULL
    max_cumprob<-1-(1-max_cumprob)/2
    sim_rep_vector<-data.table(t(as.character(seq(from=1, to=n, by=1))))
    colnames(sim_rep_vector)<-gsub("^","Iteration_", sim_rep_vector)
    metadata(SCEList)$simulated_replicates<-lapply(sim_rep_vector,
        `execute_sim_replicates`, SCEList=SCEList, max_cumprob=max_cumprob)
    SCEList
}

Try the BEARscc package in your browser

Any scripts or data that you put into this service are public.

BEARscc documentation built on Nov. 8, 2020, 7:56 p.m.