R/initial.guess.R

# Function with calculate best initial guess for labeled (alpha) and 
# unlabeled (beta) RNA amounts
# 
# Author: demel
###############################################################################


#' Calculate initial alpha values (labeled RNA) by closed form solution
#' 
#' @param N number of cells
#' @param L length of genes
#' @param counts count table for genes (rows) and samples (columns)
#' @param gene.names names of features == rownames of count table for which the
#' initial guess should be calculated
#' @param seq.depths vector with sequencing depths for each sample
#' @param cross.cont vector with cross contamination rates for each sample
#' @param labeled.samples numeric vector indicating which of the samples are
#' labeled RNA
#' @param total.samples numeric vector indicating which of the samples are 
#' total RNA
#' @return initial guess for alpha for given genes
#' 
#' @author Carina Demel
initial.guess.alpha <- function(
		N = 1,
		L,
		counts,
		gene.names = rownames(counts),
		seq.depths,
		cross.cont,
		labeled.samples,
		total.samples = c(1 : length(seq.depths)[-labeled.samples])
){
	nl <- length(labeled.samples)
	nt <- length(total.samples)
	
	if (length(gene.names) == 1) {
		alpha <- 1 / (N * L[gene.names]) * 1 / 
				(nl * sum(cross.cont[total.samples]) - 
					nt * sum(cross.cont[labeled.samples])) * 
				(sum(cross.cont[total.samples]) * 
					sum(counts[gene.names,labeled.samples] / seq.depths[labeled.samples])
					- sum(cross.cont[labeled.samples]) * 
					sum(counts[gene.names,total.samples] / (seq.depths[total.samples]))
					)
	} else {
		alpha <- 1 / (N * L[gene.names]) * 1 / 
				(nl * sum(cross.cont[total.samples]) - 
					nt * sum(cross.cont[labeled.samples])) * 
				(sum(cross.cont[total.samples]) * 
					rowSums(t(t(counts[gene.names, labeled.samples]) / 
											seq.depths[labeled.samples]))
					- sum(cross.cont[labeled.samples]) * 
					rowSums(t(t(counts[gene.names, total.samples]) / 
											(seq.depths[total.samples])))
					)
	}

	return(alpha)
}


#' Calculate initial beta values (unlabeled RNA amount) by closed form solution
#' 
#' @param N number of cells
#' @param L length of genes
#' @param counts count table for genes (rows) and samples (columns)
#' @param gene.names names of features == rownames of count table for which the
#' initial guess should be calculated
#' @param seq.depths vector with sequencing depths for each sample
#' @param cross.cont vector with cross contamination rates for each sample
#' @param labeled.samples numeric vector indicating which of the samples are
#' labeled RNA
#' @param total.samples numeric vector indicating which of the samples are
#' total RNA
#' @return initial guess for beta for given genes
#' 
#' @author Carina Demel
initial.guess.beta = function(
		N,
		L,
		counts,
		gene.names,
		seq.depths,
		cross.cont,
		labeled.samples,
		total.samples
){
	nl <- length(labeled.samples)
	nt <- length(total.samples)
	
	if (length(gene.names) == 1) {
		beta <- 1 / (N * L[gene.names]) * 1 / 
				(sum(cross.cont[total.samples]) / nt - 
					sum(cross.cont[labeled.samples]) / nl) * 
				( sum(counts[gene.names, total.samples] / 
									(nt * seq.depths[total.samples])) 
					- sum(counts[gene.names, labeled.samples] / 
									(nl * seq.depths[labeled.samples])))
	} else {
		beta <- 1/(N * L[gene.names]) * 1 /
				(sum(cross.cont[total.samples]) / nt -
					sum(cross.cont[labeled.samples]) / nl) * 
				( rowSums(t(t(counts[gene.names, total.samples]) /
											(nt * seq.depths[total.samples])))
					- rowSums(t(t(counts[gene.names, labeled.samples]) /
											(nl * seq.depths[labeled.samples]))))
	}
		
	return(beta)
}



#' Function to estimate initial alpha and beta values based on the observed 
#' counts (closed form solution)
#' Takes into account if there are replicates for labeled samples or not
#' 
#' @param N number of cells
#' @param L length of features
#' @param counts feature and time specific counts
#' @param gene.names names of features == rownames of count table
#' @param seq.depths sequencing depth estimates for all samples
#' @param cross.cont cross contamination rate estimates for all samples
#' @param conditions vector of time points/treatment of experiment for each 
#' sample
#' @param conditions.labeling vector indicating for each sample if it was 
#' labeled RNA or total RNA
#' @return list of matrices, alpha and beta values for each feature and each 
#' time point
#' 
#' @author Carina Demel
#' @export
initial.guess <- function(
		N,
		L,
		counts,
		gene.names = rownames(counts),
		seq.depths,
		cross.cont,
		conditions = colnames(counts),
		conditions.labeling
){
	uniqueTimePoints <- unique(conditions)

	res <- sapply(1:length(uniqueTimePoints), function(t){ 
		labeled.samples <- which(conditions == uniqueTimePoints[t] & 
						conditions.labeling == "L")
		total.samples <- which(conditions == uniqueTimePoints[t] &
						conditions.labeling == "T")
		alpha.initial <- initial.guess.alpha(N, L, counts, gene.names, seq.depths,
				cross.cont, labeled.samples, total.samples)
		beta.initial <- initial.guess.beta(N, L, counts, gene.names, seq.depths,
				cross.cont, labeled.samples, total.samples)
		
		return(cbind(alpha.initial, beta.initial))
	})

	alpha.initial <- res[1:length(gene.names), ]
	beta.initial <- res[(length(gene.names)+1):nrow(res), ]
	alpha.initial[alpha.initial < 0] <- exp(1) 
	beta.initial[beta.initial < 0] <- exp(1) 
	res.list <- list(
			"alpha" = alpha.initial,
			"beta" = beta.initial
	)
	
	return(res.list)
}
carinademel/RNAlife documentation built on May 13, 2019, 12:43 p.m.