R/privateCisEffectFunctions.R

Defines functions .nulldist.all.wmw .nulldist.all.wcvm .datareduce .pi0est .redrawcol .redraw .countdiscoveries .powerunbalance .shift.est .probs2calls .alphabivariate .alphaest .pretest .prob.test.stats .wmw.test.stats .wcvm.test.stats .rawps .pval.perm.marg .countth .countth.eff .pvalbound .find.cgh.reg.data .lambda.per.reg .shrin.an.wcvm .shrin.an.prob .R2.stat .uni.an.prob .uni.an.wcvm .tuning

	.tuning <- function(data.in, datacgh.in, allest, alphanewmat, powerunbal, null.dists.tune, test.stat="wmw", seqg, nperm, pi0, fdrcut=0.25, nresamp=100, gridnr=30, minim=10, a=a, nosamp=nosamp, shiftsam=shiftsam, verbose){
		################################################################################################
		# function that does the tuning
		################################################################################################
	
		probs <- seq(0,gridnr/(gridnr+1), 1/(gridnr+1))
		probstruncate <- probs[probs<=((100-minim)/100)]
		allest2 <- allest[seqg]
		alphanewmat2<-alphanewmat[seqg,]
		data.in2 <- data.in[seqg,]
		datacgh.in2 <- datacgh.in[seqg,]
		powerunbal2 <- powerunbal[seqg]
		quant <- quantile(powerunbal2, probs=probstruncate)
		whatthrmat <- c()
		for (j in 1:nresamp){
			if ((j %% 50) == 0){ if (verbose){ cat(paste(j, "of", nresamp, "resamples done, and counting...", sep=" "), "\n") } }
			shiftsampled <- sample(shiftsam, nrow(data.in2), replace=TRUE)
			pi0cutoff <- quantile(shiftsampled, pi0)
			shiftsampled <- sapply(shiftsampled, function(x){if (x <= pi0cutoff){ return(0) } else { return(x)}})
			redrawall <- t(sapply(1:nrow(data.in2), .redrawcol, allest=allest2, alphanmat=alphanewmat2, shiftsampled=shiftsampled, data2=data.in2, a=a, nosamp=nosamp))
			newdata <- cbind(datacgh.in2[,-(1:3)], redrawall) 
			if(test.stat=="wmw"){ test.resamp <- apply(newdata, 1, .wmw.test.stats, nosamp, a) } else { test.resamp <- apply(newdata, 1, .wcvm.test.stats, nosamp, a) }
			rawpvals.test.resamp <- .rawps(test.resamp, null.dists.tune, nperm)
			rawp_powerunbal <- cbind(rawpvals.test.resamp, powerunbal2)
			whatthresh <- sapply(quant, .countdiscoveries, rawp_powerunbal=rawp_powerunbal, fdrcutoff=fdrcut)
			whatthrmat <- rbind(whatthrmat, whatthresh)  
		}
		whatthrmean <- apply(whatthrmat, 2, mean)
		# print(whatthrmean)
		bestthr <- which.max(whatthrmean)    
		return(quant[bestthr])
	}

	.uni.an.wcvm <- function(data.both, nosamp, a, nperm, low.ci.thres=0.10, verbose=TRUE){
		################################################################################################
		# function that performs a univariate analysis with the weighted cvm-statistic,
		# with the efficient p-value calculation.
		################################################################################################

		# Calculate the observed weighted cvm test statistics
		wcvm.obs <- apply(data.both, 1, .wcvm.test.stats, nosamp, a)

		# Actual analysis
		data.perm <- data.both
		# data.perm <- rbind(data.perm, data.perm)
		total.genes.on.chr <- dim(data.both)[1]
		remainders <- c(1:dim(data.perm)[1])
		steps <- sort(unique(c(0,25,50,100,150,200,250,500,750,1000,1250,1500,1750,2000,2250,2500,2750,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000,9500,seq(from=10000,to=50000,by=1000),nperm)))
		steps <- steps[steps <= nperm]
		wcvm.mat <- c()
		for(j in 1:(length(steps)-1)){
			for(i in 1:(steps[j+1]-steps[j])){
				if (verbose){  if (((steps[j]+i) %% 100) == 0){ cat(paste(steps[j]+i," of ", steps[length(steps)]," permutations done, and counting...", sep=""), "\n") } }
				x <- sample(1:nosamp,nosamp) + a*nosamp
				data.ran <- cbind(data.perm[, c(1:(a*nosamp))], data.perm[, x])
				wcvm.ran <- apply(data.ran, 1, .wcvm.test.stats, nosamp, a)
				wcvm.mat <- cbind(wcvm.mat, wcvm.ran)
			}

			# compute pval bound and delete row from data set and permutation set when 0.001 < lower bound
			perm.and.obs <- cbind(wcvm.mat, wcvm.obs)
			pvals <- apply(perm.and.obs, 1, .countth)/steps[j+1]
			pbound <- sapply(pvals, .pvalbound, steps[j+1])
			index <- cbind(1:length(pbound), pbound)
			pind <- index[pbound < low.ci.thres, 1]

			if (length(pind) > 2){ remainders <- remainders[pind] } else { pind <- c(1:length(remainders)) }

			wcvm.mat <- wcvm.mat[pind, ]
			wcvm.obs <- wcvm.obs[pind]
		
			if (verbose){   cat(paste(steps[j]+i, "of", steps[length(steps)], " permutations done, and", length(remainders), "of", total.genes.on.chr, "genes remaining...", sep=" "), "\n")   }
			data.perm <- data.both[remainders, , drop=FALSE]
		}

		# Generate list with raw p-values for all genes 
		raw.pvals <- cbind(c(1:dim(data.both)[1]), rep(1,dim(data.both)[1]))
		raw.pvals[remainders,2] <- pvals[pind]

		# Calculate BH-adjusted p-values
		adjpvals <- cbind(raw.pvals, p.adjust(raw.pvals[,2], "BH"))
		adjpvals[,2:3] <- round(adjpvals[,2:3], digits=4)

		# Some editing for presentation purposes
		colnames(adjpvals) <- c("clone.id", "raw.p", "adj.p")

		return(adjpvals)
	}

	.uni.an.prob <- function(data.both, nosamp, a, nperm, low.ci.thres=0.10, verbose=TRUE){
		################################################################################################
		# function that performs a univariate analysis with the normalized weighted wm-statistic,
		# with the efficient p-value calculation.
		################################################################################################

		# Calculate the observed weighted cvm test statistics
		prob.obs <- apply(data.both, 1, .prob.test.stats, nosamp, a)

		# Actual analysis
		data.perm <- data.both
		total.genes.on.chr <- dim(data.both)[1]
		remainders <- c(1:dim(data.perm)[1])
		steps <- sort(unique(c(0,25,50,100,150,200,250,500,750,1000,1250,1500,1750,2000,2250,2500,2750,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000,9500,seq(from=10000,to=50000,by=1000),nperm)))
		steps <- steps[steps <= nperm]
		prob.mat <- c()
		for(j in 1:(length(steps)-1)){
			for(i in 1:(steps[j+1]-steps[j])){
				if (verbose){  if (((steps[j]+i) %% 100) == 0){ cat(paste(steps[j]+i," of ", steps[length(steps)]," permutations done, and counting...", sep=""), "\n") }  }
				x <- sample(1:nosamp,nosamp) + a*nosamp
				data.ran <- cbind(data.perm[,c(1:(a*nosamp))],data.perm[,x])
				prob.ran <- apply(data.ran, 1, .prob.test.stats, nosamp, a)
				prob.mat <- cbind(prob.mat, prob.ran)
			}

			# compute pval bound and delete row from data set and permutation set when 0.001 < lower bound
			perm.and.obs <- cbind(prob.mat, prob.obs)
			pvals <- apply(perm.and.obs, 1, .countth)/steps[j+1]
			pbound <- sapply(pvals, .pvalbound, steps[j+1])
			index <- cbind(1:length(pbound), pbound)
			pind <- index[pbound < low.ci.thres,1]

			if (length(pind) > 2){ remainders <- remainders[pind] } else { pind <- c(1:length(remainders)) }

			prob.mat <- prob.mat[pind,]
			prob.obs <- prob.obs[pind]

			# cat(paste(length(remainders), "of", total.genes.on.chr, "genes remaining...", sep=" "), "\n")
			if (verbose){   cat(paste(steps[j]+i, "of", steps[length(steps)], " permutations done, and", length(remainders), "of", total.genes.on.chr, "genes remaining...", sep=" "), "\n")  }
			data.perm <- data.both[remainders, , drop=FALSE]
	    	}

		# Generate list with raw p-values for all genes    
		raw.pvals <- cbind(c(1:dim(data.both)[1]), rep(1,dim(data.both)[1]))
		raw.pvals[remainders, 2] <- pvals[pind]

		# Calculate BH-adjusted p-values
		adjpvals <- cbind(raw.pvals, p.adjust(raw.pvals[,2], "BH"))
		adjpvals[,2:3] <- round(adjpvals[,2:3], digits=4)

		# Some editing for presentation purposes
		colnames(adjpvals) <- c("clone.id", "raw.p", "adj.p")
	
		return(adjpvals)
	}


	.R2.stat <- function(data.both, a, nosamp){
		################################################################################################
		# function that calculates the R^2 value.
		################################################################################################

		# Define function for calculated of numerator of R^2 statistic
		r2.numerator <- function(data.both, a, nosamp){
			alpha.ind <- matrix(data.both[c(1:(a*nosamp))], ncol=a, byrow=TRUE)
			alpha.1.mom <- apply(alpha.ind, 2, mean)
			alpha.2.mom <- t(alpha.ind) %*% alpha.ind / nosamp

			cgh.em <- cbind(matrix(data.both[c(1:(a*nosamp))], ncol=2, byrow=TRUE), data.both[c((a*nosamp+1):((a+1)*nosamp))])

			mu1 <- 1 / nosamp*sum(cgh.em[,3]*(cgh.em[,1]*alpha.1.mom[2] - alpha.2.mom[2,1]) / (alpha.2.mom[1,1]*alpha.1.mom[2]-alpha.1.mom[1]*alpha.2.mom[1,2]))
			mu2 <- 1 / nosamp*sum(cgh.em[,3]*(cgh.em[,2]*alpha.1.mom[1] - alpha.2.mom[2,1]) / (alpha.2.mom[2,2]*alpha.1.mom[1]-alpha.1.mom[2]*alpha.2.mom[1,2]))
			call.means <- matrix(c(mu1, mu2), nrow=1)

        		return(sum((data.both[a*nosamp+c(1:nosamp)] - alpha.ind %*% t(call.means))^2)/(nosamp-1))
		}

		# Calculate R^2 statistic
		r2.denominator <- apply(data.both[, c((a*nosamp+1):((a+1)*nosamp))], 1, var)
		numerator <- apply(data.both, 1, r2.numerator, a=a, nosamp=nosamp)
		R2 <- 1- numerator / r2.denominator
		for (i in 1:length(R2)){ R2[i] <- min(1, max(0, R2[i])) }
		return(R2)
	}


	.shrin.an.prob <- function(data.both, nosamp, a, nperm, low.ci.thres=0.1, datacgh.org, verbose=TRUE){
		################################################################################################
		# function that performs a regional analysis with the normalized weighted mw-statistic, also
		# called the probability in this context, with shrinkage per region and efficient p-value calculation.
		################################################################################################

		shrunken.prob.test.stats <- function(reg.bounds.lambda, data.both, nosamp, a){
			################################################################################################
			# Function that reshuffles the shrunken test statistics to get them in the right format.
			################################################################################################

			shrunken.test.stats.wrong.format <- apply(reg.bounds.lambda, 1, "shrunken.prob.test.stats.per.reg", cgh.em=data.both, nosamp=nosamp, a=a)
			if (is.numeric(shrunken.test.stats.wrong.format)){ 
				shrunken.test.stats <- shrunken.test.stats.wrong.format
			}
			if (is.matrix(shrunken.test.stats.wrong.format)){
				shrunken.test.stats <- as.numeric(shrunken.test.stats.wrong.format)
			}
			if (is.list(shrunken.test.stats.wrong.format)){
				shrunken.test.stats <- NULL
				for (i in 1:dim(reg.bounds.lambda)[1]){
					shrunken.test.stats <- c(shrunken.test.stats, shrunken.test.stats.wrong.format[[i]])
				}
			}
			return(shrunken.test.stats)
		}

		shrunken.prob.test.stats.per.reg <- function(bounds.lambda, cgh.em, nosamp, a){
			################################################################################################
			# Function that calculates the normalized weighted MW test statistics for all clones in a region.
			################################################################################################
	
			lambda <- bounds.lambda[3]
			bounds <- bounds.lambda[c(1:2)]
			if (bounds[1] != bounds[2]){
				cgh.em <- cgh.em[c(bounds[1]:bounds[2]),]
				marg.test.stats <- apply(cgh.em,1, .prob.test.stats, nosamp,a)
				shrunken.test.stats <- lambda*marg.test.stats + (1-lambda)*mean(marg.test.stats)
			} else {
				shrunken.test.stats <- .wcvm.test.stats(cgh.em[bounds[1],],nosamp,a)
			}
			return(shrunken.test.stats)
		}

		if (verbose){   cat("construct regions...", "\n")  }
		reg.bounds <- .find.cgh.reg.data(data.both, a, nosamp, datacgh.org)

		if (verbose){   cat("calculate shrinkage parameters...", "\n")   }
		lambda.of.reg <- apply(reg.bounds, 1, .lambda.per.reg, data.both=data.both, a=a, nosamp=nosamp)
		reg.bounds.lambda <- cbind(reg.bounds, lambda.of.reg)
		colnames(reg.bounds.lambda) <- NULL

		if (verbose){   cat("calculate observed test statistics...", "\n")  }
		shrunken.prob.obs <- shrunken.prob.test.stats(reg.bounds.lambda, data.both, nosamp, a)

		if (verbose){   cat("calculate null distribution...", "\n")    }
		prob.obs <- shrunken.prob.obs
		data.perm <- data.both
		total.genes.on.chr <- dim(data.both)[1]
		remainders <- c(1:dim(data.perm)[1])
		steps <- sort(unique(c(0,25,50,100,150,200,250,500,750,1000,1250,1500,1750,2000,2250,2500,2750,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000,9500,seq(from=10000,to=50000,by=1000),nperm)))
		steps <- steps[steps <= nperm]
		prob.mat <- c()
		for(j in 1:(length(steps)-1)){
			for(i in 1:(steps[j+1]-steps[j])){
				if (verbose){   if (((steps[j]+i) %% 100) == 0){ cat(paste(steps[j]+i," of ", steps[length(steps)]," permutations done, and counting...", sep=""), "\n") }  }
				x <- sample(1:nosamp,nosamp) + a*nosamp
				data.ran <- cbind(data.perm[, c(1:(a*nosamp))], data.perm[,x])
				prob.ran <- shrunken.prob.test.stats(reg.bounds.lambda, data.ran, nosamp, a)
				prob.mat <- cbind(prob.mat, prob.ran[remainders])
			}
			# compute pval bound and delete row from data set and permutation set when 0.001 < lower bound
			perm.and.obs <- cbind(prob.mat, prob.obs)
			pvals <- apply(perm.and.obs, 1, .countth)/steps[j+1]
			pbound <- sapply(pvals, .pvalbound, steps[j+1])
			index <- cbind(1:length(pbound), pbound)
			pind <- index[pbound < low.ci.thres, 1]

			if (length(pind) > 2){ remainders <- remainders[pind] } else { pind <- c(1:length(remainders)) }

			prob.mat <- prob.mat[pind, , drop=FALSE]
			prob.obs <- prob.obs[pind]

			if (verbose){   cat(paste(steps[j]+i, "of", steps[length(steps)], " permutations done, and", length(remainders), "of", total.genes.on.chr, "genes remaining...", sep=" "), "\n")  }
		}

		# Generate list with raw p-values for all genes    
		raw.pvals <- cbind(c(1:dim(data.both)[1]), rep(1,dim(data.both)[1]))
		raw.pvals[remainders,2] <- pvals[pind]

		# Calculate BH-adjusted p-values
		adjpvals <- cbind(raw.pvals, p.adjust(raw.pvals[,2], "BH"))
		adjpvals[,2:3] <- round(adjpvals[,2:3], digits=4)

		reg.details <- NULL
		for (i in 1:dim(reg.bounds.lambda)[1]){
			reg.length <- (reg.bounds.lambda[i,2] - reg.bounds.lambda[i,1] +1)
			reg.details <- rbind(reg.details, cbind(rep(i,reg.length), matrix(rep(reg.bounds.lambda[i,], reg.length), nrow=reg.length, byrow=TRUE)))
		}
		adjpvals <- cbind(adjpvals[,1], reg.details, adjpvals[,2:3])
		colnames(adjpvals) <- c("clone.id", "reg.id", "begin.reg", "end.reg", "shrinkage", "raw.p", "adj.p")

		return(adjpvals)
	}

	.shrin.an.wcvm <- function(data.both, nosamp, a, nperm, low.ci.thres=0.1, datacgh.org, verbose=TRUE){
		################################################################################################
		# function that performs a regional analysis with the weighted cvm-statistic,
		# with shrinkage per region and efficient p-value calculation.
		################################################################################################
	
		shrunken.wcvm.test.stats <- function(reg.bounds.lambda, data.both, nosamp, a){
			################################################################################################
			# Function that reshuffles the shrunken test statistics to get them in the right format.
			################################################################################################
	
			shrunken.test.stats.wrong.format <- apply(reg.bounds.lambda, 1, "shrunken.wcvm.test.stats.per.reg", cgh.em=data.both, nosamp=nosamp, a=a)
			if (is.numeric(shrunken.test.stats.wrong.format)){ 
				shrunken.test.stats <- shrunken.test.stats.wrong.format
			}
			if (is.matrix(shrunken.test.stats.wrong.format)){
				shrunken.test.stats <- as.numeric(shrunken.test.stats.wrong.format)
			}
			if (is.list(shrunken.test.stats.wrong.format)){
				shrunken.test.stats <- NULL
				for (i in 1:dim(reg.bounds.lambda)[1]){
					shrunken.test.stats <- c(shrunken.test.stats, shrunken.test.stats.wrong.format[[i]])
				}
			}
			return(shrunken.test.stats)
		}
	
		shrunken.wcvm.test.stats.per.reg <- function(bounds.lambda, cgh.em, nosamp, a){
			################################################################################################
			# Function that calculates the weighted CvMe test statistics for all clones in a region.
			################################################################################################
	
			lambda <- bounds.lambda[3]
			bounds <- bounds.lambda[c(1:2)]
			if (bounds[1] != bounds[2]){
 				cgh.em <- cgh.em[c(bounds[1]:bounds[2]),]
				marg.test.stats <- apply(cgh.em, 1, .wcvm.test.stats, nosamp, a)
				shrunken.test.stats <- lambda * marg.test.stats + (1-lambda) * mean(marg.test.stats)
			} else {
				shrunken.test.stats <- .wcvm.test.stats(cgh.em[bounds[1],], nosamp, a)
			}
			return(shrunken.test.stats)
		}
    
		if (verbose){   cat("construct regions...", "\n")  }
		reg.bounds <- .find.cgh.reg.data(data.both, a, nosamp, datacgh.org)

		if (verbose){   cat("calculate shrinkage parameters...", "\n")   }
		lambda.of.reg <- apply(reg.bounds, 1, .lambda.per.reg, data.both=data.both, a=a, nosamp=nosamp)
		reg.bounds.lambda <- cbind(reg.bounds, lambda.of.reg)
		colnames(reg.bounds.lambda) <- NULL

		if (verbose){   cat("calculate observed test statistics...", "\n")   }
		shrunken.wcvm.obs <- shrunken.wcvm.test.stats(reg.bounds.lambda, data.both, nosamp, a)

		if (verbose){   cat("calculate null distribution...", "\n")  }
		wcvm.obs <- shrunken.wcvm.obs
		data.perm <- data.both
		total.genes.on.chr <- dim(data.both)[1]
		remainders <- c(1:dim(data.perm)[1])
		steps <- sort(unique(c(0,25,50,100,150,200,250,500,750,1000,1250,1500,1750,2000,2250,2500,2750,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000,9500,seq(from=10000,to=50000,by=1000),nperm)))
		steps <- steps[steps <= nperm]
		wcvm.mat <- c()
		for(j in 1:(length(steps)-1)){
			for(i in 1:(steps[j+1]-steps[j])){
				if (verbose){   if (((steps[j]+i) %% 100) == 0){ cat(paste(steps[j]+i," of ", steps[length(steps)], " permutations done, and counting...", sep=""), "\n") }   }
				x <- sample(1:nosamp,nosamp) + a * nosamp
				data.ran <- cbind(data.perm[, c(1:(a*nosamp))],data.perm[,x])
				wcvm.ran <- shrunken.wcvm.test.stats(reg.bounds.lambda, data.ran, nosamp, a)
				wcvm.mat <- cbind(wcvm.mat, wcvm.ran[remainders])
			}
			# compute pval bound and delete row from data set and permutation set when 0.001 < lower bound
			perm.and.obs <- cbind(wcvm.mat, wcvm.obs)
			pvals <- apply(perm.and.obs, 1, .countth)/steps[j+1]
			pbound <- sapply(pvals, .pvalbound, steps[j+1])
			index <- cbind(1:length(pbound), pbound)
			pind <- index[pbound < low.ci.thres,1]
	
			if (length(pind) > 2){ remainders <- remainders[pind] } else { pind <- c(1:length(remainders)) }

			wcvm.mat <- wcvm.mat[pind, , drop=FALSE]
			wcvm.obs <- wcvm.obs[pind]

			if (verbose){   cat(paste(steps[j]+i, "of", steps[length(steps)], " permutations done, and", length(remainders), "of", total.genes.on.chr, "genes remaining...", sep=" "), "\n")   }
		}

		# Generate list with raw p-values for all genes    
		raw.pvals <- cbind(c(1:dim(data.both)[1]), rep(1,dim(data.both)[1]))
		raw.pvals[remainders,2] <- pvals[pind]

		# Calculate BH-adjusted p-values
		adjpvals <- cbind(raw.pvals, p.adjust(raw.pvals[,2], "BH"))
		adjpvals[,2:3] <- round(adjpvals[,2:3], digits=4)

		reg.details <- NULL
		for (i in 1:dim(reg.bounds.lambda)[1]){
			reg.length <- (reg.bounds.lambda[i,2] - reg.bounds.lambda[i,1] +1)
			reg.details <- rbind(reg.details, cbind(rep(i, reg.length), matrix(rep(reg.bounds.lambda[i,], reg.length), nrow=reg.length, byrow=TRUE)))
		}
		adjpvals <- cbind(adjpvals[,1], reg.details, adjpvals[,2:3])
		colnames(adjpvals) <- c("clone.id", "reg.id", "begin.reg", "end.reg", "shrinkage", "raw.p", "adj.p")

		return(adjpvals)
	}

	.lambda.per.reg <- function(bounds, data.both, a, nosamp){
		################################################################################################
		# function that calculates the shrinkage factor for a region.
		################################################################################################

		# if the region consists of more than one clone, 
		if (bounds[1] != bounds[2]){
			# select data from region
			data.both.org <- matrix(data.both[c(bounds[1]:bounds[2]),], nrow=(bounds[2]-bounds[1]+1))
			data.both.org <- data.both.org[, c((dim(data.both)[2] - dim(data.both)[2] / 3 + 1):dim(data.both)[2])]
	        
			# calculate mean correlation, and shrinkage parameter
			slh <- cor(t(data.both.org), method="spearman") 
			lambda.final <- 1 - max(0, mean(slh[upper.tri(slh)]))
		}
	
		# if region consists of one clone, set shrinkage parameter equal to 1.
		if (bounds[1] == bounds[2]){ lambda.final <- 1 }
		return(lambda.final)
	}

	.find.cgh.reg.data <- function(data.both, a, nosamp, datacgh.org){
		################################################################################################
		# Compress the data to regions. A new regions starts when the CGH probabilities change.
		################################################################################################

		# every change in the call probabilities implies a new region
		cgh.data <- data.both[,c(1:(a*nosamp))]
    
		splitter <- list()
		splitter[[1]] <- c(1)
		index.temp <- 1
		j <- 1
		for (i in 1:(dim(cgh.data)[1]-1)){
			if (all(cgh.data[i,] == cgh.data[i+1,])){
				index.temp <- c(index.temp,i+1)
				splitter[[j]] <- index.temp
			
			} else {
				index.temp <- i+1
				j <- j + 1
				splitter[[j]] <- index.temp
			}
		}

		region.details <- NULL
		for (i in 1:length(splitter)){
			region.details <- rbind(region.details, c(min(splitter[[i]]), max(splitter[[i]])))
		}
		return(region.details)
	}

	.pvalbound <- function(pval, np){
		################################################################################################
		# p-value lower conf. bound. 3.09 = Z_{0.001}
		################################################################################################
	
		return(pval-sqrt(pval * (1-pval) / np) * 3.09)
	}

	.countth.eff <- function(statlist, threshold){
		################################################################################################
		# counts the number of values in statlist exceed threshold.
		################################################################################################
	
		return(length(statlist[statlist >= threshold]))  
	}

	.countth <- function(statlist){
		################################################################################################
		# Counts the number of values in statlist exceed threshold.
		################################################################################################

		threshold <- as.numeric(statlist[length(statlist)])
		statlist <- as.numeric(statlist[c(1:(length(statlist)-1))])
		return(length(statlist[statlist >= threshold]))
	}

	.pval.perm.marg <- function(observed, permuted, nperm){
		################################################################################################
		# Calculates the marginal p-values.
		# The p-value of gene is calculated using the null-ditribution 
		# resulting from the permutations of that gene.
		################################################################################################

		perm.and.obs <- cbind(permuted, observed)
		return(apply(perm.and.obs, 1, .countth) / nperm)
	}

	.rawps <- function(stats.obs, nulldists, nperm){
		################################################################################################
		# calculate the raw p-values using the observed test statistics
		# and their null distribution.
		################################################################################################

		pval.ln <- .pval.perm.marg(stats.obs, nulldists, nperm)
		return(pval.ln)
	}

	.wcvm.test.stats <- function(cgh.em, nosamp, a){
		################################################################################################
		# function that calculates the wcvm test statistics for one clone.
		################################################################################################
	
		# makes a matrix of call probs
		cgh.2cat <- matrix(cgh.em[c(1:(a*nosamp))], ncol=a, byrow=TRUE)
	
		# calculate contrast coefficients
		alphaas <- t(cgh.2cat) %*% cgh.2cat
		cs <- as.numeric(solve(alphaas) %*% matrix(c(-1,1), ncol=1))

		cgh.em <- cbind(cgh.2cat, cgh.em[c((a*nosamp+1):((a+1)*nosamp))])
		cgh.em <- cbind(cgh.em[order(cgh.em[,3]),], rep(1/dim(cgh.em)[1], dim(cgh.em)[1]))
		cgh.em <- cbind(cgh.em, cumsum(cgh.em[, 4]), cumsum(cgh.em[, 1] * cgh.em[, dim(cgh.em)[2]]) * cs[1], cumsum(cgh.em[, 2] * cgh.em[, dim(cgh.em)[2]]) * cs[2])

		test.stat <- -sum(cgh.em[,7] + cgh.em[,6]) / nosamp
		return(test.stat)
	}

	.wmw.test.stats <- function(cgh.em, nosamp, a){
		################################################################################################
		# function that calculates the wMW-like test statistics for one clone.
		################################################################################################

		# makes a matrix; first 2 call probs, last is expression
		cgh.em <- cbind(matrix(cgh.em[c(1:(a*nosamp))], ncol=a,byrow=TRUE), cgh.em[c((a*nosamp+1):((a+1)*nosamp))]) 

		# sort by expression
		cgh.em <- cgh.em[order(cgh.em[,(a+1)]),]  

		# shift probs 1 position, because I_{Xi < Xj}
		cgh.em <- cbind(cgh.em, rbind(rep(0,a), apply(cgh.em[,1:a], 2, cumsum)[-nosamp,])) 

		# '2' = prob. class 1, a+2 = cum. prob class 2
		test.stat <- sum(cgh.em[,a+2]*cgh.em[,2]) 
		return(test.stat)
	}

	.prob.test.stats <- function(cgh.em, nosamp, a){
		################################################################################################
		# function that calculates the wMW-like test statistics for one clone.
		################################################################################################

		# makes a matrix of call probs
		cgh.2cat <- matrix(cgh.em[c(1:(a*nosamp))], ncol=a, byrow=TRUE)
	
		# calculate contrast coefficients
		alphaas <- t(cgh.2cat) %*% cgh.2cat
		cs <- c(det(alphaas), alphaas[1,2]*sum(alphaas)/2)

		# makes a matrix; first 2 call probs, last is expression
		cgh.em <- cbind(matrix(cgh.em[c(1:(a*nosamp))], ncol=a,byrow=TRUE), cgh.em[c((a*nosamp+1):((a+1)*nosamp))]) 

		# sort by expression
		cgh.em <- cgh.em[order(cgh.em[,(a+1)]),]  

		# shift probs 1 position, because I_{Xi < Xj}
		cgh.em <- cbind(cgh.em,rbind(rep(0,a), apply(cgh.em[,1:a], 2, cumsum)[-nosamp,])) 
	
		# '2' = prob. class 1, a+2 = cum. prob class 2
		test.stat <- (sum(cgh.em[,a+2] * cgh.em[,2]) - cs[2]) / cs[1]
		return(test.stat)
	}

	.pretest <- function(alphascgh){
		################################################################################################
		# function that determines which hypothesis is tested (loss vs. no-loss or no-gain vs. gain).
		# also merges the call probabilities of the aberrated class that is not-dominant.
		################################################################################################

		alphas <- alphascgh[1:3]
		probs <- matrix(alphascgh[-(1:3)], ncol=3, byrow=TRUE)
		if (alphas[1] >= alphas[3]){
			probs2 <- cbind(probs[,1], probs[,2]+probs[,3])
			alphas2 <- c(alphas[1], alphas[2]+alphas[3])
			return(c(1, alphas2, as.vector(t(probs2))))
		} else {
			probs2 <- cbind(probs[,1]+probs[,2],probs[,3])
			alphas2 <- c(alphas[1]+alphas[2], alphas[3])
			return(c(2, alphas2, as.vector(t(probs2))))
		} 
	}

	.alphaest <- function(cgh.em, nosamp, a){
		################################################################################################
		# function that calculated the first order moments of the call probabilities.
		################################################################################################
	
		cgh.em <- matrix(cgh.em[c(1:(a*nosamp))], ncol=a,byrow=TRUE) 
		return(apply(cgh.em,2,mean))
	}

	.alphabivariate <- function(cgh.em, nosamp, a){
		################################################################################################
		# function that calculated the second order moments of the call probabilities.
		################################################################################################
	    
		cgh.em <- matrix(cgh.em[c(1:(a*nosamp))], ncol=a, byrow=TRUE) 
		cgh.em1 <- cgh.em[,1]
		cgh.em2 <- cgh.em[,2]
		return(c(1 / nosamp * (cgh.em1 %*% cgh.em1), 1 / nosamp * (cgh.em2 %*% cgh.em2), 1 / nosamp * (cgh.em1 %*% cgh.em2)))
	}

	.probs2calls <- function(problist){
		################################################################################################
		# function that converts sof calls to hard calls
		################################################################################################

		maxprobposition <- which.max(problist)
		call.list <- rep(0, length(problist))
		call.list[maxprobposition] <- 1
		return(call.list)
	}

	.shift.est <- function(row, nosamp, a, data2, alphabmat, alphanmat, minalphathr){
		################################################################################################
		# function that estimates the effect size.
		# if a gene exceeds the power unbalance threshold NA is returned.
		################################################################################################

		cgh.em <- cbind(matrix(data2[row,c(1:(a*nosamp))], ncol=a, byrow=TRUE), data2[row,c((a*nosamp+1):((a+1)*nosamp))])
		alphasbiv <- alphabmat[row,]
		alphas <- alphanmat[row,]
		c1 <- (alphasbiv[1]/alphasbiv[3] - alphas[1]/alphas[2])^(-1)
		c2 <- (alphasbiv[2]/alphasbiv[3] - alphas[2]/alphas[1])^(-1)
		if (is.na(c1) | is.na(c2)) {
			return(NA)
		} else {
			if (max(c1,c2) >= minalphathr) {
				return(NA)
			} else {
				mu1 <- 1 / nosamp * sum(cgh.em[,3] * (cgh.em[,1] * alphas[2] - alphasbiv[3]) / (alphasbiv[1] * alphas[2] - alphas[1] * alphasbiv[3]))
				mu2 <- 1 / nosamp * sum(cgh.em[,3] * (cgh.em[,2] * alphas[1] - alphasbiv[3]) / (alphasbiv[2] * alphas[1] - alphas[2] * alphasbiv[3]))
				shiftest <- mu2 - mu1
				return(shiftest)
			}
		}
	}
   
	.powerunbalance <- function(row, alphabmat){
		################################################################################################
		# function that calculates the power unbalance
		################################################################################################

		alphasbiv <- alphabmat[row,]
		unbalance <- alphasbiv[1] * alphasbiv[2] - alphasbiv[3]^2
		return(unbalance)
	}	

	.countdiscoveries <- function(powerquant, rawp_powerunbal, fdrcutoff){
		################################################################################################
		# function that converts sof calls to hard calls
		################################################################################################

		rawp_filt <- rawp_powerunbal[rawp_powerunbal[,2]>=powerquant,][,1]
		m <- length(rawp_filt)
		adpjrawp_filt <- cbind(rawp_filt, p.adjust(rawp_filt, "BH"))
		selected <- matrix(adpjrawp_filt[adpjrawp_filt[,2] <= fdrcutoff,],ncol=2)
		count <- nrow(selected) 
		false <- ifelse(count > 0, max(selected[,1])*m, 0)
		countS <- count - false
		return(countS)
	}

	.redraw <- function(col, shiftest, shiftsam, alpharow, dataexp, datacgh){
		################################################################################################
		# function that .....
		################################################################################################
	
		if(is.na(shiftest)){
			return(sample(dataexp,1)) 
		} else {  
			# this returns a re-sample from the data when there is basically only one group
			alpha <- alpharow[1]
			Ig <- sample(c(1,2), 1, prob=c(alpha,1-alpha))
			if (Ig==1){
				xib <- sample(dataexp, 1, prob=datacgh[,1]) + shiftest 
			} else {
				xib <- sample(dataexp, 1, prob=datacgh[,2])
			}
			Ig2 <- sample(c(1,2), 1, prob = c( max(0, min(1,datacgh[col,1])), max(0, min(1, 1-datacgh[col,1]))))
			if (Ig2==1){
				xib2 <- xib
			} else {
				xib2 <- xib + shiftsam
			}
			return(xib2)
		}
	}

	.redrawcol <- function(row, allest, alphanmat, shiftsampled, data2, a, nosamp){
		################################################################################################
		# function that .......
		################################################################################################
	
		shiftest <- allest[row]
		datacgh <- matrix(data2[row,c(1:(a*nosamp))],ncol=a,byrow=TRUE)
		dataexp <- data2[row,c((a*nosamp+1):((a+1)*nosamp))]
		shiftsam <- shiftsampled[row]
		alpharow <- alphanmat[row,]
		result <- sapply(1:nosamp, .redraw, shiftest=shiftest, alpharow=alpharow, shiftsam=shiftsam, dataexp=dataexp, datacgh=datacgh)
		return(result)
	}

	.pi0est <- function(data.in, null.dists.tune, test.stat="wmw", seqg, nperm, a, nosamp){
		################################################################################################
		# function that estimates the proportion of rejected null-hypothesis, i.e., the number
		# of gene whose expression is afffected by copy number changes.
		################################################################################################

		data.in2 <- data.in[seqg,]
		if(test.stat=="wmw"){ 
			test.stats <- apply(data.in2, 1, .wmw.test.stats, nosamp, a)  
		} else { 
			test.stats <- apply(data.in2, 1, .wcvm.test.stats, nosamp, a) 
		}
		rawpvals.test <- .rawps(test.stats, null.dists.tune, nperm)
		pi0 <- limma::convest(rawpvals.test)
		return(pi0)
	}

	.datareduce <- function(powerunbal, unbalthr){
		################################################################################################
		# function that selects rows that pass the power unbalance criterion
		################################################################################################
	
		datarows <- which(powerunbal >= unbalthr)
		return(datarows)
	}
	
	.nulldist.all.wcvm <- function(data.both, nosamp, a, nperm, verbose){
		################################################################################################
		# function that calculates the null distribution of the weighted CvM test statistics for tuning.
		################################################################################################
	    
		# Permutes data and calculates test statistic on permuted data
		cvm.like.mat <- NULL
		for(i in 1:nperm){
			if ((i %% 50) == 0){ if (verbose){ cat(i," of ", nperm, " permutations done, and counting...", "\n") } }
			x <- sample(1:nosamp,nosamp) + a*nosamp
			data.ran <- cbind(data.both[,c(1:(a*nosamp))], data.both[,x])
			cvm.like.ran <- apply(data.ran, 1, .wcvm.test.stats, nosamp, a)
			cvm.like.mat <- cbind(cvm.like.mat,cvm.like.ran)
		}
		return(cvm.like.mat)
	}
	
	.nulldist.all.wmw <- function(data.both, nosamp, a, nperm, verbose){
		################################################################################################
		# function that calculates the null distribution of the weighted CvM test statistics for tuning.
		################################################################################################
	    
		# Permutes data and calculates test statistic on permuted data
		wmw.ln.mat <- NULL
		cghdata <- data.both[,c(1:(a*nosamp))]
		for(i in 1:nperm){
			if ((i %% 50) == 0){ if (verbose){ cat(i," of ", nperm, " permutations done, and counting...", "\n") } }
			x <- sample(1:nosamp, nosamp) + a*nosamp
			data.ran <- cbind(cghdata, data.both[,x])
			wmw.ran <- apply(data.ran, 1, .wmw.test.stats, nosamp, a)
			wmw.ln.mat <- cbind(wmw.ln.mat, wmw.ran)
		}
		return(wmw.ln.mat)
	}

Try the sigaR package in your browser

Any scripts or data that you put into this service are public.

sigaR documentation built on April 28, 2020, 6:05 p.m.