R/pri_target_maxdES.R

Defines functions Find_Target_Max_dES .Diff_by_Remove .Calc_NES_bkgd

Documented in .Calc_NES_bkgd .Diff_by_Remove Find_Target_Max_dES

#' Calculating background ES distribution for normalizing dES
#' 
#' @param gs A string vector for set of targets of interests. Could be an element from output of GetLE()
#' @param rk A numeric vector for all mRNAs ranked by correlation
#' @param direction A character for the direction of enrichment 
#' @param n.iter An integer for the number of background to generate. Default is 500
#' @param alpha A numeric value for the gsea param
#' @return A numeric vector of background normalized ES distribution
#' 
.Calc_NES_bkgd <- function(gs, rk, direction, n.iter, alpha) {

	if (direction == "Negative") {
		perm.es <- unlist(lapply(seq_len(n.iter), function(x) max(.CalcGseaStat(sample(rk), gs, alpha))))
	} else {
		perm.es <- unlist(lapply(seq_len(n.iter), function(x) min(.CalcGseaStat(sample(rk), gs, alpha))))
	}

	bkgd <- mean(perm.es)
	return(bkgd)

}

#' Remove each gene in the gs and see what is the changes in the dES
#' 
#' @param gs A character vector for a set of targets. One element from output of GetLE().
#' @param main.rk A numeric vector for all mRNAs ranked by correlation in the group of interest
#' @param ref.rk A numeric vector for all mRNAs ranked by correlation in the group to compare with
#' @param direction A character for the direction of enrichment 
#' @param n.iter An integer for the number of background to generate. Default is 500
#' @param alpha A numeric value for the gsea param
#' @return A numberic vector of the dES for removing each gene in gs
#' 
.Diff_by_Remove <- function(gs, main.rk, ref.rk, direction, n.iter, alpha) {

	if (direction == "Negative") {

		main.bkgd <- .Calc_NES_bkgd(gs[-1], main.rk, direction=direction, n.iter=n.iter, alpha=alpha)
		main.nes <- unlist(lapply(gs, function(x) max(.CalcGseaStat(main.rk, gs[gs != x], alpha)) / main.bkgd))

		ref.bkgd <- .Calc_NES_bkgd(gs[-1], ref.rk, direction=direction, n.iter=n.iter, alpha=alpha)
		ref.nes <- unlist(lapply(gs, function(x) max(.CalcGseaStat(ref.rk, gs[gs != x], alpha)) / ref.bkgd))

		remove.des <- main.nes - ref.nes

	} else {

		main.bkgd <- .Calc_NES_bkgd(gs[-1], main.rk, direction=direction, n.iter=n.iter, alpha=alpha)
		main.nes <- unlist(lapply(gs, function(x) min(.CalcGseaStat(main.rk, gs[gs != x], alpha)) / main.bkgd))

		ref.bkgd <- .Calc_NES_bkgd(gs[-1], ref.rk, direction=direction, n.iter=n.iter, alpha=alpha)
		ref.nes <- unlist(lapply(gs, function(x) min(.CalcGseaStat(ref.rk, gs[gs != x], alpha)) / ref.bkgd))

		remove.des <- ref.nes - main.nes
	}

	return(remove.des)

}

#' Do backward random search to find set of targets that maximize the normalized dES
#' 
#' @param gs A character vector for a set of targets. One element from output of GetLE().
#' @param cor.list A matrix from Run_LINE()
#' @param main A character for the name of the group to compared to. 
#' @param ref A character for the name of the group to compared to. 
#' @param miR A character for the name of the miRNA to bet tested.
#' @param direction A character for the direction of enrichment 
#' @param min.gs.size An integer indicating the minimum number of genes to used. Default is 20.
#' @param n.iter An integer indicating the number of background to generate for normlizing ES. Default is 500
#' @param alpha A numeric variable for gsea parameter
#' @return A character vector of target names
#' 
#' @import progress
#' @export
Find_Target_Max_dES <- function(gs, cor.list, main, ref, miR, 
								direction=c("Negative", "Positive"), 
								min.gs.size=20, n.iter=500, alpha=1) {

	# Check if main and ref are in cor.list
	if(!main %in% names(cor.list) | !ref %in% names(cor.list)) {
		stop("The groups are not in cor.list.\n")
	}
	
	# Check if direction is correct
	if (!length(direction) == 1) {
		stop("There should be only one 'direction'.\n")
	}

	if (!direction %in% c("Negative", "Positive")) {
		stop("'direction' must be either Negative or Positive.\n")
	}

	# Check if the gs is in right format; if not, will use the names()
	if(!is.null(names(gs))) {
		gs <- names(gs)
	}

	# Check if the miR exists
	if(is.null(miR)) {
		stop("Must provide a miRNA to work.\n")
	}

	# Check if the min.gs.size has already been reached
	if (length(gs) <= min.gs.size) {
		warning("The min.gs.size has already been reached. No maximization done.\n")
		return(gs)
	} else {

		# Format gs and cor.list
		rank.list <- lapply(cor.list, function(x) sort(x[,miR]))

		ws.list <- lapply(rank.list, function(x) .CalcGseaStat(x, gs, alpha))

		# Calculate NES and dES from the full gs
		if (direction == "Negative") {
			es <- unlist(lapply(ws.list, max))
			
			main.nes <- es[main] / .Calc_NES_bkgd(gs, rank.list[[main]], direction=direction, n.iter=n.iter, alpha=alpha)
			ref.nes <- es[ref] / .Calc_NES_bkgd(gs, rank.list[[ref]], direction=direction, n.iter=n.iter, alpha=alpha)

			baseline.des <- main.nes - ref.nes
		} else {
			es <- unlist(lapply(ws.list, min))
			
			main.nes <- es[main] / .Calc_NES_bkgd(gs, rank.list[[main]], direction=direction, n.iter=n.iter, alpha=alpha)
			ref.nes <- es[ref] / .Calc_NES_bkgd(gs, rank.list[[ref]], direction=direction, n.iter=n.iter, alpha=alpha)

			baseline.des <- ref.nes - main.nes
		}

		# Start doing random backward search
		genes.des <- c()
		current.gs <- gs

		pb <- progress_bar$new(total = length(current.gs))
		for (i in seq_len(length(gs) - min.gs.size)) {
			
			pb$tick()

			# Start from all, Calculate new remove.des
			new.es.res <- .Diff_by_Remove(gs=current.gs, main.rk=rank.list[[main]], ref.rk=rank.list[[ref]], direction=direction, n.iter=n.iter, alpha=alpha)
			
			# Decide which one to remove
			this.remove <- current.gs[which(new.es.res == max(new.es.res))]

			# Pick a ranodm one if ties appear
			this.remove <- ifelse(length(this.remove) > 1, this.remove[sample(1:length(this.remove), 1)], this.remove)

			# Update gene set by deleting the target that gives the largest change
			current.gs <- current.gs[current.gs != this.remove]
			
			# Update dES score
			new.des <- max(new.es.res)

			# Record the gene removed and the score at the current step
			genes.des <- setNames(c(genes.des, new.des), c(names(genes.des), this.remove))

			# print(paste("Iteration ", i, " removes ", this.remove, " new dES=", new.des, sep=""))

			# Once we reach the desired number of genes, add the remainig genes to the genes.des and exit loop
			if (length(current.gs) <= min.gs.size) {
				print("Reached minimum gs size.")
				remaining.des <- setNames(rep(0, length(current.gs)), current.gs)
				genes.des <- c(genes.des, remaining.des)
				break
			}

			Sys.sleep(1 / 100)

		}

		final.gs <- names(genes.des[(which(genes.des == max(genes.des))+1):length(genes.des)])

		# Compare with the full one to see if we actually have increase
		if (max(genes.des) <= baseline.des) {
			warning("Can't find better gs for increasing dES. Full one should be use.")
			final.gs <- gs
		}
		return(final.gs)
	}
}
ningb/diffreg documentation built on Jan. 20, 2025, 4:10 p.m.