#' @useDynLib convSig
#' @importFrom Rcpp sourceCpp
NULL
#' Performs the ReLU transform on mutational count data
#'
#' @param mut_obj An object of class 'Shallowres' as produced by the function
#' \link[=mut_count]{mut_count()}
#'
#' @param five A boolean that specifies
#' whether you want to apply a transformation based on a 5-nucleotide
#' convolution window.
#'
#' @param K An integer that indicates the number
#' of mutational processes you want to detect in your mutational count data via
#' the transformation.
#'
#' @return A Feature matrix (\code{feat}), which contains the convolution weights
#' associated with each mutational processes. An M matrix (\code{mat}), which
#' contains the probability for all mutation types. A P matrix (\code{P}), which
#' contains the mutational intensity/activity of each mutational process. A LOSS
#' variable (\code{LOSS}), which displays the LOSS value achieved by the ReLU optimisation.
#' A testing LOSS variable (\code{test_LOSS}), which displays the LOSS value achieved
#' on the testing samples.
#'
#' @examples
#' relu_res <- relu_transform(EMu_prepped, five = TRUE, K = 6)
#'
#' @export
relu_transform <- function(mut_obj, five = FALSE, K = 5, iter_num = 5000) {
if (K == 0) {
stop("Your specified number of mutational processes cannot be zero")
} else if (K > 80) {
stop("Your specified number of mutational processes cannot exceed 80")
}
if (!("Shallowres" %in% class(mut_obj))) {
stop("Your specified mut_obj is not of the right type")
}
if (five == FALSE) {
numbase = 3
mid = 2
} else if (five == TRUE) {
numbase = 5
mid = 3
}
mut_path <- mut_obj@mut_mat
bg_path <- mut_obj@wt
X <- mutation_inputprocess(mut_path, numbase)
bg <- background_inputprocess(bg_path, numbase)
S <- dim(X)[1]
N <- dim(X)[2]
X_test <- test_splitX(X, S, N)
bg_test <- test_splitbg(bg, numbase)
Z <- (S, N, K)
P <- array(runif(S*K), dim = c(S, K))
poss = (numbase * 4) - 2
feat <- matrix(runif(poss*K), nrow = poss, ncol = K)
conv <- conv_create(N, K, numbase, feat)
theta_array = array(runif(N*K), dim = c(N, K))
theta_array <- sweep(theta_array,MARGIN=c(2),colSums(theta_array),`/`)
beta_array = X + 10^(-4)
mat <- array(runif(2*3*K), dim = c(2, 3, K))
summed_mat <- three_colsum(mat, 2)
mat <- sweep(mat,MARGIN=c(1,3),summed_mat, `/`)
type <- fragbase_indexer(numbase, N)
T <- tencode(numbase, N)
feat_o <- linreg_solver(T, theta_array)
reg_res <- regularizer(X, bg, conv, theta_array, P, mat, N, S, K, type,
mid, beta_array, Z,
feat_o@Multi_F, feat_o@feat, numbase,
bg_test, X_test, T, iter_num)
#cat("Please ignore a potential warning about -Inf.\n")
#cat("This is normal and expected, and is handled internally.\n")
invisible(reg_res)
}
# Loop function, which increments lambda (burden of the loss function) iteratively
regularizer <- function(X, bg, conv, theta, P, mat, N, S, K,
type, mid, beta_ar, Z, Multi_F, feat, numbase,
bg_test, X_test, T, iter_num) {
for (r in 1:6) {
count_accu = 0
reg = (10^(r - 1)) - 1
cat("Computing using lambda value: ")
cat(reg)
cat("\n")
cat("Optimising/Iterating on the loss function (may take a while)... \n")
LOSS <- init_LOSS(bg, theta, P)
for (i in 1:2) {
#
inter_theta <- theta[unlist(type[[mid]][i]),]
theta_dim <- dim(inter_theta)
inter_theta <- array(inter_theta, dim = c(theta_dim[1],1,K))
inter_P <- array(P, dim = c(S,1,1,K))
inter_P <- four_recycle(inter_P, 2, theta_dim[1])
inter_mat <- array(mat[i,,], dim = c(1,3,K))
inter_mat <- three_recycle(inter_mat, 1, theta_dim[1])
temp <- sweep(inter_P,MARGIN=c(2,3,4),inter_theta, `*`)
temp <- four_recycle(temp, 3, 3)
temp <- sweep(temp,MARGIN=c(2,3,4),inter_mat, `*`)
temp <- four_colsum(temp, 4)
#
inter_bg <- array(bg[unlist(type[[mid]][i])], dim = c(1,theta_dim[1],1))
inter_bg <- three_recycle(inter_bg, 3, 3)
inter_bg <- three_recycle(inter_bg, 1, S)
temp <- log(sweep(temp,MARGIN=c(1,2,3),inter_bg, `*`))
LOSS = LOSS - sum(sweep(temp,MARGIN=c(1,2,3),X[,unlist(type[[mid]][i]),], `*`))
}
LOSS = 0.5 * reg * sum((theta - conv)^2)
old_LOSS = LOSS + 1
new_LOSS = LOSS
while(abs(new_LOSS - old_LOSS) > (1.0/(2*r+1))) {
#cat("Optimising on the loss function \n")
count_accu = count_accu + 1
if (count_accu > iter_num) {
cat("Reached the limit of iteration numbers")
cat("\n")
cat("The transformation operation has not fully converged")
cat("\n")
break
}
for (i in 1:K) {
C = reg * conv[,i] - bg * sum(P[,i])
temp2 <- array(0, dim = c(S, N, 3))
for (j in 1:2) {
inter_theta <- theta[unlist(type[[mid]][j]),]
theta_dim <- dim(inter_theta)
inter_theta <- array(inter_theta, dim = c(theta_dim[1],1,K))
inter_P <- array(P, dim = c(S,1,1,K))
inter_P <- four_recycle(inter_P, 2, theta_dim[1])
inter_mat <- array(mat[j,,], dim = c(1,3,K))
inter_mat <- three_recycle(inter_mat, 1, theta_dim[1])
temp <- sweep(inter_P,MARGIN=c(2,3,4),inter_theta, `*`)
temp <- four_recycle(temp, 3, 3)
temp <- sweep(temp,MARGIN=c(2,3,4),inter_mat, `*`)
temp <- four_colsum(temp, 4)
temp2[,unlist(type[[mid]][j]),] <- temp
}
alpha_array <- temp2
#r
for (j in 1:2) {
inter_theta <- theta[unlist(type[[mid]][j]),i]
theta_len <- length(inter_theta)
inter_theta <- array(inter_theta, dim = c(theta_len,1))
inter_P <- array(P[,i], dim = c(S,1,1))
inter_P <- three_recycle(inter_P, 2, theta_len)
inter_mat <- array(mat[j,,i], dim = c(3))
temp <- sweep(inter_P,MARGIN=c(2,3),inter_theta, `*`)
temp <- three_recycle(temp, 3, 3)
temp <- sweep(temp,MARGIN=c(3),inter_mat, `*`)
inter_P <- array(P[,i], dim = c(S,1,1))
inter_P <- three_recycle(inter_P, 3, 3)
temp2 <- sweep(inter_P,MARGIN=c(3),inter_mat, `*`)
alpha_array[,unlist(type[[mid]][j]),] =
alpha_array[,unlist(type[[mid]][j]),] - temp
alpha_array[,unlist(type[[mid]][j]),] =
alpha_array[,unlist(type[[mid]][j]),] / three_recycle(temp2, 2, theta_len)
}
# sum(0,2)
inter_upper = three_colsum((beta_ar / alpha_array), 3)
upper = colSums(inter_upper, 1)
upper = upper + C
v_max = max(upper)
x_max_max = array(0, dim = c(N))
x_max_min = array(0, dim = c(N))
inter_upper = three_colsum((beta_ar / (alpha_array + 1)), 3)
upper2 = colSums(inter_upper, 1) - reg + C
v_min = max(upper2)
x_min_max = array(1, dim = c(N))
x_min_min = array(0, dim = c(N))
x_min_min[which(upper2 == max(upper2))] = 1
while ((v_max - v_min) > 0.0001) {
v_new = (1 / 2) * (v_max + v_min)
x_new_max = (1 / 2) * (x_min_max + x_max_max)
if (!is.finite(v_max)) {
v_new = 2 * abs(v_min)
x_new_max = x_min_max
}
x_new_max[v_new > upper] = 0
x_new_min = x_max_min
x_new_min[v_new > upper] = 0
while ((sum(x_new_min) - 1) * (sum(x_new_max) - 1) < 0) {
x_new = (1 / 2) * (x_new_min + x_new_max)
#inter <- three_colsum((beta_ar / sweep(alpha_array,
#MARGIN=c(2),x_new, `+`)), 3)
inter <- array(rowSums((beta_ar /
sweep(alpha_array,MARGIN=c(2),x_new, `+`)),
dims = 2), dim = c(S,N))
b_array = (array(colSums(inter, 1), dim = N) - reg * x_new + C) > v_new
x_new_min[b_array] = x_new[b_array]
x_new_max[!b_array] = x_new[!b_array]
}
if (sum(x_new_min) - 1 >= 0 & sum(x_new_max) - 1 >= 0) {
v_min = v_new
x_min_min = x_new_min
x_min_max = x_new_max
} else {
v_max = v_new
x_max_min = x_new_min
x_max_max = x_new_max
}
}
v_new = (1 / 2) * (v_max + v_min)
x_new_max = (1 / 2) * (x_min_max + x_max_max)
x_new_max[v_new > upper] = 0
x_new_min = x_max_min
x_new_min[v_new > upper] = 0
while (sum((x_new_min - x_new_max)^2) > 0.001) {
x_new = 1 / 2 * (x_new_min + x_new_max)
#inter <- three_colsum((beta_ar / sweep(alpha_array,
#MARGIN=c(2),x_new, `+`)), 3)
inter <- array(rowSums((beta_ar /
sweep(alpha_array,MARGIN=c(2),x_new, `+`)),
dims = 2), dim = c(S,N))
b_array = (array(colSums(inter, 1), dim = N) - reg * x_new + C) > v_new
x_new_min[b_array] = x_new[b_array]
x_new_max[!b_array] = x_new[!b_array]
}
theta[,i] = (1 / 2) * (x_new_max + x_new_min)
}
theta = theta / colSums(theta)
for (i in 1:2) {
inter_theta <- theta[unlist(type[[mid]][i]),]
theta_dim <- dim(inter_theta)
inter_theta <- array(inter_theta, dim = c(theta_dim[1],1,K))
inter_P <- array(P, dim = c(S,1,1,K))
inter_P <- four_recycle(inter_P, 2, theta_dim[1])
inter_mat <- array(mat[i,,], dim = c(1,3,K))
inter_mat <- three_recycle(inter_mat, 1, theta_dim[1])
temp <- sweep(inter_P,MARGIN=c(2,3,4),inter_theta, `*`)
temp <- four_recycle(temp, 3, 3)
temp <- sweep(temp,MARGIN=c(2,3,4),inter_mat, `*`)
Z[,unlist(type[[mid]][i]),,] = temp
}
Z = Z + (1e-4 * sum(Z) / (K*N*3*S))
Z = sweep(Z,MARGIN=c(1,2,3),four_colsum(Z, 4), `/`)
inter_X = list_indexer(X, type, mid, N, K)
inter_Z = list_indexer(Z, type, mid, N, K, X = FALSE)
inter_mat = five_colsum(sweep(inter_Z,MARGIN=c(1,2,3,4),inter_X, `*`), 3)
mat = array((colSums(inter_mat) + 0.01), dim = c(2,3,K))
summed_mat <- three_colsum(mat, 2)
mat <- sweep(mat,MARGIN=c(1,3),summed_mat, `/`)
inter_P = sweep(Z,MARGIN=c(1,2,3),X, `*`)
inter_P = four_colsum(inter_P, 2)
inter_P = array(four_colsum(inter_P, 3), dim = c(S, K))
#P = inter_P / colSums(sweep(theta,MARGIN=c(1),bg, `*`))
P = sweep(inter_P,MARGIN=c(2),colSums(sweep(theta,MARGIN=c(1),bg, `*`)), `/`)
F_gradient = array(1, dim = c(N, K))
alpha = 1
while (sum((alpha * F_gradient)^2) > 0.01) {
F_gradient = (theta - conv) * sign(conv)
F_gradient = Multi_F %*% F_gradient
conv_change = T %*% F_gradient
temp = T %*% feat / conv_change
if (is.finite(max(temp[temp<0]))) {
alpha = -(max(temp[temp<0]))
} else {
alpha = 1
}
alpha = min(1, alpha * 1.0001)
feat = feat + (alpha * F_gradient)
conv = conv_create(N, K, numbase, feat)
}
LOSS = sum(sweep(P,MARGIN=c(2),colSums(sweep(theta,MARGIN=c(1),bg, `*`)), `*`))
for (i in 1:2) {
inter_theta <- theta[unlist(type[[mid]][i]),]
theta_dim <- dim(inter_theta)
inter_theta <- array(inter_theta, dim = c(theta_dim[1],1,K))
inter_P <- array(P, dim = c(S,1,1,K))
inter_P <- four_recycle(inter_P, 2, theta_dim[1])
inter_mat <- array(mat[i,,], dim = c(1,3,K))
inter_mat <- three_recycle(inter_mat, 1, theta_dim[1])
temp <- sweep(inter_P,MARGIN=c(2,3,4),inter_theta, `*`)
temp <- four_recycle(temp, 3, 3)
temp <- sweep(temp,MARGIN=c(2,3,4),inter_mat, `*`)
inter_bg <- array(bg[unlist(type[[mid]][i])],
dim = c(1, length(unlist(type[[mid]][i])), 1))
inter_bg <- three_recycle(inter_bg, 1, S)
inter_bg <- three_recycle(inter_bg, 3, 3)
temp <- array(log(sweep(four_colsum(temp, 4),MARGIN=c(1,2,3),inter_bg, `*`)),
dim = c(S, length(unlist(type[[mid]][i])), 3))
temp[is.infinite(temp)] <- NA
LOSS = LOSS - sum(temp * X[,unlist(type[[mid]][i]),], na.rm = TRUE)
}
LOSS = LOSS + 0.5 * reg * sum((theta - conv)^2)
old_LOSS = new_LOSS
new_LOSS = LOSS
}
}
test_LOSS = sum(sweep(P,MARGIN=c(2),colSums(sweep(theta,MARGIN=c(1),
bg_test, `*`)), `*` ))
for (i in 1:2) {
inter_theta <- theta[unlist(type[[mid]][i]),]
theta_dim <- dim(inter_theta)
inter_theta <- array(inter_theta, dim = c(theta_dim[1],1,K))
inter_P <- array(P, dim = c(S,1,1,K))
inter_P <- four_recycle(inter_P, 2, theta_dim[1])
inter_mat <- array(mat[i,,], dim = c(1,3,K))
inter_mat <- three_recycle(inter_mat, 1, theta_dim[1])
temp <- sweep(inter_P,MARGIN=c(2,3,4),inter_theta, `*`)
temp <- four_recycle(temp, 3, 3)
temp <- sweep(temp,MARGIN=c(2,3,4),inter_mat, `*`)
inter_bg <- array(bg_test[unlist(type[[mid]][i])],
dim = c(1, length(unlist(type[[mid]][i])), 1))
inter_bg <- three_recycle(inter_bg, 1, S)
inter_bg <- three_recycle(inter_bg, 3, 3)
temp <- array(log(sweep(four_colsum(temp, 4),MARGIN=c(1,2,3),inter_bg, `*`)),
dim = c(S, length(unlist(type[[mid]][i])), 3))
temp[is.infinite(temp)] <- NA
test_LOSS = test_LOSS - sum(temp * X_test[,unlist(type[[mid]][i]),], na.rm = TRUE)
}
conv_all <- reluexp_conv_all(bg, mat, N, K, type, numbase, feat, mid)
relu_o <- new("relu_obj", feat = feat, mat = mat, P = P,
LOSS = LOSS, test_LOSS = test_LOSS, conv = conv_all)
return(relu_o)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.