# hetset-forward stepwise selection ===========================================
scan_hetset <- function(H, level = "univariate", min_size = 2, max_size = 10,
rel_imp = 0, em_steps = 2) {
if (is.null(H@metadata$slf)) {
H@metadata$slf <- NA
}
if (is.null(H$prt)) {
H$prt <- NA
}
if (sum(!is.na(H@metadata$slf)) == 0) {
if (mean(is.na(H$prt)) == 1) {
if (level == "univariate") {
H <- .init_meth_A_univ(H)
} else {
H <- .init_meth_A_biv(H)
}
} else {
if (level == "univariate") {
H <- .init_meth_B(H, scan_depth = 1)
} else {
H <- .init_meth_B(H, scan_depth = 2)
}
}
} else if (mean(is.na(H$prt)) == 1) {
H <- .init_meth_C(H)
} else {
H <- estimate_densities(H)
H <- calculate_dist(H)
}
H <- reassign_samples(H)
iter_max <- 10^2
iter_no <- 1
cat(paste0(":",H@metadata$slf))
while ((sum(!is.na(H@metadata$slf)) < max_size) & (iter_no < iter_max)) {
H_b <- H
H_b <- reassign_samples(H_b)
# distance to be beaten with additional feature:
ref_het <- calculate_dist(estimate_densities(H_b))@metadata$sqHell
# initialize current best choice
H_b@metadata$sqHell <- 0
# initialize candidate set
candidates <- H@NAMES[!(H@NAMES %in% H@metadata$slf)]
for (i_c in candidates) {
H_c <- H
H_c@metadata$slf <- c(H@metadata$slf, i_c)
H_c <- estimate_densities(H_c)
i.em <- 0
while (i.em < em_steps) {
H_c <- reassign_samples(H_c)
H_c <- estimate_densities(H_c)
i.em <- i.em + 1
}
H_c <- calculate_dist(H_c)
if ((H_c@metadata$sqHell > H_b@metadata$sqHell) &
(min(c(H_c@metadata$prp.A, 1 - H_c@metadata$prp.A)) > 0.05)) {
H_b <- H_c
cat(".")
}
rm(H_c)
}
rm(candidates)
if ((sum(!is.na(H@metadata$slf)) < min_size)){
cat(paste0("+",H_b@metadata$slf[length(H_b@metadata$slf)]))
H <- H_b
H <- reassign_samples(H)
} else {
if (H_b@metadata$sqHell > ref_het) {
cat(paste0("+",H_b@metadata$slf[length(H_b@metadata$slf)]))
H <- H_b
H <- reassign_samples(H)
} else {
break
}
separation <- evaluate_set(H = H,verbose = F)
low_imp <- separation[which.min(separation$rel.importance),]
if(low_imp$rel.importance < rel_imp){
ohne <- low_imp$Feature
cat(paste0("-",ohne))
H <- subset_hetset(H = H,remove.features = ohne)
iter_no <- Inf
}
iter_no <- iter_no + 1
}
}
if (sum(!is.na(H@metadata$slf))) {
H@metadata$color_type <- "subpop"
}
H
}
# Initialize Scenario A -------------------------------------------------------
.init_meth_A_univ <- function(H) {
cat(" no partitioning, no set ... initializing ... univariate scan ... \n")
H_b <- H
H_b@metadata$sqHell <- 0
H_c <- H
for (i_c in H@NAMES) {
H_c@metadata$slf <- i_c
# fit normal mixture model using mclust
# add noise for robustness of mclust
mc_fit_data <- (assays(H_c[H_c@metadata$slf, ])[[1]]
+ 0.0025 * sd(assays(H_c[H_c@metadata$slf, ])[[1]], na.rm = T)
* rnorm(n = length(assays(H_c[H_c@metadata$slf, ])[[1]])))
mc_fit <- mclust::Mclust(data = mc_fit_data, G = 2, modelNames = "V",
verbose = F)
# calculate
H_c$prt <- factor(c("A", "B")[mc_fit$classification], levels = c("A", "B"))
H_c@metadata$prm.A$mean <- mc_fit$parameters$mean[1]
H_c@metadata$prm.A$cov <- mc_fit$parameters$variance$sigmasq[1]
H_c@metadata$prm.B$mean <- mc_fit$parameters$mean[2]
H_c@metadata$prm.B$cov <- mc_fit$parameters$variance$sigmasq[2]
H_c@metadata$prp.A <- sum(H_c$prt == "A") / sum(!is.na(H_c$prt))
H_c <- calculate_dist(H_c)
if ((H_c@metadata$sqHell > H_b@metadata$sqHell) &
(min(c(H_c@metadata$prp.A, 1 - H_c@metadata$prp.A)) > 0.05)) {
H_c@metadata$prm.full$mean <- mean(assays(H[i_c, ])[[1]], na.rm = T)
H_c@metadata$prm.full$cov <- var(t(assays(H[i_c, ])[[1]]), na.rm = T)
H_b <- H_c
}
}
H <- H_b
H
}
.init_meth_A_biv <- function(H) {
cat(" no partitioning, no set ... initializing ... bivariate scan ... \n")
H_b <- H
H_b@metadata$sqHell <- 0
H_c <- H
feature_pairs <- t(combn(x = H@NAMES, m = 2))
N_combn <- dim(feature_pairs)[1]
for (i_c in 1:N_combn) {
pair <- feature_pairs[i_c, ]
H_c@metadata$slf <- pair
# fit normal mixture model using mclust
# add noise for robustness of mclust
mc_fit_data <- (t(assays(H_c[H_c@metadata$slf, ])[[1]])
+ 0.001 * sd(assays(H_c[H_c@metadata$slf, ])[[1]], na.rm = T)
* rnorm(n = prod(dim(assays(H_c[H_c@metadata$slf, ])[[1]]))))
mc_fit <- mclust::Mclust(data = mc_fit_data, G = 2, modelNames = "VVV",
verbose = F)
# calculate
H_c$prt <- factor(c("A", "B")[mc_fit$classification], levels = c("A", "B"))
H_c@metadata$prm.A$mean <- mc_fit$parameters$mean[, 1]
H_c@metadata$prm.A$cov <- mc_fit$parameters$variance$sigma[, , 1]
H_c@metadata$prm.B$mean <- mc_fit$parameters$mean[, 2]
H_c@metadata$prm.B$cov <- mc_fit$parameters$variance$sigma[, , 2]
H_c@metadata$prp.A <- sum(H_c$prt == "A") / sum(!is.na(H_c$prt))
H_c <- calculate_dist(H_c)
if ((H_c@metadata$sqHell > H_b@metadata$sqHell) &
(min(c(H_c@metadata$prp.A, 1 - H_c@metadata$prp.A)) > 0.05)) {
H_c@metadata$prm.full$mean <- rowMeans(assays(H[pair, ])[[1]], na.rm = T)
H_c@metadata$prm.full$cov <- cov(t(assays(H[pair, ])[[1]]),
use = "complete.obs")
H_b <- H_c
}
}
H <- H_b
H
}
# Initialize Scenario B -------------------------------------------------------
.init_meth_B <- function(H, scan_depth = 2) {
cat(" partitioning, no set ... initializing ... \n")
H_b <- H
H_c <- H
rm(H)
H_b@metadata$sqHell <- 0
feature_pairs <- t(combn(x = H_b@NAMES, m = scan_depth))
N_combn <- dim(feature_pairs)[1]
for (i_c in 1:N_combn) {
pair <- feature_pairs[i_c, ]
H_c@metadata$slf <- pair
# calculate
H_c <- estimate_densities(H_c)
H_c <- calculate_dist(H_c)
if (H_c@metadata$sqHell > H_b@metadata$sqHell) {
H_b <- H_c
}
}
H <- H_b
H
}
# Initialize Scenario C -------------------------------------------------------
.init_meth_C <- function(H) {
cat(" no partitioning, set of intital features ... initializing ... \n")
if (sum(!is.na(H@metadata$slf)) == 1) {
# fit normal mixture model using mclust
# add noise for robustness of mclust
mc_fit_data <- (assays(H[H@metadata$slf, ])[[1]]
+ 0.001 * sd(assays(H[H@metadata$slf, ])[[1]], na.rm = T)
* rnorm(n = length(assays(H[H@metadata$slf, ])[[1]])))
mc_fit <- mclust::Mclust(
data = mc_fit_data, G = 2, modelNames = "V",
verbose = F
)
H@metadata$prm.A$mean <- mc_fit$parameters$mean[1]
H@metadata$prm.A$cov <- mc_fit$parameters$variance$sigmasq[1]
H@metadata$prm.B$mean <- mc_fit$parameters$mean[2]
H@metadata$prm.B$cov <- mc_fit$parameters$variance$sigmasq[2]
H@metadata$prm.full$mean <- mean(mc_fit_data, na.rm = T)
H@metadata$prm.full$cov <- var(mc_fit_data, na.rm = T)
} else {
# fit normal mixture model using mclust
# add noise for robustness of mclust
mc_fit_data <- (t(assays(H[H@metadata$slf, ])[[1]])
+ 0.001 * sd(assays(H[H@metadata$slf, ])[[1]], na.rm = T)
* rnorm(n = prod(dim(assays(H[H@metadata$slf, ])[[1]]))))
mc_fit <- mclust::Mclust(
data = mc_fit_data, G = 2, modelNames = "VVV",
verbose = F
)
H@metadata$prm.A$mean <- mc_fit$parameters$mean[, 1]
H@metadata$prm.A$cov <- mc_fit$parameters$variance$sigma[, , 1]
H@metadata$prm.B$mean <- mc_fit$parameters$mean[, 2]
H@metadata$prm.B$cov <- mc_fit$parameters$variance$sigma[, , 2]
H@metadata$prm.full$mean <- colMeans(mc_fit_data, na.rm = T)
H@metadata$prm.full$cov <- cov(mc_fit_data, use = "complete.obs")
}
H$prt <- factor(c("A", "B")[mc_fit$classification], levels = c("A", "B"))
H@metadata$prp.A <- sum(H$prt == "A") / sum(!is.na(H$prt))
H <- calculate_dist(H)
H
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.