R/utility.R

Defines functions vec_to_array indicator initialize_Delta initialize_Mu_Sigma initialize_Sigma initialize_Mu initialize_Beta check_K lastInd LogSumExp cal_loglik compute_res_r

####################utility functions for the EM algorithm of LUCID in parallel####################

#individual inclusion prob for each LC of each layer
compute_res_r <- function(r, N, layer) {
  r_margin <- t(sapply(1:N, function(j) {
    marginSums(lastInd(r,j), margin = layer)
  }))
  return(r_margin)
}


cal_loglik <- function(Estep_array, Estep_r) {
  return(sum(Estep_array * Estep_r))
}

LogSumExp <- function(vec) {
  max_vec <- max(vec)
  trick <- max_vec + log(sum(exp(vec - max_vec)))
  return(trick)
}



lastInd <- function(x, n){
  d <- dim(x)
  d.new <- d[-length(d)]
  block.size <- prod(d.new)
  res <- x[(block.size * (n - 1) + 1):(block.size * n)]
  array(res, dim = d.new)
}



check_K <- function(K) {
  for(x in K) {
    if(x != as.integer(x)) {
      stop("K should be a vector of integer")
    }
  }
  if(min(K) < 2) {
    stop("each element in K should be greater or equal than 2")
  }
}




initialize_Beta <- function(K, nG) {
  nOmics <- length(K)
  Beta <- vector(mode = "list", length = nOmics)
  for(i in 1:nOmics) {
    # row represent cluster, column represents variable
    Beta[[i]] <- matrix(runif((nG + 1) * (K[i] - 1), min = -1, max = 1),
                        nrow = K[i] - 1)
  }
  return(Beta)
}


initialize_Mu <- function(K, nZ) {
  nOmics <- length(K)
  Mu <- vector(mode = "list", length = nOmics)
  for(i in 1:nOmics) {
    # For parallel model: K clusters x M features
    Mu[[i]] <- matrix(runif(K[i] * nZ[i], min = -1, max = 1),
                      nrow = K[i],  # K clusters in rows
                      ncol = nZ[i]) # M features in columns
  }
  return(Mu)
}



initialize_Sigma <- function(K, nZ) {
  nOmics <- length(K)
  Sigma <- vector(mode = "list", length = nOmics)
  for(i in 1:nOmics) {
    # each element is an array of nZ[i] x nZ[i] x K[i]
    Sigma[[i]] <- array(0, dim = c(nZ[i], nZ[i], K[i]))
    for(j in 1:K[i]) {
      Sigma[[i]][, , j] <- diag(nZ[i])
    }
  }
  return(Sigma)
}





initialize_Mu_Sigma <- function(K, Z, modelNames, na_pattern) {
  nOmics <- length(K)
  Mu <- vector(mode = "list", length = nOmics)
  Sigma <- vector(mode = "list", length = nOmics)
  z <- vector(mode = "list", length = nOmics)
  for(i in 1:nOmics) {
    # Exclude all-missing rows for layer-specific GMM fit.
    idx_obs <- na_pattern[[i]]$indicator_na != 3
    temp_fit <- Mclust(data = Z[[i]][idx_obs, ],
                       G = K[i],
                       modelNames = modelNames[i])
    # Transpose mean matrix for parallel model - means should be K x M
    Mu[[i]] <- t(temp_fit$parameters$mean)  # Transpose to get K x M
    Sigma[[i]] <- temp_fit$parameters$variance$sigma
    # Expand initial responsibilities back to full N rows so downstream
    # multinomial regressions align with G.
    z_full <- matrix(1 / K[i], nrow = nrow(Z[[i]]), ncol = K[i])
    z_full[idx_obs, ] <- temp_fit$z
    z[[i]] <- z_full
  }
  return(list(Mu = Mu,
              Sigma = Sigma,
              z = z))
}



initialize_Delta <- function(K, CoY, family = c("normal", "binary"),
                             z, Y) {
  family <- to_parallel_family(family)
  
  # Helper function for stable GLM fitting
  safe_glm_fit <- function(formula, data, family = "binomial", maxit = 100, max_attempts = 3) {
    for (attempt in 1:max_attempts) {
      # Try with progressively more robust settings
      control_settings <- switch(attempt,
        list(maxit = maxit),  # First attempt: standard settings
        list(maxit = maxit * 2, epsilon = 1e-8),  # Second attempt: increased iterations
        list(maxit = maxit * 3, epsilon = 1e-10)  # Third attempt: maximum robustness
      )
      
      fit <- try(glm(formula, data = data, family = family, 
                     control = control_settings,
                     start = if(attempt > 1) rep(0, length(all.vars(formula)) - 1) else NULL), 
                 silent = TRUE)
      
      if (!inherits(fit, "try-error") && fit$converged) {
        return(fit)
      }
    }
    
    # If all attempts failed, return a simple intercept-only model
    warning("GLM fitting failed after ", max_attempts, " attempts. Using intercept-only model.")
    return(glm(Y ~ 1, data = data, family = family))
  }

  if(family == "gaussian") {

    # if 2 omics layers
    if(length(K) == 2) {
      r_matrix <- cbind(z[[1]], z[[2]])
      r_fit <- r_matrix[, -c(1, K[1] + 1)]

      if(is.null(CoY)) {
        fit <- lm(Y ~ r_fit)
        mu <- as.numeric(coef(fit))
        sd <- sd(resid(fit))
        x <- list(mu = mu,
                  sd = sd,
                  K = K)
      }else{
        Set0 <- as.data.frame(cbind(Y, r_fit))
        Set0 <- cbind(Set0, CoY)
        colnames(Set0) <- c("Y", paste0("LC", 1:ncol(r_fit)), colnames(CoY))
        fit <- glm(as.formula(paste("Y~", paste(colnames(Set0)[-1], collapse = "+"))), data = Set0, family = gaussian)

        beta_f <- coef(fit)
        mu <- as.numeric(beta_f)
        sd <- sd(resid(fit))
        x <- list(mu = mu,
                  sd = sd,
                  K = K)
      }}

    # if 3 omics layers
    if(length(K) == 3) {
      r_matrix <- cbind(z[[1]], z[[2]], z[[3]])
      r_fit <- r_matrix[, -c(1, K[1] + 1, K[1] + K[2] + 1)]

      if(is.null(CoY)) {
        fit <- lm(Y ~ r_fit)
        mu <- as.numeric(coef(fit))
        sd <- sd(resid(fit))
        x <- list(mu = mu,
                  sd = sd,
                  K = K)
      }else{
        Set0 <- as.data.frame(cbind(Y, r_fit))
        Set0 <- cbind(Set0, CoY)
        colnames(Set0) <- c("Y", paste0("LC", 1:ncol(r_fit)), colnames(CoY))
        fit <- glm(as.formula(paste("Y~", paste(colnames(Set0)[-1], collapse = "+"))), data = Set0, family = gaussian)

        beta_f <- coef(fit)
        mu <- as.numeric(beta_f)
        sd <- sd(resid(fit))
        x <- list(mu = mu,
                  sd = sd,
                  K = K)
      }}

    # if 4 omics layers
    if(length(K) == 4) {
      r_matrix <- cbind(z[[1]], z[[2]], z[[3]], z[[4]])
      r_fit <- r_matrix[, -c(1, K[1] + 1, K[1] + K[2] + 1, K[1] + K[2] + K[3] + 1)]

      if(is.null(CoY)) {
        fit <- lm(Y ~ r_fit)
        mu <- as.numeric(coef(fit))
        sd <- sd(resid(fit))
        x <- list(mu = mu,
                  sd = sd,
                  K = K)
      }else{
        fit <- lm(Y ~ r_fit + CoY)
        beta_f <- summary(fit)$coefficients[, 1]
        mu <- as.numeric(beta_f)
        sd <- sd(resid(fit))
        x <- list(mu = mu,
                  sd = sd,
                  K = K)
      }}

    # if 5 omics layers
    if(length(K) == 5) {
      r_matrix <- cbind(z[[1]], z[[2]], z[[3]], z[[4]], z[[5]])
      r_fit <- r_matrix[, -c(1, K[1] + 1, K[1] + K[2] + 1, K[1] + K[2] + K[3] + 1, K[1] + K[2] + K[3] + K[4] + 1)]

      if(is.null(CoY)) {
        fit <- lm(Y ~ r_fit)
        mu <- as.numeric(coef(fit))
        sd <- sd(resid(fit))
        x <- list(mu = mu,
                  sd = sd,
                  K = K)
      }else{
        fit <- lm(Y ~ r_fit + CoY)
        beta_f <- summary(fit)$coefficients[, 1]
        mu <- as.numeric(beta_f)
        sd <- sd(resid(fit))
        x <- list(mu = mu,
                  sd = sd,
                  K = K)
      }}

  }


  if(family == "binomial") {


    # if 2 omics layers
    if(length(K) == 2) {
      r_matrix <- cbind(z[[1]], z[[2]])
      r_fit <- r_matrix[, -c(1, K[1] + 1)]

      if(is.null(CoY)) {
        Set0 <- data.frame(Y = Y, r_fit)
        fit <- safe_glm_fit(Y ~ ., data = Set0, family = "binomial")
        b <- as.numeric(coef(fit))
      } else {
        Set0 <- data.frame(Y = Y, r_fit = r_fit, CoY)
        formula <- as.formula(paste("Y ~", paste(colnames(Set0)[-1], collapse = "+")))
        fit <- safe_glm_fit(formula, data = Set0, family = "binomial")
        b <- as.numeric(coef(fit))
      }

      b_array <- vec_to_array(K = K, mu = b)
      p <- 1 / (1 + exp(-b_array))
      # Ensure probabilities are numerically stable
      p[p < 1e-10] <- 1e-10
      p[p > 1 - 1e-10] <- 1 - 1e-10
      x <- list(mu = p, K = K)
    }


    # if 3 omics layers
    if(length(K) == 3) {
      r_matrix <- cbind(z[[1]], z[[2]], z[[3]])
      r_fit <- r_matrix[, -c(1, K[1] + 1, K[1] + K[2] + 1)]

      if(is.null(CoY)) {
        fit <- glm(Y ~ r_fit, family = "binomial")
        b <- as.numeric(coef(fit))
      }else{
        Set0 <- as.data.frame(cbind(Y, r_fit, CoY))
        colnames(Set0) <- c("Y", paste0("LC", 1:ncol(r_fit)), colnames(CoY))
        fit <- glm(as.formula(paste("Y~", paste(colnames(Set0)[-1], collapse = "+"))), data = Set0, family ="binomial")
        b <- as.numeric(coef(fit))
      }

      b_array <- vec_to_array(K = K, mu = b)
      p <- 1 / (1 + exp(-b_array))
      x <- list(mu = p,
                K = K)
    }

    # if 4 omics layers
    if(length(K) == 4) {
      r_matrix <- cbind(z[[1]], z[[2]], z[[3]], z[[4]])
      r_fit <- r_matrix[, -c(1, K[1] + 1, K[1] + K[2] + 1, K[1] + K[2] + K[3] + 1)]

      if(is.null(CoY)) {
        fit <- glm(Y ~ r_fit, family = "binomial")
        b <- as.numeric(coef(fit))
      }else{
        fit <- glm(Y ~ r_fit + CoY, family = "binomial")
        b <- as.numeric(coef(fit))
      }

      b_array <- vec_to_array(K = K, mu = b)
      p <- 1 / (1 + exp(-b_array))
      x <- list(mu = p,
                K = K)
    }

    # if 5 omics layers
    if(length(K) == 5) {
      r_matrix <- cbind(z[[1]], z[[2]], z[[3]], z[[4]], z[[5]])
      r_fit <- r_matrix[, -c(1, K[1] + 1, K[1] + K[2] + 1, K[1] + K[2] + K[3] + 1, K[1] + K[2] + K[3] + K[4] + 1)]

      if(is.null(CoY)) {
        fit <- glm(Y ~ r_fit, family = "binomial")
        b <- as.numeric(coef(fit))
      }else{
        fit <- glm(Y ~ r_fit + CoY, family = "binomial")
        b <- as.numeric(coef(fit))
      }

      b_array <- vec_to_array(K = K, mu = b)
      p <- 1 / (1 + exp(-b_array))
      x <- list(mu = p,
                K = K)
    }


  }
  return(x)
}




indicator <- function(x) {
  m <- 0
  if(x > 1) {
    m <- 1
  }
  return(m)
}


vec_to_array <- function(K, mu) {
  res <- array(data = rep(0, prod(K)),
               dim = K)

  # if nK = 2, transform mu to a matrix
  if(length(K) == 2) {
    for(i in 1:K[1]) {
      for(j in 1:K[2]) {
        res[i, j] <- mu[1] + indicator(i) * mu[i] + indicator(j) * mu[K[1] + j - 1]
      }
    }
  }

  # if nK = 3, transform mu to a 3d array
  if(length(K) == 3) {
    for(i in 1:K[1]) {
      for(j in 1:K[2]) {
        for (k in 1:K[3]) {
          res[i, j, k] <- mu[1] + indicator(i) * mu[i] + indicator(j) * mu[K[1] + j - 1] + indicator(k) * mu[K[1] + K[2] + k - 2]
        }
      }
    }
  }

  # if nK = 4, transform mu to a 4d array
  if(length(K) == 4) {
    for(i in 1:K[1]) {
      for(j in 1:K[2]) {
        for (k in 1:K[3]) {
          for(l in 1:K[4]) {
            res[i, j, k, l] <- mu[1] + indicator(i) * mu[i] + indicator(j) * mu[K[1] + j - 1] + indicator(k) * mu[K[1] + K[2] + k - 2] + indicator(l) * mu[K[1] + K[2] + K[3] + l - 3]
          }
        }
      }
    }
  }

  # if nK = 5, transform mu to a 5d array
  if(length(K) == 5) {
    for(i in 1:K[1]) {
      for(j in 1:K[2]) {
        for (k in 1:K[3]) {
          for(l in 1:K[4]) {
            for(m in 1:K[5]) {
              res[i, j, k, l, m] <- mu[1] + indicator(i) * mu[i]
              + indicator(j) * mu[K[1] + j - 1]
              + indicator(k) * mu[K[1] + K[2] + k - 2] +
                indicator(l) * mu[K[1] + K[2] + K[3] + l - 3] +
                indicator(m) * mu[K[1] + K[2] + K[3] + K[4] + m - 4]
            }
          }
        }
      }
    }
  }


  return(res)
}

Try the LUCIDus package in your browser

Any scripts or data that you put into this service are public.

LUCIDus documentation built on March 11, 2026, 9:06 a.m.