R/init_pam.R
In cobiclust: Biclustering via Latent Block Model Adapted to Overdispersed Count Data

Documented in init_pam

# cobiclust R package Copyright INRAE 2024 Universite Paris-Saclay,
# AgroParisTech, INRAE, UMR MIA Paris-Saclay, 91120, Palaiseau, France
#' Initialisation of the co-clusters by partitioning around medoids method.
#'
#' @param x The output of the cobiclust function.
#' @param nu_j a vector of numeric, corresponding of a column effect, may be interpreted as a sampling effort. The length is equal to the number of columns.
#' @param a an numeric.
#' @param K an integer specifying the number of groups in rows.
#' @param G an integer specifying the number of groups in columns.
#' @param akg a logical variable indicating whether to use a common dispersion parameter (\code{akg = FALSE}) or a dispersion parameter per cocluster (\code{akg = TRUE}).
#' @return A list of
#' \describe{
#' \item{\code{nu_j}}{nu_j.}
#' \item{\code{mu_i}}{mu_i.}
#' \item{\code{t_jg}}{t_jg.}
#' \item{\code{s_ik}}{s_ik.}
#' \item{\code{pi_c}}{pi.}
#' \item{\code{rho_c}}{rho.}
#' \item{\code{a}}{a.}
#' \item{\code{exp_utilde}}{exp_utilde.}
#' \item{\code{exp_logutilde}}{exp_logutilde.}
#' \item{\code{alpha_c}}{alpha.}
#' }
#' @importFrom cluster pam
#' @importFrom stats median
#' @export
#' @keywords internal
#'
init_pam <- function(x, nu_j = NULL, a = NULL, K = K, G = G, akg = FALSE) {
    n <- nrow(x)
    m <- ncol(x)
    # Initialisation of mu_i
    # ----------------------------------------------------------------
    mu_i <- rowMeans(x)
    # Calculation of nu_j
    # -------------------------------------------------------------------
    if (is.null(nu_j)) {
        nu_j <- colSums(x)/mean(colSums(x))
    } else {
        nu_j <- nu_j
    }

    y <- sqrt(x + 0.5)

    # Initialisation of the co-clusters by partitioning around medoids method
    # ---------------
    init.rows <- cluster::pam(y, k = K, cluster.only = TRUE)
    pi_c <- tabulate(init.rows)/n

    init.cols <- cluster::pam(t(y), k = G, cluster.only = TRUE)
    rho_c <- tabulate(init.cols)/m

    if (is.matrix(nu_j)) {
        tmp0 <- sapply(seq_len(m), FUN = function(j) {
            sapply(seq_len(n), FUN = function(i) {
                x[i, j]/(mu_i[i] * nu_j[i, j])
            })
        })
    } else {
        tmp0 <- t(scale(t(scale(x, center = FALSE, scale = nu_j)),
            center = FALSE, scale = mu_i))
    }
    # alpha_c <- sapply(seq_len(G), FUN = function(y) sapply(seq_len(K), FUN =
    # function(x) mean(tmp0[which(init.rows$clustering == x),
    # which(init.cols$clustering == y)])))

    t_jg <- matrix(nrow = m, ncol = G, 0)
    for (i in seq_len(m)) {
        t_jg[i, init.cols[i]] <- 1
    }

    s_ik <- matrix(nrow = n, ncol = K, 0)
    for (i in seq_len(n)) {
        s_ik[i, init.rows[i]] <- 1
    }

    alpha_c <- crossprod(s_ik, tmp0 %*% t_jg)/crossprod(s_ik, matrix(1,
        nrow = n, ncol = m) %*% t_jg)
    if (is.null(a)) {
        # Initialisation of a (gamma parameter)
        # ---------------------------------------------- m_co <- (t(s_ik) %*% x
        # %*% t_jg) / (t(s_ik) %*% matrix(1, nrow = n, ncol = m) %*% t_jg)
        m_co <- crossprod(s_ik, x %*% t_jg)/crossprod(s_ik, matrix(1,
            nrow = n, ncol = m) %*% t_jg)
        tmp <- sapply(seq_len(G), FUN = function(g) {
            sapply(seq_len(K), FUN = function(k) {
                sum((x[init.rows == k, init.cols == g] - m_co[k, g])^2)
            })
        })
        var_co <- tmp/((t(s_ik) %*% matrix(1, nrow = n, ncol = m) %*%
            t_jg) - 1)

        a <- (var_co - m_co)/(m_co * m_co)
    }
    if (akg == FALSE) {
        a <- max(median(a[is.finite(a)]), 0.1)
        if (is.matrix(nu_j)) {
            b_tilde <- a + t(sapply(seq_along(mu_i), FUN = function(j) {
                (s_ik[j, ] * mu_i[j]) %*% tcrossprod(alpha_c, t_jg *
                  nu_j[j, ])
            }))
        } else {
            b_tilde <- a + (s_ik * mu_i) %*% tcrossprod(alpha_c, t_jg *
                nu_j)
        }

        a_tilde <- a + rowSums(s_ik) %*% diag(nrow = 1, ncol = 1,
            1) %*% rowSums(t_jg) * x
    } else {
        a <- apply(a, c(1, 2), FUN = function(y) max(y[is.finite(y)],
            0.1))
        if (is.matrix(nu_j)) {
            b_tilde <- tcrossprod(s_ik %*% a, t_jg) + t(sapply(seq_along(mu_i),
                FUN = function(j) (s_ik[j, ] * mu_i[j]) %*% tcrossprod(alpha_c,
                  t_jg * nu_j[j, ])))
        } else {
            b_tilde <- tcrossprod(s_ik %*% a, t_jg) + t(sapply(seq_along(mu_i),
                FUN = function(j) (s_ik[j, ] * mu_i[j]) %*% tcrossprod(alpha_c,
                  t_jg * nu_j)))
        }
        a_tilde <- tcrossprod(s_ik %*% a, t_jg) + rowSums(s_ik) %*%
            diag(nrow = 1, ncol = 1, 1) %*% rowSums(t_jg) * x
    }
    # }
    exp_utilde <- a_tilde/b_tilde
    exp_logutilde <- digamma(a_tilde) - log(b_tilde)
    # Output
    strategy <- list(akg = akg, cvg_lim = NULL)
    parameters <- list(alpha = alpha_c, pi = pi_c, rho = rho_c, mu_i = mu_i,
        nu_j = nu_j, a = a)
    info <- list(s_ik = s_ik, t_jg = t_jg, exp_logutilde = exp_logutilde,
        exp_utilde = exp_utilde)
    output <- list(data = x, K = K, G = G, classification = NULL,
        strategy = strategy, parameters = parameters, info = info)
    class(output) <- append(class(output), "cobiclustering")
    invisible(output)
}

Any scripts or data that you put into this service are public.

cobiclust documentation built on May 29, 2024, 6:23 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

cobiclust
Biclustering via Latent Block Model Adapted to Overdispersed Count Data

R/init_pam.R
In cobiclust: Biclustering via Latent Block Model Adapted to Overdispersed Count Data

Defines functions init_pam

Documented in init_pam

Try the cobiclust package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

cobiclust Biclustering via Latent Block Model Adapted to Overdispersed Count Data

R/init_pam.R In cobiclust: Biclustering via Latent Block Model Adapted to Overdispersed Count Data

Defines functions init_pam

Documented in init_pam

Try the cobiclust package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

cobiclust
Biclustering via Latent Block Model Adapted to Overdispersed Count Data

R/init_pam.R
In cobiclust: Biclustering via Latent Block Model Adapted to Overdispersed Count Data