mnem: Mixture Nested Effects Models

#' @noRd
llfree <- function(D, phi, theta) {
    Dnorm <- D
    Dnorm[D > 0] <- 1
    Dnorm[D <= 0] <- -1
    phi <- mytc(phi)
    phi <- phi[colnames(D), ]
    Fmat <- phi%*%theta
    Fmat[Fmat == 0] <- -1
    notlh <- 1 - sum(abs(D - t(Fmat)))/sum(abs(D + Dnorm))
    return(notlh)
}
#' Originally imported from the package 'nem'.
#' @noRd
#' @importFrom e1071 bincombinations
#' @author Florian Markowetz
enumerate.models <- function (x, name = NULL, trans.close = TRUE,
                              verbose = TRUE) {
    if (length(x) == 1) {
        n <- as.numeric(x)
        if (is.null(name))
            name <- letters[seq_len(n)]
    }
    else {
        n <- length(x)
        name <- x
    }
    if (n == 1)
        stop("nem> choose n>1!")
    if (n > 5)
        stop(paste0("nem> exhaustive enumeration not ",
                    "feasible with more than 5 perturbed genes"))
    if (n == 5)
        cat("nem> this will take a while ... \n")
    bc <- e1071::bincombinations(n * (n - 1))
    fkt1 <- function(x, n, name) {
        M <- diag(n)
        M[which(M == 0)] <- x
        dimnames(M) <- list(name, name)
        if (trans.close)
            M <- transitive.closure(M)
        return(list(M))
    }
    models <- apply(bc, 1, fkt1, n, name)
    models <- unique(matrix(unlist(models), ncol = n * n, byrow = TRUE))
    fkt2 <- function(x, n, name) {
        M <- matrix(x, n)
        dimnames(M) <- list(name, name)
        return(list(M))
    }
    models <- unlist(apply(models, 1, fkt2, n, name), recursive = FALSE)
    if (verbose)
        cat("Generated", length(models), "unique models ( out of",
            2^(n * (n - 1)), ")\n")
    return(models)
}
#' Originally imported from the package 'nem'.
#' @noRd
#' @author Holger Froehlich, Cordula Zeller
sampleRndNetwork <- function (Sgenes, scaleFree = TRUE, gamma = 2.5,
                              maxOutDegree = length(Sgenes),
                              maxInDegree = length(Sgenes), trans.close = TRUE, DAG = FALSE) {
    n = length(Sgenes)
    S = diag(n)
    maxOutDegree = min(n, maxOutDegree)
    degprob = (0:maxOutDegree)^(-gamma)
    degprob[1] = 1
    degprob = degprob/sum(degprob)
    for (i in seq_len(n)) {
        if (scaleFree)
            outdeg = sample(0:maxOutDegree, 1, prob = degprob)
        else outdeg = sample(0:maxOutDegree, 1)
        if (outdeg > 0) {
            if (!DAG)
                idx0 = which(S[i, ] == 0)
            else idx0 = which(S[i, ] == 0 & seq_len(n) < i)
            if (length(idx0) > 0) {
                idx = which(colSums(S[, idx0, drop = FALSE]) <=
                                maxInDegree)
                if (length(idx) > 0) {
                    idx = sample(idx0[idx], min(outdeg, length(idx0[idx])),
                                 replace = TRUE)
                    S[i, idx] = 1
                }
            }
        }
    }
    if (trans.close)
        S = transitive.closure(S)
    diag(S) = 0
    colnames(S) = Sgenes
    rownames(S) = Sgenes
    S
}
#' @noRd
#' @importFrom methods is
bigphi <- function(x) {
    if (is(x, "mnemsim")) {
        resfull <- NULL
        for (l in seq_len(length(x$Nem))) {
            tmp <- transitive.closure(x$Nem[[l]])
            resfull <- cbind(resfull, t(tmp))
        }
    } else {
        resfull <- NULL
        for (l in seq_len(length(x$comp))) {
            tmp <- transitive.closure(x$comp[[l]]$phi)
            colnames(tmp) <- rownames(tmp) <- seq_len(nrow(tmp))
            resfull <- cbind(resfull, t(tmp))
        }
    }
    return(resfull)
}
#' @noRd
Kratio <- function(x, y) {
    xn <- length(unlist(x$comp, recursive = FALSE))/2
    yn <- length(y$Nem)
    z <- xn/yn
    return(z)
}
#' @noRd
uniques <- function(x) {
    for (i in seq_len(length(x$comp))) {
        x$comp[[i]]$theta <- NULL
    }
    y <- length(unique(x$comp))
    return(y)
}
#' @noRd
mnemh.rec <- function(data, k = 2, logtype = 2, ...) {
    tmp <- mnemk(data, ks=seq_len(k), logtype = logtype, ...)
    cluster <- apply(getAffinity(tmp$best$probs,
                                 logtype = logtype,
                                 mw = tmp$best$mw), 2, which.max)
    if (length(tmp$best$comp) == 1) {
        return(tmp$best)
    } else {
        tmp1 <- NULL
        for (i in seq_len(length(tmp$best$comp))) {
            if (length(unique(colnames(data[, which(cluster == i)]))) > 1) {
                tmp2 <- mnemh.rec(data[, which(cluster == i)],
                                  k=k, logtype=logtype, ...)
            } else {
                tmp2 <- NULL
            }
            tmp1 <- c(tmp1, tmp2)
        }
        return(tmp1)
    }
}
#' @noRd
random_probs <- function(k, data, full = FALSE, logtype = 2) {
    samplefun <- function(n,p) {
        x <- sample(c(0.5,1.5), n,
                    replace = TRUE,
                    prob = p)

        return(x)
    }
    probs <- matrix(log(samplefun(k*ncol(data),
                                  c(0.9, 0.1)))/log(logtype), k,
                    ncol(data))
    if (full) {
        for (i in seq_len(k)) {
            if (i == 1) { next() }
            infcells <- which(apply(probs, 2, function(x) {
                bad <- FALSE
                if (all(is.infinite(x))) {
                    bad <- TRUE
                }
                return(bad)
            }))
            if (i == k) {
                probs[i, infcells] <- log(1)/log(logtype)
            } else {
                probs[i, infcells] <-
                    log(samplefun(length(infcells),
                                  c(1-1/k, 1/k)))/log(logtype)
            }
        }
    }
    while(any(apply(probs, 1, function(x) {
        bad <- FALSE
        if (all(is.infinite(x)) | all(x == 0)) {
            bad <- TRUE
        }
        return(bad)
    }))) {
        probs <- matrix(log(samplefun(k*ncol(data),
                                      c(0.9, 0.1)))/log(logtype), k,
                        ncol(data))
        if (full) {
            for (i in seq_len(k)) {
                if (i == 1) { next() }
                infcells <- which(apply(probs, 2, function(x) {
                    bad <- FALSE
                    if (all(is.infinite(x))) {
                        bad <- TRUE
                    }
                    return(bad)
                }))
                if (i == k) {
                    probs[i, infcells] <- log(1)/log(logtype)
                } else {
                    probs[i, infcells] <-
                        log(samplefun(length(infcells),
                                      c(1-1/k, 1/k)))/log(logtype)
                }
            }
        }
    }
    return(probs)
}
#' @noRd
random_probs2 <- function(k, data, full = FALSE, logtype = 2) {
    samplefun <- function(n,p) {
        x <- sample(c(0.5,1.5), n,
                    replace = TRUE,
                    prob = p)

        return(x)
    }
    probs <- matrix(0, k, ncol(data))
    for (i in seq_len(k)) {
        if (i == 1) {
            probs[i, ] <- samplefun(ncol(data), c(1-1/k,1/k))
        } else {
            probs[i, which(probs[i-1, ] == 0)] <-
                samplefun(sum(probs[i-1, ] == 0), c(1-1/(k+1-i),1/(k+1-i)))
        }
    }
    return(probs)
}
#' @noRd
sortAdj <- function(res, list = FALSE) {
    resmat <- NULL
    for (i in seq_len(length(res))) {
        if (list) {
            resmat <-
                rbind(resmat,
                      as.vector(mytc(res[[i]])))
        } else {
            resmat <-
                rbind(resmat,
                      as.vector(mytc(res[[i]]$adj)))
        }
    }
    d <- as.matrix(dist(resmat))
    dsum <- apply(d, 1, sum)
    resorder <- which.max(dsum)
    d[resorder, resorder] <- Inf
    for (i in 2:length(dsum)) {
        resorder <- c(resorder, which.min(d[, resorder[length(resorder)]]))
        d[resorder, resorder] <- Inf
    }
    res2 <- list()
    for (i in seq_len(length(res))) {
        res2[[i]] <- res[[resorder[i]]]
    }
    return(list(res = res2, order = resorder))
}
#' @noRd
calcEvopen <- function(res, list = TRUE) {
    evopen <- 0
    for (i in seq_len(length(res)-1)) {
        if (list) {
            evopen <- evopen + sum(abs(res[[i]] -
                                           res[[(i+1)]]))/length(res[[i]])
        } else {
            evopen <- evopen + sum(abs(res[[i]]$adj -
                                           res[[(i+1)]]$adj))/length(res[[i]]$adj)
        }
    }
    evopen <- -evopen#/(k-1)
    return(evopen)
}
#' @noRd
transProb <- function(x, y, lambda = 0.5) {
    n <- nrow(x)
    exp <- sum(abs(x-y))
    enum <- lambda*(1-lambda)^(exp-1)
    denom <- 0
    ne <- (n*(n-1))
    for (i in 0:ne) {
        denom <- denom + choose(ne, i)*lambda*(1-lambda)^(i-1)
    }
    p <- enum/denom
    return(p)
}
#' @noRd
fullTransProb <- function(x, ...) {
    p <- 1
    for (i in seq_len(length(x)-1)) {
        p <- p*transProb(x[[i]], x[[i+1]], ...)
    }
    return(p)
}
#' @noRd
modAdj <- function(adj, D) {
    Sgenes <- getSgenes(D)
    SgeneN <- getSgeneN(D)
    for (i in seq_len(SgeneN)) {
        colnames(adj) <- rownames(adj) <- gsub(i, Sgenes[i], colnames(adj))
    }
    return(adj)
}
#' @noRd
getRho <- function(data) {

    Sgenes <- getSgenes(data)

    Rho <- matrix(0, length(Sgenes), ncol(data))

    for (i in seq_len(length(Sgenes))) {
        Rho[i, grep(paste0("^", Sgenes[i], "_|_", Sgenes[i],
                           "$|_", Sgenes[i], "_|^", Sgenes[i], "$"),
                    colnames(data))] <- 1
    }

    rownames(Rho) <- Sgenes
    colnames(Rho) <- colnames(data)
    Rho <- Rho[naturalsort(rownames(Rho)), ]
    return(Rho)
}
#' @noRd
initComps <- function(data, k=2, starts=1, verbose = FALSE, meanet = NULL,
                      linets = TRUE) {
    n <- getSgeneN(data)
    init <- list()
    for (i in seq_len(starts)) {
        init[[i]] <- list()
        for (j in seq_len(k)) {
            if (linets) {
                tmp <- matrix(0, n, n)
                tmp[upper.tri(tmp)] <- 1
                diag(tmp) <- 1
                rownames(tmp) <- colnames(tmp) <- sample(seq_len(n), n)
                tmp <- tmp[naturalorder(rownames(tmp)),
                           naturalorder(colnames(tmp))]
            } else {
                tmp <- sampleRndNetwork(seq_len(n), DAG = TRUE)
            }
            init[[i]][[j]] <- tmp
        }
    }
    return(init)
}
#' @noRd
initps <- function(data, ks, k,
                   starts = 3,
                   ksel = c("kmeans", "silhouette", "manhattan")) {
    clusters <- list()
    multi <- grep("_", colnames(data))
    if (length(multi) > 0) {
        data2 <- data[, -multi]
    } else {
        data2 <- data
    }
    for (i in seq_len(length(unique(colnames(data2))))) {
        if ("cor" %in% ksel) {
            d <- (1 - cor(data[, which(colnames(data) %in% i)]))/2
            d <- as.dist(d)
        } else {
            d <- dist(t(data[, which(colnames(data) %in% i)]),
                      method = ksel[3])
        }
        if (length(d) > 1) {
            hc <- hclust(d)
            clusters[[i]] <- cutree(hc, min(ks[i], length(hc$labels)))
        } else {
            clusters[[i]] <- 1
        }
    }
    probscl <- list()

    n <- getSgeneN(data)

    count <- 0
    llstr <- NULL
    resstr <- NULL
    counter <- 0
    takes <- NULL
    takesdone <- NULL
    while(count < starts & counter < prod(ks)) {
        counter <- counter + 1
        tmp <- matrix(0.5, k, ncol(data))
        if (ks[1] < k) {
            takes <- as.matrix(sample(seq_len(ks[1]), k, replace = TRUE), k, 1)
        } else {
            takes <- as.matrix(sample(seq_len(ks[1]), k), k, 1)
        }
        for (i in 2:length(unique(colnames(data)))) {
            if (ks[i] < k) {
                takes <- cbind(takes, sample(seq_len(ks[i]), k, replace = TRUE))
            } else {
                takes <- cbind(takes, sample(seq_len(ks[i]), k))
            }
        }
        for (i in seq_len(k)) {
            for (j in seq_len(n)) {
                tmp[i, which(colnames(data) == j)[
                    which(clusters[[j]] == takes[i, j])]] <- 1.5
            }
        }
        takestmp <- paste(takes, collapse = "")
        if (!(takestmp %in% takesdone)) {
            count <- count + 1
            takesdone <- c(takesdone, takestmp)
            probscl[[count]] <- log2(tmp)
        }
    }
    return(probscl)
}
#' @noRd
modData <- function(D) {
    SgeneN <- getSgeneN(D)
    Sgenes <- getSgenes(D)
    if (!all(is.numeric(Sgenes))) {
        colnamesD <- colnames(D)
        for (i in seq_len(SgeneN)) {
            colnamesD <- gsub(paste0("^", Sgenes[i], "$"), i, colnamesD)
            colnamesD <- gsub(paste0("_", Sgenes[i], "$"),
                              paste0("_", i), colnamesD)
            colnamesD <- gsub(paste0("^", Sgenes[i], "_"),
                              paste0(i, "_"), colnamesD)
            colnamesD <- gsub(paste0("_", Sgenes[i], "_"),
                              paste0("_", i, "_"), colnamesD)
        }
        colnames(D) <- colnamesD
    }
    rownames(D) <- as.numeric(seq_len(nrow(D)))
    return(D)
}
#' @noRd
#' @importFrom cluster silhouette
#' @importFrom stats kmeans
learnk <- function(data, kmax = 10, ksel = c("hc", "silhouette", "cor"),
                   starts = 10, mono = TRUE, verbose = FALSE) {
    ks <- numeric(length(unique(colnames(data))))
    lab <- list()
    index <- numeric(ncol(data))
    for (i in naturalsort(as.numeric(unique(colnames(data))))) {
        if (verbose) {
            print(i)
        }
        if (sum(colnames(data) %in% i) <= 1) { k <- 1; next() }
        if ("cor" %in% ksel) {
            d <- (1 - cor(data[, which(colnames(data) %in% i)]))/2
            d <- as.dist(d)
        } else {
            d <- dist(t(data[, which(colnames(data) %in% i)]),
                      method=ksel[3])
        }
        if ("hc" %in% ksel) {
            hc <- hclust(d)
            ks[i] <- 2
            lab[[i]] <- rep(1, sum(colnames(data) %in% i))
            if (length(d) > 1) {
                silavg <- 0
                silavgs <- numeric(length(hc$order)-1)
                clusters <- list()
                for (j in 2:(length(hc$order)-1)) {
                    cluster <- cutree(hc, j)
                    clusters[[j]] <- cluster
                    if ("BIC" %in% ksel | "AIC" %in% ksel) {
                        m <- nrow(data)
                        n <- length(cluster)
                        k <- length(table(cluster))
                        D <- log(mean(silhouette(cluster, d)[, 3]))
                        if ("AIC" %in% ksel) {
                            silavgs[j] <- 2*m*k - 2*D
                        } else {
                            silavgs[j] <- log(n)*m*k - 2*D
                        }
                    }
                    if ("silhouette" %in% ksel) {
                        silavgs[j] <- mean(silhouette(cluster, d)[, 3])
                    }
                    if (verbose) {
                        print(silavgs[j])
                    }
                    if (silavgs[j] < silavgs[(j-1)]) {
                        break()
                    }
                    if (silavg < silavgs[j]) {
                        silavg <- silavgs[j]
                        ks[i] <- j
                        lab[[i]] <- cluster
                    }
                }
            }
            index[which(colnames(data) %in% i)] <- lab[[i]]
        }
        if ("kmeans" %in% ksel) {
            silavg <- 0
            silavgs <- numeric(kmax)
            clusters <- list()
            for (j in seq_len(kmax)) {
                if (j == 1) { next() }
                if (ncol(as.matrix(d)) <= j) { next() }
                kc <- kmeans(d, centers = j, nstart = starts)
                if ("BIC" %in% ksel | "AIC" %in% ksel) {
                    m <- ncol(kc$centers)
                    n <- length(kc$cluster)
                    k <- nrow(kc$centers)
                    D <- kc$tot.withinss
                    if ("AIC" %in% ksel) {
                        silavgs[j] <- 2*m*k + D
                    } else {
                        silavgs[j] <- log(n)*m*k + D
                    }
                }
                if ("silhouette" %in% ksel) {
                    silavgs[j] <- mean(silhouette(kc$cluster, d)[, 3])
                }
                if (verbose) {
                    print(silavgs[j])
                }
                if (j == 1) {
                    j2 <- j
                } else {
                    j2 <- j - 1
                }
                if (silavgs[j] < silavgs[j] & mono) {
                    break()
                }
                if (silavg < silavgs[j]) {
                    silavg <- silavgs[j]
                    ks[i] <- j
                    lab[[i]] <- kc$cluster
                }
            }
            index[which(colnames(data) %in% i)] <- lab[[i]]
        }
    }
    k <- min(kmax, max(ks))
    return(list(ks = ks, k = k, lab = lab, cluster = index))
}
#' @noRd
getLL <- function(x, logtype = 2, mw = NULL, data = NULL, complete = FALSE) {
    if (is.null(mw)) { mw = rep(1, nrow(x))/nrow(x) }
    if (complete) {
        Z <- getAffinity(x, logtype = logtype, mw = mw, data = data,
                         complete = complete)
        l <- sum(apply(Z*(x + log(mw)/log(logtype)), 2, sum))
    } else {
        x <- logtype^x
        x <- x*mw
        l <- sum(log(rep(1,nrow(x))%*%x)/log(logtype))
    }
    return(l)
}
#' @noRd
estimateSubtopo <- function(data, fun = which.max) {
    if (length(grep("_", colnames(data))) > 0) {
        data <- data[, -grep("_", colnames(data))]
    }
    effectsums <- effectsds <- matrix(0, nrow(data),
                                      length(unique(colnames(data))))
    n <- getSgeneN(data)
    for (i in seq_len(n)) {
        ilen <- length(grep(i, colnames(data)))
        if (ilen > 1) {
            effectsds[, i] <- apply(data[, grep(i, colnames(data))], 1, sd)
            effectsums[, i] <- apply(data[, grep(i, colnames(data))], 1, sum)
        } else if (ilen == 1) {
            effectsds[, i] <- 1
            effectsums[, i] <- data[, grep(i, colnames(data))]
        }
    }
    subtopoX <- as.numeric(apply(effectsums/effectsds, 1, fun))
    subtopoX[which(is.na(subtopoX) == TRUE)] <- 1
    return(subtopoX)
}
#' @noRd
getProbs <- function(probs, k, data, res, method = "llr", affinity = 0,
                     converged = 10^-2, subtopoX = NULL, ratio = TRUE,
                     logtype = 2, mw = NULL, fpfn = fpfn, Rho = NULL,
                     complete = FALSE, nullcomp = FALSE, marginal = FALSE) {
    n <- ncol(res[[1]]$adj)
    bestprobs <- probsold <- probs
    time0 <- TRUE
    count <- 0
    if (ncol(data) <= 100) {
        max_count <- 100
    } else {
        max_count <- 10
    }
    ll0 <- llold <- -Inf
    stop <- FALSE
    mw <- apply(getAffinity(probsold, affinity = affinity, norm = TRUE,
                            mw = mw, logtype = logtype, data = data,
                            complete = complete), 1, sum)
    mw <- mw/sum(mw)
    if (any(is.na(mw))) { mw <- rep(1, k)/k }
    while((!stop | time0) & count < max_count) {
        time0 <- FALSE
        probsold <- probs
        subtopo0 <- matrix(0, k, nrow(data))
        subweights0 <- matrix(0, nrow(data), n+1)
        postprobsold <- getAffinity(probsold, affinity = affinity, norm = TRUE,
                                    logtype = logtype, mw = mw, data = data,
                                    complete = complete)
        probs0 <- probsold*0
        eprobs <- matrix(0, k, nrow(data))
        for (i in seq_len(k)) {
            if (i == k & nullcomp) {
                tmp <- tmp2 <- 0
            } else {
                adj1 <- mytc(res[[i]]$adj)
                if (is.null(Rho)) {
                    adj1 <- adj1[colnames(data), ]
                } else {
                    adj1 <- t(Rho)%*%adj1
                    adj1[which(adj1 > 1)] <- 1
                }
                adj1 <- cbind(adj1, "0" = 0)
                if (is.null(res[[i]]$subtopo)) {
                    subtopo <- maxCol_row(t(t(data)*postprobsold[i, ])%*%adj1)
                } else {
                    subtopo <- res[[i]]$subtopo
                    subtopo[which(subtopo == 0)] <- ncol(adj1)
                }
                adj2 <- adj1[, subtopo]
                if (marginal) {
                    tmp2 <- llrScore(data, adj1, ratio = ratio)
                    tmp2 <- logtype^tmp2
                    tmp2 <- log(rowSums(tmp2))/log(logtype)
                }
                if (method %in% "llr") {
                    tmp <- colSums(data*t(adj2))
                }
            }
            probs0[i, ] <- tmp
            if (marginal) {
                eprobs[i, ] <- tmp2
            }
        }
        if (marginal) {
            ll0 <- getLL(eprobs, logtype = logtype, mw = mw,
                         data = data, complete = complete)
        } else {
            ll0 <- getLL(probs0, logtype = logtype, mw = mw,
                         data = data, complete = complete)
        }
        if (max(ll0) - llold > 0) {
            bestprobs <- probs0
        }
        probs <- probs0
        if (max(ll0) - llold <= converged) {
            stop <- TRUE
        }
        mw <- apply(getAffinity(probs, affinity = affinity, norm = TRUE,
                                logtype = logtype, mw = mw, data = data,
                                complete = complete), 1,
                    sum)
        mw <- mw/sum(mw)
        llold <- max(ll0)
        count <- count + 1
    }
    return(list(probs = bestprobs, subtopoX = subtopoX,
                mw = mw, ll = llold))
}
#' @noRd
annotAdj <- function(adj, data) {
    Sgenes <- getSgenes(data)
    colnames(adj) <- rownames(adj) <- naturalsort(Sgenes)
    return(adj)
}
#' @noRd
nemEst2 <- function(data, maxiter = 100, start = "null",
                   cut = 0, monoton = TRUE, logtype = 2,
                   method = "llr",
                   weights = NULL, fpfn = c(0.1, 0.1), Rho = NULL,
                   close = TRUE, domean = TRUE, modified = FALSE,
                   hierarchy = "totaleffect", tree = TRUE, cutlen = 100,
                   ...) {
    if (method %in% "disc") {
        D <- data
        D[which(D == 1)] <- log((1-fpfn[2])/fpfn[1])/log(logtype)
        D[which(D == 0)] <- log(fpfn[2]/(1-fpfn[1]))/log(logtype)
        method <- "llr"
        data <- D
    }
    if (!modified) {
        data <- modData(data)
    }
    if (sum(duplicated(colnames(data)) == TRUE) > 0 &
        method %in% "llr" & domean) {
        if (!is.null(weights)) {
            D <- data
            data <- data*rep(weights, rep(nrow(data), ncol(data)))
        }
        data2 <- doMean(data, weights = weights, Rho = Rho, logtype = logtype)
        data <- D
    } else {
        data2 <- data
    }
    if (is.null(weights)) { weights <- rep(1, ncol(data2)) }
    R <- data2[, naturalsort(colnames(data2))]
    Rho <- diag(ncol(R))
    Sgenes <- getSgenes(R)
    phi <- matrix(0, n, n)
    rownames(phi) <- colnames(phi) <- Sgenes
    R2 <- t(R)%*%R
    R2 <- R2 + diag(R2)
    E <- apply(R, 2, sum)
    E <- phi[order(E, decreasing=TRUE), order(E, decreasing=TRUE)]
    E[upper.tri(E)] <- 1
    E <- E[naturalsort(rownames(E)), naturalsort(colnames(E))]
    llmax <- -Inf
    lls <- NULL
    for (cut in c(0,seq(min(R2),max(R2),length.out=cutlen))) {
        phi[R2 > cut] <- 1
        phi[R2 <= cut] <- 0
        ll <- scoreAdj(R, phi, weights = weights, dotopo = TRUE,
                       trans.close = close, Rho = Rho)
        theta <- theta2theta(ll$subtopo, phi)
        lls <- c(lls,ll)
        if (ll > llmax) {
            phibest <- phi
            thetabest <- theta
            llmax <- ll
        }
    }
    nem <- list(phi = phibest, theta = thetabest, iter = cutlen,
                ll = llbest, lls = lls, num = which.max(lls),
                O = E, E = E, phintc = phi)
    class(nem) <- "nemEst"
    return(nem)
}
#' @noRd
nemEst <- function(data, maxiter = 100, start = "null",
                    cut = 0, monoton = TRUE, logtype = 2,
                    method = "llr",
                    weights = NULL, fpfn = c(0.1, 0.1), Rho = NULL,
                    close = TRUE, domean = TRUE, modified = FALSE,
                    hierarchy = "totaleffect", tree = TRUE, ...) {
    if (method %in% "disc") {
        D <- data
        D[which(D == 1)] <- log((1-fpfn[2])/fpfn[1])/log(logtype)
        D[which(D == 0)] <- log(fpfn[2]/(1-fpfn[1]))/log(logtype)
        method <- "llr"
        data <- D
    }
    if (!modified) {
        data <- modData(data)
    }
    if (sum(duplicated(colnames(data)) == TRUE) > 0 &
        method %in% "llr" & domean) {
        if (!is.null(weights)) {
            D <- data
            data <- data*rep(weights, rep(nrow(data), ncol(data)))
        }
        data2 <- doMean(data, weights = weights, Rho = Rho, logtype = logtype)
        data <- D
    } else {
        data2 <- data
    }
    if (is.null(weights)) { weights <- rep(1, ncol(data2)) }
    R <- data2[, naturalsort(colnames(data2))]
    Rho <- diag(ncol(R))
    N <- rowSums(Rho)
    n <- getSgeneN(R)
    phibest <- phi <- matrix(0, n, n)
    Sgenes <- getSgenes(R)
    rownames(phi) <- colnames(phi) <- Sgenes
    if (hierarchy == "totaleffect") {
        E0 <- apply(t(t(R%*%t(Rho))/N), 2, sum)
        phi <- phi[order(E0, decreasing = TRUE), order(E0, decreasing = TRUE)]
        phi[upper.tri(phi)] <- 1
        phi <- phi[naturalsort(rownames(phi)), naturalsort(colnames(phi))]
        E <- phi
    } else if (hierarchy == "pairwise.greedy") {
        for (i in seq_len(n-1)) {
            for (j in (i+1):n) {
                pwg <- nem(R[, c(i,j)], logtype = logtype,
                           weights = weights[c(i,j)], trans.close = close,
                           domean = FALSE)
                phi[i, j] <- pwg$adj[1, 2]
                phi[j, i] <- pwg$adj[2, 1]
            }
        }
        E0 <- E <- phi
    } else {
        Rpos <- t(t(R%*%t(Rho))/N)
        Rpos[which(Rpos < 0)] <- 0
        E0 <- t(Rpos)%*%(R%*%t(Rho))
        E <- E0  - t(E0)
        parents <- which(E <= 0)
        children <- which(E > 0)
        E[parents] <- 1
        E[children] <- 0
        E <- E[naturalorder(rownames(E)), naturalorder(colnames(E))]
        phi <- phi[naturalsort(rownames(phi)), naturalsort(colnames(phi))]
    }
    if ("full" %in% start) {
        phi <- phi
        diag(phi) <- 1
    } else if ("rand" %in% start) {
        phi <- phi*0
        diag(phi) <- 1
        phi[seq_len(length(phi))] <- sample(c(0,1), length(phi), replace = TRUE)
        phi[lower.tri(phi)] <- 0
        phi <- phi[sample(seq_len(nrow(phi)), nrow(phi)),
                   sample(seq_len(nrow(phi)), nrow(phi))]
        phi <- phi[naturalsort(rownames(phi)), naturalsort(colnames(phi))]
    } else if ("null" %in% start) {
        phi <- phi*0
        diag(phi) <- 1
    } else {
        phi <- start
    }
    O <- phi*0
    iter <- Oold <- 0
    lls <- NULL
    llbest <- -Inf
    stop <- FALSE
    while(!stop & iter < maxiter) {
        iter <- iter + 1
        ll <- scoreAdj(R, phi,
                       weights = weights, dotopo = TRUE,
                       trans.close = close, Rho = Rho)
        P <- ll$subweights
        theta <- theta2theta(ll$subtopo, phi)
        ll <- ll$score
        Oold <- O
        if (ll %in% lls | all(phi == phibest)) {
            stop <- TRUE
        }
        if (monoton & iter > 1) {
            if (ll < lls[length(lls)]) { stop <- TRUE }
        }
        if (llbest < ll) {
            phibest <- phi
            thetabest <- theta
            llbest <- ll
            numbest <- iter
            Obest <- O
        }
        lls <- c(lls, ll)
        nogenes <- which(apply(theta, 1, sum) == 0)
        nozeros <- which(t(P) > 0, arr.ind = TRUE)
        nozeros <- nozeros[which(nozeros[, 1] %in% nogenes), ]
        theta[nozeros] <- 1
        O <- (t(R%*%t(Rho))*weights)%*%t(theta)
        cutoff <- cut*max(abs(O))
        phi[which(O > cutoff & E == 1)] <- 1
        phi[which(O <= cutoff | E == 0)] <- 0
        if (close) {
            phi <- mytc(phi)
        }
    }
    phintc <- phibest
    if (close) {
        phibest <- mytc(phibest)
    }
    ll <- scoreAdj(R, phibest,
                   weights = weights, dotopo = TRUE,
                   trans.close = close, Rho = Rho)
    P <- ll$subweights
    theta <- theta2theta(ll$subtopo, phibest)
    llbest <- ll$score
    nem <- list(phi = phibest, theta = thetabest, iter = iter,
                ll = llbest, lls = lls, num = numbest,
                O = Obest, E = E0, phintc = phintc)
    class(nem) <- "nemEst"
    return(nem)
}
#' @noRd
doMean <- function(D, weights = NULL, Rho = NULL, logtype = 2) {
    man <- FALSE
    if (man) {
        mD <- matrix(0, nrow(D), length(unique(colnames(D))))
        if (!is.null(weights)) {
            doodds <- FALSE
            if (doodds) {
                A <- exp(D)/(1+exp(D))
                D <- log(t(t(A)*weights +
                               (1-weights)*0.5)/t(t(1 - A)*weights +
                                                      (1-weights)*0.5))/log(logtype)
            } else {
                D <- D*rep(weights, rep(nrow(D), ncol(D)))
            }
        }
        for (i in seq_len(length(unique(colnames(D))))) {
            mD[, i] <-
                apply(D[, which(colnames(D) %in% unique(colnames(D))[i]),
                        drop = FALSE], 1, mean)
        }
        colnames(mD) <- unique(colnames(D))
    } else {
        if (!is.null(weights)) {
            D <- D*rep(weights, rep(nrow(D), ncol(D)))
        }
        if (!is.null(Rho)) {
            Rho <- apply(Rho, 2, function(x) {
                if (sum(x) > 1) {
                    x <- x/sum(x)
                }
                return(x)
            })
            Rho[is.na(Rho)] <- 0
            mD <- D%*%t(Rho)
            mD <- mD[, naturalorder(colnames(mD))]
            colnames(mD) <- seq_len(ncol(mD))
        } else {
            global <- TRUE
            if (global) {
                mD <- t(rowsum(t(D), colnames(D)))/
                    (ncol(D)/length(unique(colnames(D))))
            } else {
                mD <- t(rowsum(t(D), colnames(D))/
                            as.numeric(table(colnames(D))))
            }
        }
    }
    mD <- mD[, naturalorder(colnames(mD))]
    return(mD)
}
#' @noRd
modules <- function(D, method = "llr", weights = NULL, reduce = FALSE,
                    start = NULL,
                    verbose = FALSE, trans.close = TRUE, redSpace = NULL,
                    subtopo = NULL, ratio = TRUE, parallel = NULL,
                    prior = NULL, fpfn = c(0.1, 0.1),
                    modulesize = 4, search = "exhaustive", domean = TRUE,
                    Rho = NULL, logtype = 2) {
    D <- data <- modData(D)
    n <- getSgeneN(D)
    Sgenes <- getSgenes(D)
    if (domean) {
        D <- doMean(D, weights = weights, Rho = Rho, logtype = logtype)
        weights <- rep(1, ncol(D))
        sumdata <- data <- D
    } else {
        sumdata <- matrix(0, nrow(data), n)
        if (!is.null(weights)) {
            D <- D*rep(weights, rep(nrow(D), ncol(D)))
            weights <- rep(1, ncol(sumdata))
        }
        for (i in seq_len(n)) {
            sumdata[, i] <-
                apply(D[, which(colnames(D) %in% i), drop = FALSE], 1, sum)
        }
        colnames(sumdata) <- seq_len(n)
        rownames(sumdata) <- rownames(D)
    }
    D <- NULL
    n <- getSgeneN(data)
    cordata <- cor(sumdata)
    cordata[is.na(cordata)] <- -1
    d <- as.dist((1 - cordata)/2)
    for (i in 2:n) {
        hc <- hclust(d)
        hcut <- cutree(hc, i)
        if (max(table(hcut)) <= modulesize) {
            break()
        }
    }
    groups <- table(hcut)
    fullnet <- NULL
    for (i in seq_len(length(groups))) {
        subset <- which(hcut == i)
        if (verbose) {
            print(paste(c("calculating module", subset), collapse = " "))
        }
        if (length(subset) > 1) {
            subdata <- data[, which(colnames(data) %in% subset)]
            if (is.null(start)) {
                start2 <- start
            } else {
                start2 <- start[which(rownames(start) %in% subset),
                                which(colnames(start) %in% subset)]
            }
            if (!is.null(Rho)) { Rho <- getRho(subdata) }
            tmp <- nem(subdata, search = search, method = method,
                       start = start2,
                       parallel = parallel, reduce = reduce,
                       weights = weights[which(colnames(data) %in% subset)],
                       verbose = verbose,
                       redSpace = redSpace, trans.close = trans.close,
                       subtopo = subtopo, prior = prior, ratio = ratio,
                       domean = FALSE, fpfn = fpfn, Rho = Rho,
                       logtype = logtype)
            if (is.null(fullnet)) {
                fullnet <- tmp$adj
            } else {
                tmpnames <- c(colnames(fullnet), colnames(tmp$adj))
                fullnet <-
                    rbind(cbind(fullnet,
                                matrix(0, nrow(fullnet), ncol(tmp$adj))),
                          cbind(matrix(0, nrow(tmp$adj), ncol(fullnet)),
                                tmp$adj))
                colnames(fullnet) <- rownames(fullnet) <- as.numeric(tmpnames)
            }
        } else {
            if (is.null(dim(fullnet))) {
                fullnet <- matrix(1, 1, 1)
                colnames(fullnet) <- rownames(fullnet) <- subset
            } else {
                fullnet <- rbind(cbind(fullnet, 0), 0)
                colnames(fullnet)[ncol(fullnet)] <-
                    rownames(fullnet)[nrow(fullnet)] <-
                    subset
            }
        }
    }
    fullnet <- transitive.reduction(fullnet)
    fullnet <- fullnet[order(as.numeric(rownames(fullnet))),
                       order(as.numeric(colnames(fullnet)))]
    return(fullnet)
}
#' @noRd
getSgeneN <- function(data) {
    Sgenes <- length(unique(unlist(strsplit(colnames(data), "_"))))
    return(Sgenes)
}
#' @noRd
getSgenes <- function(data) {
    Sgenes <- naturalsort(unique(unlist(strsplit(colnames(data), "_"))))
    return(Sgenes)
}
#' @noRd
get.rev.tc <-function (Phi) {
    Phitr <- transitive.reduction(Phi)
    idx = which(Phitr + t(Phitr) == 1, arr.ind = TRUE)
    models = list()
    nn <- dim(Phi)
    if (NROW(idx) > 0) {
        for (i in seq_len(NROW(idx))) {
            Phinew = Phi
            Phinew[idx[i, 1], idx[i, 2]] = 1 - Phinew[idx[i, 1], idx[i, 2]]
            Phinew[idx[i, 2], idx[i, 1]] = 1 - Phinew[idx[i, 2], idx[i, 1]]
            diag(Phinew) = 1
            if (Phinew[idx[i, 1], idx[i, 2]] == 1) {
                uv <- idx[i, ]
            } else {
                uv <- rev(idx[i, ])
            }
            Phinew <- mytc(Phinew, uv[1], uv[2])
            models[[i]] <- Phinew
        }
    }
    models
}
#' @noRd
get.ins.fast <- function (Phi, trans.close = TRUE, tree = FALSE) {
    idx = which(Phi == 0)
    models = list()
    nn <- dim(Phi)
    if (length(idx) > 0) {
        for (i in seq_len(length(idx))) {
            uv <- arrayInd(idx[i], nn)
            Phinew = Phi
            Phinew[idx[i]] = 1
            if (trans.close) {
                Phinew = mytc(Phinew, uv[1], uv[2])
            }
            if (!tree | all(colSums(transitive.reduction(Phinew))<=1)) {
                models[[i]] <- Phinew
            } else {
                models[[i]] <- Phi
            }
        }
    }
    models
}
#' @noRd
get.del.tc <- function (Phi) {
    Phi = Phi - diag(ncol(Phi))
    Phi2 <- transitive.reduction(Phi)
    idx = which(Phi2 == 1)
    models = list()
    if (length(idx) > 0) {
        for (i in seq_len(length(idx))) {
            Phinew = Phi
            Phinew[idx[i]] = 0
            diag(Phinew) = 1
            models[[i]] <- Phinew
        }
    }
    models
}
#' @noRd
theta2theta <- function(x, y) {
    if (is.matrix(x)) {
        z <- apply(x, 2, function(x) {
            if (max(x) == 0) {
                y <- 0
            } else {
                y <- which.max(x)
            }
            return(y)
        })
    } else {
        z <- matrix(0, nrow(y), length(x))
        z[cbind(x, seq_len(ncol(z)))] <- 1
        rownames(z) <- seq_len(nrow(z))
        colnames(z) <- seq_len(ncol(z))
    }
    return(z)
}
#' @noRd
adj2dnf <- function(A) {

    dnf <- NULL

    for (i in seq_len(ncol(A))) {
        for (j in seq_len(nrow(A))) {
            if (A[i, j] > 0) {
                dnf <- c(dnf, paste(colnames(A)[i], rownames(A)[j], sep = "="))
            }
            if (A[i, j] < 0) {
                dnf <- c(dnf, paste("!", colnames(A)[i], "=",
                                    rownames(A)[j], sep = ""))
            }
        }
    }

    dnf <- unique(dnf)

    return(dnf)

}
#' @noRd
#' @importFrom methods new
plot.adj <- function(x, ...) {
    adj2graph <- function(adj.matrix) {
        V   <- rownames(adj.matrix)
        edL <- vector("list", length=nrow(adj.matrix))
        names(edL) <- V
        for (i in seq_len(nrow(adj.matrix))) {
            edL[[i]] <- list(edges=which(!adj.matrix[i,]==0),
                             weights=adj.matrix[i,!adj.matrix[i,]==0])
        }
        gR <- new("graphNEL",nodes=V,edgeL=edL,edgemode="directed")
        return(gR)
    }
    g <- adj2graph(x)
    plot(g)
}
#' @noRd
graph2adj <- function(gR) {
    adj.matrix <- matrix(0,
                         length(nodes(gR)),
                         length(nodes(gR))
    )
    rownames(adj.matrix) <- nodes(gR)
    colnames(adj.matrix) <- nodes(gR)
    for (i in seq_len(length(nodes(gR)))) {
        adj.matrix[nodes(gR)[i],adj(gR,nodes(gR)[i])[[1]]] <- 1
    }
    return(adj.matrix)
}
#' @noRd
#' @importFrom flexclust dist2
#' @import Rcpp
#' @import RcppEigen
llrScore <- function(data, adj, weights = NULL, ratio = TRUE) {
    if (is.null(weights)) {
        weights <- rep(1, ncol(data))
    }
    if (ratio) {
        score <- eigenMapMatMult(data, adj*weights)
    } else {
        if (max(data) == 1) {
            score <- -dist2(data, t(adj)*weights)
        } else {
            score <- -dist2(data, t((adj*mean(data))*weights))
        }
    }
    return(score)
}
#' @noRd
adj2dnf <- function(A) {
    dnf <- NULL
    for (i in seq_len(ncol(A))) {
        dnf <- c(dnf, rownames(A))
        for (j in seq_len(nrow(A))) {
            if (A[i, j] == 1) {
                dnf <- c(dnf, paste(colnames(A)[i], rownames(A)[j], sep = "="))
            }
            if (A[i, j] == -1) {
                dnf <- c(dnf, paste("!", colnames(A)[i], "=", rownames(A)[j],
                                    sep = ""))
            }
        }
    }

    dnf <- unique(dnf)

    return(dnf)

}
#' @noRd
#' @useDynLib mnem
#' @importFrom Rcpp sourceCpp
mytc <- function(x, u = NULL, v = NULL) {
    diag(x) <- 1
    if (is.null(u) | is.null(v)) {
        y <- matrix(transClose_W(x), nrow(x), ncol(x))
    } else {
        if (x[u,v] == 1) {
            y <- matrix(transClose_Ins(x, u, v), nrow(x), ncol(x))
        }
        if (x[u,v] == 0) {
            y <- matrix(transClose_Del(x, u, v), nrow(x), ncol(x))
        }
    }
    y[which(y != 0)] <- 1
    rownames(y) <- rownames(x)
    colnames(y) <- colnames(x)
    return(y)
}
#' @noRd
simulateDnf <- function(dnf, stimuli = NULL, inhibitors = NULL) {
    getStateDnf <- function(node, signalStates, graph, children = NULL) {
        graphCut <- graph[grep(paste("=", node, "$", sep = ""), graph)]
        if (length(graphCut) == 0) {
            signalStates[, node] <- 0
        } else {
            sop <- numeric(nrow(signalStates))
            children2 <- gsub("!", "", children)
            for (i in graphCut) {
                parents <- gsub("=.*$", "", unlist(strsplit(i, "\\+")))
                pob <- rep(1, nrow(signalStates))
                for (j in parents) {
                    j2 <- gsub("!", "", j)
                    if (sum(is.na(signalStates[, j2]) == TRUE) ==
                        length(signalStates[, j2])) {
                        if (j %in% j2) {
                            node2 <- node
                            add1 <- 0
                        } else {
                            node2 <- paste("!", node, sep = "")
                            add1 <- 1
                        }
                        if (j2 %in% children2) {
                            subGraph <- graph[
                                -grep(paste(".*=", node, "|.*",
                                            j2, ".*=.*", sep = ""), graph)]
                            signalStatesTmp <- getStateDnf(
                                node = j2,
                                signalStates = signalStates,
                                graph = subGraph,
                                children = NULL)
                            ifa <- children[
                                which(children2 %in% j2):length(children2)]
                            ifb <- (length(grep("!", ifa)) + add1)/2
                            ifc <- children[
                                which(children2 %in% j2):length(children2)]
                            if (ifb != ceiling((length(grep("!", ifc)) +
                                                add1)/2)) {
                            } else {
                            }
                            if (add1 == 0) {
                                pobMult <- signalStatesTmp[, j2]
                            } else {
                                pobMult <- add1 - signalStatesTmp[, j2]
                            }
                        } else {
                            signalStates <-
                                getStateDnf(node = j2,
                                            signalStates = signalStates,
                                            graph = graph,
                                            children = unique(c(children,
                                                                node2)))
                            if (add1 == 0) {
                                pobMult <- signalStates[, j2]
                            } else {
                                pobMult <- add1 - signalStates[, j2]
                            }
                        }
                        pob <- pob*pobMult
                    } else {
                        if (j %in% j2) {
                            add1 <- 0
                        } else {
                            add1 <- 1
                        }
                        if (add1 == 0) {
                            pobMult <- signalStates[, j2]
                        } else {
                            pobMult <- add1 - signalStates[, j2]
                        }
                        pob <- pob*pobMult
                    }
                    if (max(pob, na.rm = TRUE) == 0) { break() }
                }
                sop <- sop + pob
                if (min(sop, na.rm = TRUE) > 0) { break() }
            }
            sop[sop > 0] <- 1
            if (node %in% inhibitors) {
                sop <- sop*0
            }
            if (node %in% stimuli) {
                sop <- max(sop, 1)
            }
            signalStates[, node] <- sop
        }
        return(signalStates)
    }
    signals <-
        unique(gsub("!", "",
                    unlist(strsplit(
                        unlist(strsplit(dnf, "=")), "\\+"))))
    graph <- dnf
    signalStates <- matrix(NA, nrow = 1, ncol = length(signals))
    rownames(signalStates) <- paste(c("stimuli:", stimuli, "inhibitors:",
                                      inhibitors), collapse = " ")
    colnames(signalStates) <- signals
    signalStates[which(signals %in% stimuli)] <- 1
    for (k in signals) {
        if (is.na(signalStates[, k]) == TRUE) {
            signalStates <- getStateDnf(node = k, signalStates = signalStates,
                                        graph = graph, children = NULL)
        }
    }
    namestmp <- colnames(signalStates)
    signalStates <- as.vector(signalStates)
    names(signalStates) <- namestmp
    return(signalStates = signalStates)
}
#' @noRd
dnf2adj <- function(dnf) {
    nodes <- unique(sort(unlist(strsplit(dnf,"="))))
    adj <- matrix(0,length(nodes),length(nodes))
    rownames(adj) <- colnames(adj) <- nodes
    for (i in dnf) {
        nodes <- unlist(strsplit(i,"="))
        adj[nodes[1],nodes[2]] <- adj[nodes[1],nodes[2]] + 1
    }
    return(adj)
}
#' @noRd
#' @importFrom graphics abline
addgrid <- function(x = c(0,1,0.1), y = c(0,1,0.1), lty = 2,
                    col = rgb(0.5,0.5,0.5,0.5)) {
    abline(h=seq(x[1], x[2], x[3]), col = col, lty = lty)
    abline(v=seq(y[1], y[2], y[3]), col = col, lty = lty)
}

#' MCMC function
#' @param Nems number of components for the mixture to be inferred
#' @param Sgenes number of Sgenes in the data set
#' @param data data matrix with non-unique column names (corresponding to
#'  the Sgenes) for the cells indexing the columns and the Egenes indexing
#'   the rows. The modData function is not applied within the MCMC function,
#'    but in the mnem function that wraps around it.
#' @param stop_counter number of iterations for algorithm runtime
#' @param burn_in number of iterations to be discarded prior to analyzing
#'  the posterior distribution
#' @param probs matrix with initial cell probabilities if available
#' @param starts how many times the MCMC is restarted for the same data
#'  set
#' @param Hastings if set to TRUE, the Hastings ratio is calculated
#' @param Initialize if set to "random", the initial Phi has edges added
#'  by sampling from a uniform distribution. If set to "empty", the initial
#'   Phi is empty
#' @param NodeSwitching if set to TRUE, node switching is allowed as a move,
#'  additional to the edge moves
#' @param EM_NEM if set to TRUE, the EM and NEM are each run 10 times to
#'  infer the network mixture and the resulting lilelihoods saved to compare
#'   them to the MCMC
#' @param post_gaps can be set to 1, 10 or 100. Determines after how many
#'  iterations the next Phi mixture is added to the Phi edge Frequency tracker
#' @param penalized if set to TRUE, the penalized likelihood will be used.
#'  Per default this is FALSE, since no component learning is involved and
#'   sparcity is hence not enforced
#' @param logtype log base of the data
#' @param marginal logical to compute the marginal likelihood (TRUE)
#' @param complete if TRUE, complete data log likelihood is considered (for
#' very large data sets, e.g. 1000 cells and 1000 E-genes)
#' @param accept_range the random probability the acceptance probability
#' is compared to (default: 1)
#' @author Viktoria Brunner
#' @noRd
#' @return object of class mcmc
MCMC <- function(Nems=2, Sgenes=5, data, stop_counter=30000, burn_in=10000,
                 probs=NULL, starts=3, Hastings=TRUE, Initialize="random",
                 phi = NULL, NodeSwitching=TRUE, EM_NEM=TRUE, post_gaps=10,
                 penalized=FALSE, logtype = 2, marginal = FALSE,
                 complete = FALSE, accept_range = 1){
    if (burn_in >= stop_counter) {
        burn_in <- floor(0.1*stop_counter)
    }
    if (!is.null(phi)) {
        Initialize <- 'prior'
        starts <- length(phi)
    }
    k <- Nems
    n <- Sgenes
    #initialize saving entities (outside loop)
    ll_track <- list()
    mw_track <- list()
    ll_best_timer_saved <- list()
    ll_best_vector_saved <- list()
    switch_counter_saved <- list()
    Phi_track_saved <- list()
    Phi_best_saved <- list()
    for (i in 1:starts){
        Phi_track_saved[[i]] <- list()
        Phi_best_saved[[i]] <- list()
    }
    run <- 0
    max.len <- 0
    for (s in 1:starts){
        if (!is.null(phi[[s]])) {
            k <- length(phi[[s]])
        }
        # set convergence criteria
        counter <- 0
        counter_accept <- 0
        stop <- FALSE
        ll_tracker <- mw_tracker <- rep(0,stop_counter)
        ll_time <- mw_time <- c(1:(length(ll_tracker)))
        ll_best_vector <- vector()
        ll_best_timer <- vector()
        accept <- FALSE
        run <- run+1
        accept_min <- seq(1,k,1)
        switch_counter <- vector()
        #Initialize mw as uniform and probs as empty if not given otherwise
        if (is.null(probs)){
            probs <- matrix(0, k, ncol(data))
        }
        mw_old <- rep(1/k, k)
        #Initialize Phi
        Topology <- list()
        for (i in 1:k){
            Topology[[i]] <- list()
            dimnames<-list(1:n,1:n)
            if (Initialize == 'networks'){
                tmp <- matrix(sample(0:1, n*n, replace=TRUE), n, n)
                rownames(tmp) <- colnames(tmp) <- sample(seq_len(n), n)
                tmp[lower.tri(tmp)] <- 0
                tmp <- tmp[naturalorder(rownames(tmp)),
                           naturalorder(rownames(tmp))]
                Topology[[i]]$adj <- tmp
            } else if (Initialize == 'empty') {
                Topology[[i]]$adj <- matrix(0, n , n, dimnames=dimnames)
            } else if (Initialize == 'prior') {
                Topology[[i]]$adj <- phi[[s]][[i]]
            }
            data1 <- t(t(data)*getAffinity(probs, mw = mw_old,
                                           complete = FALSE)[i,])
            P <- cbind(0,data1%*%t(getRho(data1))%*%transitive.closure(
                Topology[[i]]$adj))
            Topology[[i]]$subtopo <- max.col(P)-1
            thetarand <- TRUE
        }
        probs <- getProbs(probs, k=k, data=data, mw=mw_old, res=Topology,
                          complete = complete, marginal = marginal)
        ll_old <- ll_best <- probs$ll
        probs <- probs_best <- probs$probs
        #penalized ll
        if (penalized==TRUE){
            s <- 0
            for (i in 1:k){
                theta <- theta2theta(Topology[[i]]$subtopo,Topology[[i]]$adj)
                s <- s+sum(Topology[[i]]$adj)+sum(theta)+k-1
            }
            ll_old <- ll_best <- (log(n)*s)-2*(getLL(probs, mw=mw_old,
                                                     logtype=2,
                                                     complete = complete))
        }
        #initialize Phi
        Phi_best <- list()
        old_Phi <- list()
        norm_post <- list()
        for (i in 1:k){
            Phi_best[[i]] <- matrix(0, n , n, dimnames=dimnames)
        }
        #initialize Phi edge frequency trackers
        Phi_track <- Phi_track_N <- Phi_best
        #start MCMC
        while (!stop){
            accept <- FALSE
            #sample component to change
            which_k <- sample(1:k,1) # different distribution?
            #change Phi in component by edge reversal/ adding/ removing or node
            #switching
            Phi_new <- Topology[[which_k]]$adj
            #prevent cycle by calculating transitively closed of old matrix and
            #its inverse and omit them from edge sampling
            closed_Phi <- transitive.closure(Phi_new)
            closed_trans_Phi <- closed_Phi + t(closed_Phi)
            closed_trans_Phi[which(closed_trans_Phi != 0)] <- 1
            omit_edge <- which(closed_trans_Phi != Phi_new)
            omit_length <- length(Phi_new)-length(omit_edge)
            prob_vector <- rep(1/omit_length, length(Phi_new))
            prob_vector[omit_edge] <- 0
            #calulate move probabilities
            node_switch <- choose(n, 2)
            prob_switch <- node_switch/(node_switch + omit_length)
            action <- c("node","edge")
            do <- sample(action, size=1, prob=c(prob_switch, 1-prob_switch))
            #node switching
            if (do=="node" && NodeSwitching==TRUE){
                prob <- rep(1/node_switch, n)
                nodes <- seq(1, n, 1)
                do <- sample(nodes, size=2, prob=prob)
                Phi_new[c(do[1],do[2]),]  <- Phi_new[c(do[2],do[1]),]
                Phi_new[,c(do[1],do[2])]  <- Phi_new[,c(do[2],do[1])]
            }
            #edge sampling: sample edge w/o the omitted ones
            else{
                Phi_sample <- Phi_new
                names(Phi_sample) <- seq_along(Phi_sample)
                edge <- as.numeric(names(sample(x=Phi_sample, size=1,
                                                prob=prob_vector)))
                ##check for existing edge
                curr_edge <- Topology[[which_k]]$adj[edge]
                ##change respective edge in Phi
                if (curr_edge==0){
                    Phi_new[edge]<-1
                } else{
                    action<-sample(1:2,1)
                    if (action==1){
                        Phi_new[edge]<-0
                    } else{
                        Phi_new[edge]<-0
                        Phi_new<-t(Phi_new)
                        Phi_new[edge]<-1
                        Phi_new<-t(Phi_new)
                    }
                }
            }
            #calculate Hastings ratio if desired
            if (Hastings){
                if (NodeSwitching==FALSE) { node_switch <- 0 }
                hastings_X_Y <- 1/(omit_length+node_switch)
                closed_Phi <- transitive.closure(Phi_new)
                closed_trans_Phi <- closed_Phi + t(closed_Phi)
                closed_trans_Phi[which(closed_trans_Phi != 0)] <- 1
                omit_edge <- which(closed_trans_Phi != Phi_new)
                omit_length <- length(Phi_new)-length(omit_edge)
                hastings_Y_X <- 1/(omit_length+node_switch)
            }
            #insert Phi into new test list
            Topology_test <- Topology
            Topology_test[[which_k]]$adj <- Phi_new
            #Calculate MAP for Theta
            Theta<-list()
            for (i in 1:k){
                data1 <- t(t(data)*getAffinity(probs, mw = mw_old,
                                               complete = complete)[i,]) # daten R_k
                P <- cbind(0,data1%*%t(getRho(data1))%*%transitive.closure(
                    Topology_test[[i]]$adj))
                Theta[[i]] <- max.col(P)-1
                Topology_test[[i]]$subtopo <- Theta[[i]]
            }
            #Calculate new probabilities based on new Phi and Theta in test list
            probs_new <- getProbs(probs, k=k, data=data, res=Topology_test,
                                  mw = mw_old, complete = complete, marginal = marginal)
            #extract new mixture likelihood
            if (penalized==TRUE){
                s <- 0
                for (i in 1:k){
                    theta <- theta2theta(
                        Topology_test[[i]]$subtopo,Topology_test[[i]]$adj)
                    s<-s+sum(Topology_test[[i]]$adj)+sum(theta)+k-1
                }
                ll_new <- (log(n)*s)-2*(getLL(probs_new$probs, mw=mw_old,
                                              logtype=logtype,
                                              complete = complete))
            } else {
                ll_new <- probs_new$ll
            }
            #calculate metropolis ratio of mixture likelihood's
            Metr_ratio <- logtype^(ll_new - ll_old)
            if (Hastings){
                Hastings_ratio <- hastings_X_Y/hastings_Y_X
            } else{
                Hastings_ratio <- 1
            }
            #Set acceptance probability of new mixture
            Acceptance_prob <- min(Metr_ratio*Hastings_ratio, 1)
            #for penalized likelihood: (what is going on here?)
            if (penalized==TRUE){
                if (ll_new>0){
                    Acceptance_prob <- 0
                }
            }
            if (Acceptance_prob > (runif(1, 0, accept_range))){
                accept <- TRUE
                counter_accept <- counter_accept +1
                Topology <- Topology_test
                #test if new likelihood better than old
                if ((ll_new>ll_best&penalized==FALSE) |
                    (ll_new<ll_best&penalized==TRUE)) {
                    ll_best <- ll_new
                    ll_best_vector <- append(ll_best_vector, ll_new)
                    ll_best_timer <- append(ll_best_timer, counter)
                    #remember best Phi
                    for (i in 1:k){
                        Phi_best[[i]] <- Topology[[i]]$adj
                        old_Phi[[i]] <- Topology[[i]]$adj
                    }
                    probs_best <- probs_new
                } else {
                    #for Phi counter: remember old Phi for hd calculation
                    for (i in 1:k){
                        old_Phi[[i]] <- Topology[[i]]$adj
                    }
                }
                ll_old <- ll_new
                mw_old <- probs_new$mw
                probs <- probs_new$probs
            }
            #tracking of Phi's
            if (counter>burn_in){
                # if new Phi accepted, calculate NORM of topology and topology_test
                # matrices and add new matrix to most similar old matrix
                dimnames <- dimnames<-list(1:k,1:k)
                hd <- matrix(0, k , k, dimnames=dimnames)
                for (i in 1:k){
                    for (j in 1:k){
                        if (i == j) { next() }
                        if (Phi_track[[i]][1,1]!=0){
                            hd[i,j] <- norm((Phi_track[[i]]/Phi_track[[i]][1,1])-
                                                transitive.closure(Topology[[j]]$adj),
                                            type = "2")
                        } else{
                            hd[i,j] <- norm(Phi_track[[i]]-transitive.closure(
                                Topology[[j]]$adj), type = "2")
                        }
                    }
                }
                #choose which new phi is added to which old phi -> only accept
                #if unambigous
                hd <- as.data.frame(hd)
                minHd <- apply( hd, 1, which.min)
                if (sum(duplicated(minHd)) == FALSE){
                    #keep track of component switches
                    if (sum(accept_min != minHd)!=FALSE) {
                        switch_counter <- append(switch_counter, counter)
                        accept_min <- minHd
                    }
                }
                #add up matrices
                for (i in 1:k){
                    Phi_track[[i]] <- Phi_track[[i]] + transitive.closure(
                        Topology[[accept_min[i]]]$adj)
                    if (counter_accept%%post_gaps == 0){
                        Phi_track_N[[i]] <- Phi_track_N[[i]] + transitive.closure(
                            Topology[[accept_min[i]]]$adj)
                    }
                }
            }
            counter <- counter+1
            #stop MCMC
            if (counter==stop_counter) {
                stop <- TRUE
            }
            ll_tracker[[counter]] <- ll_old
            mw_tracker[[counter]] <- mw_old[[1]]
        }
        #save likelihood evolution for trace plot
        ll_track[[run]] <- ll_tracker
        #save data for mw_tracking
        mw_track[[run]] <- mw_tracker
        #save component switching
        switch_counter_saved[[run]] <- switch_counter
        #save ll_best from current run
        ll_best_timer_saved[[run]]<- ll_best_timer
        ll_best_vector_saved[[run]] <- ll_best_vector
        max.len <- max(max.len, ll_best_timer_saved[[run]])
        #save Phi_freq/ posterior and best_Phi
        for (i in 1:k){
            Phi_track_saved[[run]][[i]]<-list()
            Phi_track_saved[[run]][[i]]$all <- Phi_track[[i]]
            Phi_track_saved[[run]][[i]]$N <- Phi_track_N[[i]]
            Phi_best_saved[[run]][[i]] <- Phi_best[[i]]
            #choose Phi frequency matrix (1, 10, 100)
            #choose which posterior to use (gap size of saved Phi's)
            norm_post[[i]] <- Phi_track_N[[i]]/(Phi_track_N[[1]][1,1])
            bin_post <- norm_post
            bin_post[[i]][bin_post[[i]] <= 0.5] <- 0
            bin_post[[i]][bin_post[[i]] > 0.5] <- 1
        }
    }
    #create object to return from mcmc
    mcmc <- list()
    mcmc$trace_time <- seq(1:stop_counter)
    mcmc$trace <- list()
    mcmc$trace_mw <- list()
    mcmc$comp_switches <- list()
    mcmc$best_ll <- list()
    mcmc$best_ll_time <- list()
    mcmc$posterior <- list()
    mcmc$best_phi <- list()
    for (i in 1:starts){
        mcmc$trace[[i]] <- ll_track[[i]]
        mcmc$trace_mw[[i]] <- mw_track[[i]]
        mcmc$comp_switches[[i]] <- switch_counter_saved[[i]]
        mcmc$best_ll[[i]] <- ll_best_vector_saved[[i]]
        mcmc$best_ll_time[[i]] <- ll_best_timer_saved[[i]]
        mcmc$best_ll_length <- max.len
        mcmc$posterior[[i]] <- Phi_track_saved[[i]]
        mcmc$best_phi[[i]] <- Phi_best_saved[[i]]
    }
    return(mcmc)
}