nempi: Inferring unobserved perturbations from gene expression data

## Sgenes <- 10; wrong <- 0; highnoise <- 1; doclass <- 1; knowns <- 80; runs2 <- NA; nCells <- NA; perturb <- 0.5

library(naturalsort)
library(nem)
library(cluster)
library(Rcpp)
library(e1071)
library(nnet)
library(randomForest)
library(missForest)
library(class)
library(CALIBERrfimpute)

## source("~/Documents/mnem/R/mnems.r"); source("~/Documents/mnem/R/mnems_low.r"); sourceCpp("~/Documents/mnem/src/mm.cpp"); source("~/Documents/nempi/R/nempi_main.r"); source("~/Documents/nempi/R/nempi_low.r")

## uncomment for leo/euler:
source("mnems.r")
source("mnems_low.r")
## sourceCpp("mm.cpp")
sourceCpp(code=readChar("mm.cpp", file.info("mm.cpp")$size))
source("nempi_main.r")
source("nempi_low.r")
Sgenes <- as.numeric(commandArgs(TRUE)[1])
wrong <- as.numeric(commandArgs(TRUE)[2])
highnoise <- as.numeric(commandArgs(TRUE)[3])
doclass <- as.numeric(commandArgs(TRUE)[4])
knowns <- as.numeric(commandArgs(TRUE)[5])
runs2 <- as.numeric(commandArgs(TRUE)[6])
nCells <- as.numeric(commandArgs(TRUE)[7])
perturb <- as.numeric(commandArgs(TRUE)[8])

##

if (is.na(knowns)) { knowns <- Sgenes }

if (is.na(perturb)) { perturb <- 0 }

docompete <- 1

Egenes <- 10
if (is.na(nCells)) {
    if (knowns < Sgenes) {
        nCells <- 1000
    } else {
        nCells <- Sgenes*10*2
    }
}

runs <- 100
noises <- c(1,3,5)
lost <- c(0.1, 0.5, 0.9)
multi <- c(0.2, 0.1)

complete <- 1
combi <- 1
combisvm <- NULL
null <- TRUE
badCells <- 0.1

if (wrong) { lost <- lost[1:2] }

if (knowns < Sgenes) { lost <- lost[c(1,3)] }

result <- array(0, c(runs, length(noises), length(lost), 14, 9), list(rep("runs", runs), noises, lost, c("net", "tp", "fp", "cor", "time", "nullsamples", "tn", "fn", "tp2", "fp2", "tn2", "fn2", "theta", "auc"), c("nempi", "svm", "nnet", "rf", "empty", "random", "missForest", "mice", "knn")))

getfalse <- 0

for (i in 1:runs) {
    if (!is.na(runs2)) {
        if (i != runs2) {
            next()
        }
    }
    for (j in 1:length(noises)) {

        ## if (knowns < Sgenes & j == 2) { next() }

        for (k in 1:length(lost)) {

            ## i <- j <- 1; k <- 1

            if (knowns < Sgenes) {
                Sgenes2 <- sort(sample(Sgenes, knowns))
                Sgenes3 <- seq_len(Sgenes)[-Sgenes2]
            } else {
                Sgenes2 <- 1:Sgenes
            }
            noise1 <- noises[j]
            if (!wrong) {
                noise2 <- lost[k]
            } else {
                noise2 <- 0.5
            }

            sim <- simData(Sgenes = Sgenes, Egenes = Egenes, mw = 1,
                           nCells = nCells, Nem = 1, multi = multi,
                           badCells = floor(nCells*badCells),
                           uninform = floor(Egenes*Sgenes*0.1),
                           exactProb = TRUE, edgeprob = runif(1,0,1))
            if (!all(1:Sgenes %in% unique(unlist(strsplit(colnames(sim$data), "_"))))) {
                kdata <- t(sim$Nem[[1]])
                kdata <- rbind(kdata[rep(1:nrow(kdata), each = Egenes), , drop = FALSE],
                               matrix(sample(c(0,1), Sgenes*floor(Egenes*Sgenes*0.1), replace = TRUE), floor(Egenes*Sgenes*0.1)))
                sim$data <- cbind(sim$data[, -(1:ncol(kdata))], kdata)
            }
            sdata <- sim$data
            sdata[which(sim$data == 1)] <- rnorm(sum(sim$data == 1), 1, noise1)
            sdata[which(sim$data == 0)] <- rnorm(sum(sim$data == 0), -1, noise1)

            stop <- FALSE
            while(!stop) {
                lost2 <- sort(sample(1:ncol(sdata), floor(noise2*ncol(sdata))))
                if (knowns < Sgenes & nCells < Sgenes*10) {
                    for (s in Sgenes2) {
                        if (!(s %in% colnames(sdata)[-lost2])) {
                            lost2 <- lost2[-which(lost2 %in% grep(paste(c(paste0("^", s, "_"),
                                            paste0("_", s, "$"),
                                            paste0("_", s, "_"),
                                            paste0("^", s, "$")), collapse = "|"), colnames(sdata)))[1]]
                        }
                    }
                }
                if (highnoise) {
                    if (all(Sgenes2 %in% unique(unlist(strsplit(colnames(sdata)[-lost2], "_"))))) { stop <- TRUE }
                } else {
                    if (all(Sgenes2 %in% unique(colnames(sdata)[-lost2]))) { stop <- TRUE }
                }
            }
            colnames(sdata)[lost2] <- ""
            if (wrong) {
                multi2 <- NULL
                if (multi[1] != FALSE) { multi2 <- multi }
                sdata2 <- sdata
                stop <- FALSE
                while(!stop) {
                    sdata <- sdata2
                    colnames(sdata)[sample(which(!(colnames(sdata) %in% "")), floor(lost[k]*sum(!(colnames(sdata) %in% ""))))] <- paste(naturalsort(sample(1:Sgenes, sample(1:(length(multi2)+1), 1))), collapse = "_")
                    lost2 <- sort(sample(1:ncol(sdata), floor(noise2*ncol(sdata))))
                    if (highnoise) {
                        if (all(Sgenes2 %in% unique(unlist(strsplit(colnames(sdata)[-lost2], "_"))))) { stop <- TRUE }
                    } else {
                        if (all(Sgenes2 %in% unique(colnames(sdata)[-lost2]))) { stop <- TRUE }
                    }
                }
            }
            D <- sdata

            if (multi[1] != FALSE) {
                multi2 <- TRUE
            }
            paras <- expand.grid(c(0,1), c(0,1), c(0,1))
            if (knowns < Sgenes) {
                colnames(D) <- gsub(paste(paste0("_", Sgenes3, "_"), collapse = "|"), "_", colnames(D))
                colnames(D) <- gsub(paste(c(paste0("^", Sgenes3, "_"),
                                            paste0("_", Sgenes3, "$"),
                                            paste0("_", Sgenes3, "_"),
                                            paste0("^", Sgenes3, "$")), collapse = "|"), "", colnames(D))
                colnames(D) <- gsub(paste(c(paste0("^", Sgenes3, "_"),
                                            paste0("_", Sgenes3, "$"),
                                            paste0("_", Sgenes3, "_"),
                                            paste0("^", Sgenes3, "$")), collapse = "|"), "", colnames(D))
                full <- mytc(sim$Nem[[1]])[Sgenes3, Sgenes2]
                Sgenes4 <- rownames(full)[which(apply(full, 1, sum) == 0)]
            } else {
                Sgenes4 <- 1:Sgenes
            }

            ## source("~/Documents/nempi/R/nempi_main.r"); source("~/Documents/nempi/R/nempi_low.r")

            s <- 2
            type <- "null"
            if (perturb > 0) {
                pertphi <- mytc(sim$Nem[[1]])
                ppo <- order(apply(mytc(pertphi), 1, sum)-apply(mytc(pertphi), 2, sum),decreasing=TRUE)
                pertphi <- pertphi[ppo, ppo]
                pp0 <- which(upper.tri(pertphi) & pertphi == 0)
                pp1 <- which(upper.tri(pertphi) & pertphi == 1)
                if (length(pp0)!=0) {
                    if (length(pp1)!=0) {
                        pertphi[sample(pp0,min(length(pp0),floor(perturb*length(pp1))))] <- 1
                    } else {
                        pertphi[sample(pp0,1)] <- 1
                    }
                }
                pertphi <- pertphi[naturalorder(rownames(pertphi)), naturalorder(colnames(pertphi))]
                start <- as.numeric(format(Sys.time(), "%s"))
                ures <- nempi(D, full = paras[s, 1], type = type, soft = paras[s+2, 2], complete = complete, null = null, phi = pertphi)
                s <- 1
                result[i, j, k, 5, s] <- as.numeric(format(Sys.time(), "%s")) - start
            } else if (perturb < 0) {
                pertphi <- mytc(sim$Nem[[1]])
                ppo <- order(apply(mytc(pertphi), 1, sum)-apply(mytc(pertphi), 2, sum),decreasing=TRUE)
                pertphi <- pertphi[ppo, ppo]
                pp1 <- which(upper.tri(pertphi) & pertphi == 1)
                pertphi[sample(pp1,floor(abs(perturb)*length(pp1)))] <- 0
                pertphi <- pertphi[naturalorder(rownames(pertphi)), naturalorder(colnames(pertphi))]
                start <- as.numeric(format(Sys.time(), "%s"))
                ures <- nempi(D, full = paras[s, 1], type = type, soft = paras[s+2, 2], complete = complete, null = null, phi = pertphi)
                s <- 1
                result[i, j, k, 5, s] <- as.numeric(format(Sys.time(), "%s")) - start
            } else {
                start <- as.numeric(format(Sys.time(), "%s"))
                ures <- nempi(D, full = paras[s, 1], type = type, soft = paras[s+2, 2], complete = complete, null = null)
                s <- 1
                result[i, j, k, 5, s] <- as.numeric(format(Sys.time(), "%s")) - start
            }
            if (knowns < Sgenes) {
                tmp <- pifit(ures, sim, D, knowns = Sgenes2)
            } else {
                tmp <- pifit(ures, sim, D)
            }
            result[i, j, k, 1, s] <- tmp$net
            result[i, j, k, 2, s] <- tmp$knownRates[1]
            result[i, j, k, 3, s] <- tmp$knownRates[2]
            result[i, j, k, 7, s] <- tmp$knownRates[3]
            result[i, j, k, 8, s] <- tmp$knownRates[4]
            result[i, j, k, 9, s] <- tmp$uknownRates[1]
            result[i, j, k, 10, s] <- tmp$uknownRates[2]
            result[i, j, k, 11, s] <- tmp$uknownRates[3]
            result[i, j, k, 12, s] <- tmp$uknownRates[4]
            result[i, j, k, 13, s] <- tmp$subtopo
            result[i, j, k, 14, s] <- tmp$auc
            result[i, j, k, 4, s] <- tmp$cor
            result[i, j, k, 6, s] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
            if (!all(ures$lls[2:length(ures$lls)] - ures$lls[1:(length(ures$lls)-1)] >= 0)) { getfalse <- getfalse + 1 }
            ## print("nempi")

            ## result[i, j, k, , s]

            ## if (getfalse > 0) { stop() }

            ## source("~/Documents/nempi/R/nempi_main.r"); source("~/Documents/nempi/R/nempi_low.r");

            if (doclass) {
                for (s in c(2,4,6)) {
                    start <- as.numeric(format(Sys.time(), "%s"))
                    if (s %in% 1:2) {
                        ures <- classpi(D, full = paras[s, 1])
                    }
                    if (s %in% 3:4) {
                        ures <- classpi(D, full = paras[s, 1], method = "nnet", size = 2)
                    }
                    if (s %in% 5:6) {
                        ures <- classpi(D, full = paras[s, 1], method = "randomForest")
                    }
                    result[i, j, k, 5, s+2] <- as.numeric(format(Sys.time(), "%s")) - start
                    if (knowns < Sgenes) {
                        tmp <- pifit(ures, sim, D, propagate = 0, knowns = Sgenes2)
                    } else {
                        tmp <- pifit(ures, sim, D, propagate = 0)
                    }
                    s2 <- s/2+1
                    result[i, j, k, 1, s2] <- tmp$net
                    result[i, j, k, 2, s2] <- tmp$knownRates[1]
                    result[i, j, k, 3, s2] <- tmp$knownRates[2]
                    result[i, j, k, 7, s2] <- tmp$knownRates[3]
                    result[i, j, k, 8, s2] <- tmp$knownRates[4]
                    result[i, j, k, 9, s2] <- tmp$uknownRates[1]
                    result[i, j, k, 10, s2] <- tmp$uknownRates[2]
                    result[i, j, k, 11, s2] <- tmp$uknownRates[3]
                    result[i, j, k, 12, s2] <- tmp$uknownRates[4]
                    result[i, j, k, 13, s2] <- tmp$subtopo
                    result[i, j, k, 14, s2] <- tmp$auc
                    result[i, j, k, 4, s2] <- tmp$cor
                    result[i, j, k, 6, s2] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
                    sum(apply(ures$Gamma, 2, max) < 1 - apply(ures$Gamma, 2, sum) & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
                }
            }
            ## print("class")

            ## random:
            start <- as.numeric(format(Sys.time(), "%s"))
            D2 <- D
            if (knowns < Sgenes) {
                colnames(D2)[which(colnames(D) %in% "")] <- sample(Sgenes2, sum(colnames(D) %in% ""), replace = TRUE)
            } else {
                colnames(D2)[which(colnames(D) %in% "")] <- sample(1:Sgenes, sum(colnames(D) %in% ""), replace = TRUE)
            }
            tmp <- mynem(D2, multi = TRUE)
            Gamma <- getGamma(D2)
            ures <- list()
            ures$Gamma <- apply(Gamma, 2, function(x) return(x/sum(x)))
            ures$res <- list()
            ures$res$adj <- tmp$adj
            ures$null <- TRUE
            ures$combi <- 1
            result[i, j, k, 5, 6] <- as.numeric(format(Sys.time(), "%s")) - start
            if (knowns < Sgenes) {
                tmp <- pifit(ures, sim, D, propagate = 0, knowns = Sgenes2)
            } else {
                tmp <- pifit(ures, sim, D, propagate = 0)
            }
            result[i, j, k, 1, 6] <- tmp$net
            result[i, j, k, 2, 6] <- tmp$knownRates[1]
            result[i, j, k, 3, 6] <- tmp$knownRates[2]
            result[i, j, k, 7, 6] <- tmp$knownRates[3]
            result[i, j, k, 8, 6] <- tmp$knownRates[4]
            result[i, j, k, 9, 6] <- tmp$uknownRates[1]
            result[i, j, k, 10, 6] <- tmp$uknownRates[2]
            result[i, j, k, 11, 6] <- tmp$uknownRates[3]
            result[i, j, k, 12, 6] <- tmp$uknownRates[4]
            result[i, j, k, 13, 6] <- tmp$subtopo
            result[i, j, k, 14, 6] <- tmp$auc
            result[i, j, k, 4, 6] <- tmp$cor
            result[i, j, k, 6, 6] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)

            if (docompete) {
                ## empty:

                D2 <- D[, which(colnames(D) != "")]
                Rho <- getRho(D2)
                tmp <- mynem(D2, Rho = Rho, multi = TRUE)
                n <- Sgenes
                A <- mytc(tmp$adj)
                B <- mytc(sim$Nem[[1]])
                if (knowns < Sgenes) {
                    B <- B[which(rownames(B) %in% Sgenes2), which(colnames(B) %in% Sgenes2)]
                }
                result[i, j, k, 1, 5] <- (n*(n-1) - sum(abs(A - B)))/(n*(n-1))

                ## missForest

                start <- as.numeric(format(Sys.time(), "%s"))
                mfdata <- cbind(as.data.frame(t(D)), factor(colnames(D)))
                mfdata[which(mfdata == "", arr.ind = TRUE)] <- NA
                sink("NUL")
                mfimp <- missForest(mfdata)
                sink()
                D2 <- D
                colnames(D2) <- mfimp$ximp[, ncol(mfimp$ximp)]
                tmp <- mynem(D2, multi = TRUE)
                Gamma <- getGamma(D2)
                ures <- list()
                ures$Gamma <- apply(Gamma, 2, function(x) return(x/sum(x)))
                ures$res <- list()
                ures$res$adj <- tmp$adj
                ures$null <- TRUE
                ures$combi <- 1
                result[i, j, k, 5, 7] <- as.numeric(format(Sys.time(), "%s")) - start
                if (knowns < Sgenes) {
                    tmp <- pifit(ures, sim, D, propagate = 0, knowns = Sgenes2)
                } else {
                    tmp <- pifit(ures, sim, D, propagate = 0)
                }
                result[i, j, k, 1, 7] <- tmp$net
                result[i, j, k, 2, 7] <- tmp$knownRates[1]
                result[i, j, k, 3, 7] <- tmp$knownRates[2]
                result[i, j, k, 7, 7] <- tmp$knownRates[3]
                result[i, j, k, 8, 7] <- tmp$knownRates[4]
                result[i, j, k, 9, 7] <- tmp$uknownRates[1]
                result[i, j, k, 10, 7] <- tmp$uknownRates[2]
                result[i, j, k, 11, 7] <- tmp$uknownRates[3]
                result[i, j, k, 12, 7] <- tmp$uknownRates[4]
                result[i, j, k, 13, 7] <- tmp$subtopo
                result[i, j, k, 14, 7] <- tmp$auc
                result[i, j, k, 4, 7] <- tmp$cor
                result[i, j, k, 6, 7] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
                ## print("missForest")

                ## mice:

                if (Sgenes <= 100) {
                    start <- as.numeric(format(Sys.time(), "%s"))
                    micedata <- mfdata
                    colnames(micedata) <- paste0(LETTERS[1:ncol(micedata)], 1:ncol(micedata))
                    sink("NUL")
                    miceres <- mice(micedata, method = c(rep('', ncol(micedata)-1), 'rfcat'), m = 1, maxit = 1)
                    sink()
                    D2 <- D
                    colnames(D2)[which(colnames(D2) %in% "")] <- as.character(miceres$imp[[length(miceres$imp)]][, 1])
                    tmp <- mynem(D2, multi = TRUE)
                    Gamma <- getGamma(D2)
                    ures <- list()
                    ures$Gamma <- apply(Gamma, 2, function(x) return(x/sum(x)))
                    ures$res <- list()
                    ures$res$adj <- tmp$adj
                    ures$null <- TRUE
                    ures$combi <- 1
                    result[i, j, k, 5, 8] <- as.numeric(format(Sys.time(), "%s")) - start
                    if (knowns < Sgenes) {
                        tmp <- pifit(ures, sim, D, propagate = 0, knowns = Sgenes2)
                    } else {
                        tmp <- pifit(ures, sim, D, propagate = 0)
                    }
                    result[i, j, k, 1, 8] <- tmp$net
                    result[i, j, k, 2, 8] <- tmp$knownRates[1]
                    result[i, j, k, 3, 8] <- tmp$knownRates[2]
                    result[i, j, k, 7, 8] <- tmp$knownRates[3]
                    result[i, j, k, 8, 8] <- tmp$knownRates[4]
                    result[i, j, k, 9, 8] <- tmp$uknownRates[1]
                    result[i, j, k, 10, 8] <- tmp$uknownRates[2]
                    result[i, j, k, 11, 8] <- tmp$uknownRates[3]
                    result[i, j, k, 12, 8] <- tmp$uknownRates[4]
                    result[i, j, k, 13, 8] <- tmp$subtopo
                    result[i, j, k, 14, 8] <- tmp$auc
                    result[i, j, k, 4, 8] <- tmp$cor
                    result[i, j, k, 6, 8] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
                    ## print("mice")
                }

                ## knn:

                start <- as.numeric(format(Sys.time(), "%s"))
                train <- t(D[, which(colnames(D) != "")])
                test <- t(D[, which(colnames(D) == "")])
                cl <- colnames(D)[which(colnames(D) != "")]
                knnres <- knn(train, test, cl, prob=TRUE)
                D2 <- D
                colnames(D2)[which(colnames(D2) %in% "")] <- as.character(knnres)
                tmp <- mynem(D2, multi = TRUE)
                Gamma <- getGamma(D2)
                ures <- list()
                ures$Gamma <- apply(Gamma, 2, function(x) return(x/sum(x)))
                ures$res <- list()
                ures$res$adj <- tmp$adj
                ures$null <- TRUE
                ures$combi <- 1
                result[i, j, k, 5, 9] <- as.numeric(format(Sys.time(), "%s")) - start
                if (knowns < Sgenes) {
                    tmp <- pifit(ures, sim, D, propagate = 0, knowns = Sgenes2)
                } else {
                    tmp <- pifit(ures, sim, D, propagate = 0)
                }
                result[i, j, k, 1, 9] <- tmp$net
                result[i, j, k, 2, 9] <- tmp$knownRates[1]
                result[i, j, k, 3, 9] <- tmp$knownRates[2]
                result[i, j, k, 7, 9] <- tmp$knownRates[3]
                result[i, j, k, 8, 9] <- tmp$knownRates[4]
                result[i, j, k, 9, 9] <- tmp$uknownRates[1]
                result[i, j, k, 10, 9] <- tmp$uknownRates[2]
                result[i, j, k, 11, 9] <- tmp$uknownRates[3]
                result[i, j, k, 12, 9] <- tmp$uknownRates[4]
                result[i, j, k, 13, 9] <- tmp$subtopo
                result[i, j, k, 14, 9] <- tmp$auc
                result[i, j, k, 4, 9] <- tmp$cor
                result[i, j, k, 6, 9] <- sum(apply(ures$Gamma, 2, sum) < 0.5 & colnames(sim$data) %in% Sgenes4)/sum(colnames(sim$data) %in% Sgenes4)
                ## print("knn")
            }

            ## result[i, j, k, ,]

        }
    }
    cat(paste0(i, "."))
}

if (is.na(runs2)) {
    if (wrong) {
        save(result, getfalse, lost, file = paste("unem_misslabeled", highnoise, complete, noise2, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
    } else {
        if (knowns < Sgenes) {
            save(result, getfalse, lost, file = paste("unem_missing_unknowns", knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
        } else {
            save(result, getfalse, lost, file = paste("unem_missing", highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
        }
    }
} else {
    if (wrong) {
        save(result, getfalse, lost, file = paste("nempi_runs/unem_misslabeled", runs2, highnoise, complete, noise2, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
    } else {
        if (knowns < Sgenes) {
            save(result, getfalse, lost, file = paste("nempi_runs/unem_missing_unknowns", runs2, knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
        } else {
            save(result, getfalse, lost, file = paste("nempi_runs/unem_missing", runs2, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
        }
    }
}

stop("done")

## testing:

## analyse ll decrease:

source("~/Documents/testing/R/nempi/nempi.r")
source("~/Documents/testing/R/nempi/nempi_low.r")
ures <- nempi(D, full = paras[s, 1], type = type, soft = paras[s, 3], combi = combi, multi = multi2, complete = complete, null = 0)
pifit(ures, sim, D)

sum(ures$lls[2:length(ures$lls)] - ures$lls[1:(length(ures$lls)-1)] < 0)

par(mfrow=c(2,3))
plotConvergence.nempi(ures, type = "b", col = "red")

## start pipeline:

system("scp nempi/R/nempi_main.r euler.ethz.ch:")
system("scp nempi/R/nempi_low.r euler.ethz.ch:")
system("scp mnem/R/mnems_low.r euler.ethz.ch:")
system("scp mnem/R/mnems.r euler.ethz.ch:")
system("scp nempi/other/nempi_app.r euler.ethz.ch:")

## ## on leo/euler:

ram=10000

rm error.txt
rm output.txt
rm .RData

nCells=NA

bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '5' '0' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"

bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '5' '1' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"

bsub -M ${ram} -q normal.24h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '15' '0' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"

bsub -M ${ram} -q normal.24h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '15' '1' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"

bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '10' '0' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"

bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '10' '1' '1' '1' 'NA' 'NA' '${nCells}' < nempi_app.r"

## new with unknowns

## ram=10000

## rm error.txt
## rm output.txt
## rm .RData

## nCells=NA

## bsub -M ${ram} -q normal.120h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '50' '0' '1' '1' '8' 'NA' '${nCells}' < nempi_app.r"

## ram=10000

## rm error.txt
## rm output.txt
## rm .RData

## bsub -M ${ram} -q normal.120h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '100' '0' '1' '1' '8' 'NA' '${nCells}' < nempi_app.r"

## new with perturbed net

ram=10000

rm error.txt
rm output.txt
rm .RData

nCells=NA

perturb=-0.5

bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '10' '0' '1' '1' 'NA' 'NA' '${nCells}' '${perturb}' < nempi_app.r"

## paralellize (esp. unknowns):

ram=10000

rm error.txt
rm output.txt
rm .RData

nCells=500
Pgenes=50

bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '${Pgenes}' '0' '1' '1' '8' '97' '${nCells}' < nempi_app.r"

for i in {2..100}; do
    bsub -M ${ram} -q normal.4h -n 1 -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --vanilla --silent --no-save --args '${Pgenes}' '0' '1' '1' '8' '${i}' '${nCells}' < nempi_app.r"
done

## missing runs:

knowns <- 8
highnoise <- complete <- 1
Sgenes <- 50
Egenes <- 10
nCells <- 1000
perturb <- 0
multi <- c(0.2,0.1)

for (i in 1:100) {
    filename <- paste("nempi_runs/unem_missing_unknowns", i, knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_")
    if (!file.exists(filename)) {
        system(paste0("bsub -M 10000 -q normal.4h -n 1 -e error.txt -o output.txt -R \"rusage[mem=10000]\" \"R/bin/R --vanilla --silent --no-save --args '", Sgenes, "' '0' '1' '1' '8' '", i, "' '", nCells, "' < nempi_app.r\""))
        print(filename)
    }
}

## combine parallel:

knowns <- 8
highnoise <- complete <- 1
Sgenes <- 100
Egenes <- 10
nCells <- 1000
perturb <- 0
multi <- c(0.2,0.1)
missing.runs <- NULL

for (i in 1:100) {
    filename <- paste("~/Mount/Euler/nempi_runs/unem_missing_unknowns", i, knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_")
    if (file.exists(filename)) {
        load(filename)
        if (i == 1) {
            result2 <- result
        } else {
            result2[i,,,,] <- result[i,,,,]
        }
        cat(paste0(i, "."))
    } else {
        missing.runs <- c(missing.runs,i)
    }
}
if (is.null(missing.runs)) {
    result <- result2
} else {
    result <- result2[-missing.runs,,,,]
}
save(result, file = paste("~/Mount/Euler/unem_missing_unknowns", knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))

## ## load results:

## normal pipeline:

## Sgenes <- 5
## highnoise <- 1
## complete <- 1
## Egenes <- 10
## nCells <- Sgenes*10*2
## multi <- c(0.2, 0.1)
## perturb <- 0
## wrong <- 0

## ## Documents/nempi/old2/

## if (wrong) {
##     noise2 <- 0.5
##     load(paste("~/Documents/unem_misslabeled", highnoise, complete, noise2, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
## } else {
##     load(paste("~/Documents/unem_missing", highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_"))
## }

## result[which(is.na(result) == TRUE)] <- 0

## figure plots:

path <- "~/Mount/Euler/"
statCells <- 0
wrong <- 0; knowns <- 100 # 8 | > 15
show2 <- 14 # 1,4,6,13,14
shownoise <- c(1,2,3); showleg <- 1; perturb <- -0.5
if (knowns > 8) { if (perturb == 0) { doSgenes <- c(5,10,15) } else { doSgenes <- 10 }
} else { if (!statCells) { doSgenes <- c(50,100) } else { doSgenes <- 50 }
    lost <- c(0.1, 0.9) }
highnoise <- 1; complete <- 1; Egenes <- 10; multi <- c(0.2, 0.1); noise2 <- 0.5
if (wrong) { lost <- c(0.1, 0.5) } else { lost <- c(0.1, 0.5, 0.9) }
if (!(show2 %in% c(1,13))) { show <- c(1:4, 7:9, 6) } else { show <- 2 }
cols <- c("red", "blue", "darkgreen", "brown", "orange", "pink", "turquoise", "grey"); cols <- rep(cols[1:length(show)], 3)
parcols <- 3; height0 <- 8; lost2 <- lost; leg <- 2
if (wrong | knowns == 8) { parcols <- 2; height0 <- height0/3*2; lost2 <- lost[1:2]; leg <- 0 }
paras <- expand.grid(c(0,1), c(0,1), c(0,1)); box <- 1; scatter <- ""; dens <- 0; ymin <- 0.5; ymin2 <- 0
setEPS()
height1 <- 7.5
if (knowns == 8 | perturb != 0) { if (statCells != 0 | perturb != 0) { height1 <- height1/4*2 } else { height1 <- height1/4*3 } }
postscript("temp.eps", height = height1, width = height0)
par(mar=c(3.85,4,2.75,1),oma=c(0,0,0,0))
if (wrong == 0) {
    if (show2 == 4 & perturb == 0) {
        layout.mat <- matrix(c(rep(c(1,4,7),each=2),10,rep(c(2,5,8),each=2),11,rep(c(3,6,9),each=2),12),7)
    } else {
        layout.mat <- matrix(c(rep(rep(1:2,each=2),2),3:6),3,byrow=1)
        if (statCells==0) {
            layout.mat <- matrix(c(rep(rep(1:2,each=2),2),rep(rep(3:4,each=2),2),5:8),5,byrow=1)
        }
        if (perturb != 0) {
            layout.mat <- matrix(c(rep(rep(1:3,each=2),2),4:9),3,byrow=1)
        }
    } } else {
          layout.mat <- matrix(c(rep(rep(1:2,each=2),2),rep(rep(3:4,each=2),2),rep(rep(5:6,each=2),2),7:10),7,byrow=1)
      }
layout(layout.mat)
for (Sgenes in doSgenes) {
    for (k in 1:length(lost2)) {
        for (s in show2) {
            if (!statCells) { if (knowns > 15) { nCells <- Sgenes*10*2 } else { nCells <- 1000 } } else { nCells <- statCells }
            if (wrong) {
                noise2 <- 0.5
                main <- paste0(Sgenes, " P-genes, incorrect")
                load(paste0(path,paste("unem_misslabeled", highnoise, complete, noise2, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_")))
            } else {
                main <- paste0(Sgenes, " P-genes, unobserved")
                if (knowns < Sgenes) {
                    load(paste0(path,paste("unem_missing_unknowns", knowns, highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_")))
                } else {
                    load(paste0(path,paste("unem_missing", highnoise, complete, Sgenes, Egenes, nCells, perturb, paste(c(multi, ".rda"), collapse = ""), sep = "_")))
                }
            }
            if (s == 4) { ylab <- "perturbation profile correlation"
            } else if (s == 1) { ylab <- "normalised hamming distance"
            } else if (s == 13) { ylab <- "fraction of correct attachments"
            } else if (s == 6) { ylab <- "fraction of identified null samples"
            } else if (s == 14) { ylab <- "area under the precision-recall curve" }
            ylim <- c(ymin2,1)
            if (s < dim(result)[4]) {
                if (dimnames(result)[[4]][s] == "time") { ylim <- NULL }
                if (!(dimnames(result)[[4]][s] %in% c("cor","auc","theta","net"))) {
                    ylab <- dimnames(result)[[4]][s] }
            } else { if (s %in% c(101,103,105)) { ylab <- "PPV" }
                if (s %in% c(102,104,106)) { ylab <- "NPV" } }
            main <- paste0(main, ": ", lost[k])
            if (knowns < Sgenes) { main <- paste0(main, "\n unknowns: ", Sgenes-knowns) }
            databox <- NULL
            for (i in shownoise) {
                if (s == 101) {
                    TP <- (result[,i,k,2,show] + result[,i,k,9,show]); FP <- (result[,i,k,3,show] + result[,i,k,10,show]); databox <- cbind(databox, TP/(TP+FP))
                } else if (s == 102) {
                    TN <- (result[,i,k,7,show] + result[,i,k,11,show]); FN <- (result[,i,k,8,show] + result[,i,k,12,show]); databox <- cbind(databox, TN/(TN+FN))
                } else if (s == 103) {
                    TP <- result[,i,k,2,show]; FP <- result[,i,k,3,show]; databox <- cbind(databox, TP/(TP+FP))
                } else if (s == 104) {
                    TN <- result[,i,k,9,show]; FN <- result[,i,k,10,show]; databox <- cbind(databox, TN/(TN+FN))
                } else if (s == 105) {
                    TP <- result[,i,k,7,show]; FP <- result[,i,k,8,show]; databox <- cbind(databox, TP/(TP+FP))
                } else if (s == 106) {
                    TN <- result[,i,k,11,show]; FN <- result[,i,k,12,show]; databox <- cbind(databox, TN/(TN+FN))
                } else if (s == 14) { TP <- result[,i,k,2,6:9]+result[,i,k,9,6:9]
                    FP <- result[,i,k,3,6:9]+result[,i,k,10,6:9]
                    FN <- result[,i,k,8,6:9]+result[,i,k,12,6:9]
                    ## result[,i,k,s,6:9] <- TP/(TP+FP)
                    result[,i,k,s,6:9] <- (TP/(TP+FP)+TP/(TP+FN))/2
                    databox <- cbind(databox, result[,i,k,s,show])
                } else { databox <- cbind(databox, result[,i,k,s,show]) } }
            mnem:::myboxplot(databox, col = cols, ylim = ylim, main = main, xlab = expression(sigma), ylab = ylab, box = box, scatter = scatter, dens = dens, xaxt = "n", border = cols, #notch = 1,
                      medcol = "black")
            axis(1, length(show)/2 + 0.5 + c(0, length(show), 2*length(show))[1:length(shownoise)], c(1,3,5)[shownoise])
            abline(v=c(length(show)*(1:(length(shownoise)-1))+0.5), col = 1)
            #mnem:::addgrid(x=c(-1,1,0.5), y=c(0,length(show)*3+1,1))
        }
    }
}
if (showleg) {
    if (wrong==1 | knowns == 8 | perturb != 0) { cex.leg <- 1 } else { cex.leg <- 1.5 }
    par(mar=rep(0, 4))
    plot.new()
    legend("topleft",legend=c(expression(NEM~pi), "svm", "neural net"),col=c("red", "blue", "darkgreen"),fill=c("red", "blue", "darkgreen"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent")
    plot.new()
    legend("topleft",legend=c("random forest", "missForest", "mice"),col=c("brown", "orange", "pink"),fill=c("brown", "orange", "pink"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent")
    plot.new()
    legend("topleft",legend=c("knn", "random"),col=c("turquoise", "grey"),fill=c("turquoise", "grey"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent")
}
dev.off()

## nempi scheme:

set.seed(9247)
D <- matrix(rnorm(10*20), 10)
rownames(D) <- paste0("E", 1:10)
Rho <- matrix(runif(5*20), 5)
Rho[which(Rho[, 5] != max(Rho[, 5]))[1], 5] <- max(Rho[, 5])
Rho <- apply(Rho, 2, function(x) return(x/sum(x)))
Rho[which(Rho[, 5] != max(Rho[, 5]))[1], 5] <- max(Rho[, 5])
phi <- matrix(c(1,0,0,0,0,
                1,0,0,0,0,
                0,1,0,0,0,
                1,0,0,0,0,
                0,0,0,1,0), 5)
rownames(Rho) <- colnames(phi) <- rownames(phi) <- paste0("P", 1:5)
colnames(D) <- colnames(Rho) <- paste0("S", 1:20)
Rhod <- apply(Rho, 2, function(x) { y <- x; y[which(y != max(x))] <- 0; y[which(y > 0)] <- 1; return(y) })
Rhod[, 11:20] <- -Rhod[, 11:20]
pdf("Scheme_Rho.pdf", width = 10, height = 5)
epiNEM::HeatmapOP(Rhod, col = "RdBu", colorkey = NULL, aspect = "iso", Rowv = 0, Colv = 0, cexRow = 2, cexCol = 2)
dev.off()
pdf("Scheme_R.pdf", width = 10, height = 9)
epiNEM::HeatmapOP(D, col = "RdBu", colorkey = NULL, aspect = "iso", Rowv = 0, Colv = 0, cexRow = 2, cexCol = 2)
dev.off()
pdf("Scheme_phi.pdf", width = 5, height = 5)
mnem::plotDnf(phi, edgelwd = 3, fontsize = 10)
dev.off()
Rhos <- apply(Rho, 2, function(x) { y <- x; y[which(y != max(x))] <- 0; y[which(y > 0)] <- 1; return(y) })
Rhos <- Rhos + runif(length(Rhos), 0, 0.1)
Rhos <- apply(Rhos, 2, function(x) return(x/sum(x)))
pdf("Scheme_Rho2.pdf", width = 10, height = 4)
epiNEM::HeatmapOP(Rhos, col = "RdBu", colorkey = NULL, aspect = "iso", Rowv = 0, Colv = 0, cexRow = 2, cexCol = 2)
dev.off()

## combinatorials:

library(Rgraphviz)

phi <- matrix(c(1,0,0,0,0,
                1,0,0,0,0,
                0,1,0,0,0,
                1,0,0,0,0,
                0,0,0,1,0), 5)

colnames(phi) <- rownames(phi) <- paste0("P", 1:5)

Rho <- matrix(0, 5, 10)

rownames(Rho) <- rownames(phi)

colnames(Rho) <- paste("S", 1:10, sep = "")

Rho[1, 1:3] <- 1
Rho[2, c(2,4:7)] <- 1
Rho[3, c(8:10, 10)] <- 1

pdf("temp.pdf", width = 5, height = 5)
p <- mnem::plotDnf(phi, edgelwd = 3, nodecol = list(P2 = "red", P3 = "red"), fontsize = 10)
epiNEM::HeatmapOP(Rho, col = "RdBu", Rowv = FALSE, aspect = "iso", colorkey = NULL, cexCol = 2, cexRow = 2)
Rho2 <- t(mnem:::mytc(phi))%*%Rho
Rho2[which(Rho2 > 1)] <- 1
epiNEM::HeatmapOP(Rho2, clusterx = Rho, col = "RdBu", Rowv = FALSE, aspect = "iso", colorkey = NULL, cexCol = 2, cexRow = 2)
dev.off()

## check data distribution:

path <- "mutclust/"
type <- "TCGA-BRCA"
load(paste0(path, type, "_nempi.rda"))

Sgenes <- 8; Egenes <- 10; nCells <- 1000; multi = c(0.2, 0.1); badCells <- 0.1
sim <- simData(Sgenes = Sgenes, Egenes = Egenes, mw = 1, nCells = nCells, Nem = 1, multi = multi, badCells = floor(nCells*badCells), uninform = 10)

pdf("nempi_data_dist.pdf", width = 16, height = 4)
par(mfrow=c(1,4))
sdata <- sim$data
sdata[which(sim$data == 1)] <- rnorm(sum(sim$data == 1), 1, 1)
sdata[which(sim$data == 0)] <- rnorm(sum(sim$data == 0), -1, 1)
hist(sdata, main = expression("Simulated with " ~ sigma ~ "= 1"),
     xlim = c(min(D2),max(D2)), freq = FALSE, ylim = c(0,0.3))
sdata <- sim$data
sdata[which(sim$data == 1)] <- rnorm(sum(sim$data == 1), 1, 3)
sdata[which(sim$data == 0)] <- rnorm(sum(sim$data == 0), -1, 3)
hist(sdata, main = expression("Simulated with " ~ sigma ~ "= 3"),
     xlim = c(min(D2),max(D2)), freq = FALSE, ylim = c(0,0.3))
sdata <- sim$data
sdata[which(sim$data == 1)] <- rnorm(sum(sim$data == 1), 0.5, 5)
sdata[which(sim$data == 0)] <- rnorm(sum(sim$data == 0), -0.1, 5)
hist(sdata, main = expression("Simulated with " ~ sigma ~ "= 5"),
     xlim = c(min(D2),max(D2)), freq = FALSE, ylim = c(0,0.3))
hist(D2, main = "TCGA breast cancer",
     xlim = c(min(D2),max(D2)), freq = FALSE, ylim = c(0,0.3))
dev.off()

pdf("temp.pdf", width = 20, height = 5)
tmp <- D2
# colnames(tmp) <- rownames(tmp) <- NULL
epiNEM::HeatmapOP(tmp)
dev.off()
















## warshall and other figures:

library(nanotime)
runs <- 1000
time <- matrix(0, runs, 3)
for (i in 1:runs) {
    cat(i)
    A <- simData(Sgenes = 10, Egenes = Egenes, mw = 1, nCells = nCells, Nem = 1, multi = multi)$Nem[[1]]
    A <- mytc(A)
    idx = which(A == 0)[1]
    nn <- dim(A)
    uv <- arrayInd(idx, nn)
    Phinew = A
    Phinew[idx[1]] = 1
    start <- nanotime(Sys.time())
    B <- mytc(Phinew, uv[1], uv[2])
    time[i, 1] <- as.numeric(nanotime(Sys.time()) - start)
    start <- nanotime(Sys.time())
    C <- transitive.closure(Phinew, mat = TRUE)
    time[i, 2] <- as.numeric(nanotime(Sys.time()) - start)
    start <- nanotime(Sys.time())
    D <- mytc(Phinew)
    time[i, 3] <- as.numeric(nanotime(Sys.time()) - start)
    if (any(B != C | C != D)) { stop() }
}

source("~/Documents/mnem/R/mnems_low.r")
pdf("temp.pdf", width = 6, height = 8)
myboxplot(time[1:i, ]/10^6, ylim = c(0.2,20), log = "y", xaxt = "n", main = "Wharshall vs Exponential", dens = 1, scatter = "", ylab = "seconds", lwd = 1, col = rgb(c(1,0,1), c(0,0,0.5), c(0,1,0), 0.5))
axis(1, 1:3, c("Warshall I.", "Exponential", "Warshall"))
dev.off()

microbenchmark(mytc(A), transitive.closure(A, mat = TRUE), time = 10)

Rho <- getRho(D)
pdf("temp.pdf", width = 20, height = 3)
epiNEM::HeatmapOP(Rho, col = "RdBu", cexRow = 0.75, cexCol = 0.75, rot = 0, aspect = "iso", colorkey = NULL)

epiNEM::HeatmapOP(ures2$Rho, col = "RdBu", cexRow = 0.75, cexCol = 0.75, rot = 0, aspect = "iso", clusterx = Rho, colorkey = NULL)

epiNEM::HeatmapOP(getRho(sim$data), col = "RdBu", cexRow = 0.75, cexCol = 0.75, rot = 0, aspect = "iso", clusterx = Rho, colorkey = NULL)

epiNEM::HeatmapOP(ures$Rho, col = "RdBu", cexRow = 0.75, cexCol = 0.75, rot = 0, aspect = "iso", clusterx = Rho, colorkey = NULL)

epiNEM::HeatmapOP(Rhosoft, col = "RdBu", cexRow = 0.75, cexCol = 0.75, rot = 0, aspect = "iso", clusterx = Rho, colorkey = NULL)
dev.off()

pdf("temp.pdf", width = 5, height = 5)
plotDnf(sim$Nem[[1]], edgelwd = 4, lwd = 4, fontsize = 14)
plotDnf(sim$Nem[[1]], edgelwd = 4, lwd = 4, fontsize = 14, nodecol = list("2" = "red", "9" = "red", "6" = "red"))
plotDnf(sim$Nem[[1]], edgelwd = 4, lwd = 4, fontsize = 14, nodecol = list("7" = "red", "4" = "red"))
dev.off()

pdf("temp.pdf", width = 9, height = 3)
par(mfrow=c(1,3))
pca <- rbind(cbind(rnorm(20, 0, 0.2), rnorm(20, 0, 0.2)),
             cbind(rnorm(30, 1, 0.2), rnorm(30, 1, 0.2)))
col <- c(sample(2:6, 15, replace = TRUE, prob = c(rep(0.1, 4), 0.6)), rep(1, 5),
                  sample(2:6, 20, replace = TRUE, prob = c(rep(0.1, 2), 0.6, rep(0.1, 2))), rep(1, 10))
plot(pca, col = col, xlab = "", ylab = "", main = "clusters")
col2 <- col
col2[which(col == 1)[1:5]] <- names(which.max(table(col[1:20])[-1]))
col2[which(col == 1)[6:15]] <- names(which.max(table(col[21:50])[-1]))
plot(pca, col = col2, xlab = "", ylab = "", main = "re-labeled")
col2[which(col != 6 & 1:50 <= 20)] <- names(which.max(table(col[1:20])[-1]))
col2[which(col != 4 & 1:50 > 20)] <- names(which.max(table(col[21:50])[-1]))
plot(pca, col = col2, xlab = "", ylab = "", main = "fully re-labeled")
dev.off()

pdf("temp.pdf", width = 8, height = 4)
par(mfrow=c(1,2))
plot(pca, col = col, xlab = "", ylab = "", main = "clusters")
d <- as.matrix(dist(pca))
col2 <- col
col2[which(col == 1)] <- col[which(col != 1)[apply(d[which(col == 1), which(col != 1)], 1, which.min)]]
plot(pca, col = col2, xlab = "", ylab = "", main = "re-labeled")
dev.off()

pdf("temp.pdf", width = 8, height = 4)
m <- matrix(c(c(1,1,0,0,1), c(0,0,1,1,0), c(1,1,1,1,1), rep(NA, 5*5), c(1,1,1,1,1)), 5)
rownames(m) <- 1:5
colnames(m) <- c(1,2,"1_2",rep("", 5),"sample")
epiNEM::HeatmapOP(m, Colv = 0, Rowv = 0, xrot = 0, colNA = "white", aspect = "iso", colorkey = NULL, col = "RdBu")
dev.off()

pdf("temp.pdf", width = 8, height = 4)
m <- matrix(c(c(1,1,0,0,1), c(0,0,1,1,0), c(0,1,1,0,0), rep(NA, 5*5), c(0,1,1,0,0)), 5)
rownames(m) <- 1:5
colnames(m) <- c(1,2,"1_2",rep("", 5),"sample")
epiNEM::HeatmapOP(m, Colv = 0, Rowv = 0, xrot = 0, colNA = "white", aspect = "iso", colorkey = NULL, col = "RdBu")
dev.off()

pdf("temp.pdf", width = 8, height = 4)
m <- matrix(c(c(1,1,0,0,1), c(1,1,1,1,1), c(1,1,1,1,1), rep(NA, 5*5), c(1,1,1,1,1)), 5)
rownames(m) <- 1:5
colnames(m) <- c(1,2,"1_2",rep("", 5),"sample")
epiNEM::HeatmapOP(m, Colv = 0, Rowv = 0, xrot = 0, colNA = "white", aspect = "iso", colorkey = NULL, col = "RdBu")
dev.off()







## try again the expected with respect to data creation: (does not seem to work.. how is it giving better results in unem?)

library(naturalsort)
library(nem)
library(cluster)
library(Rcpp)

source("~/Documents/mnem/R/mnems.r")
source("~/Documents/mnem/R/mnems_low.r")
sourceCpp("~/Documents/mnem/src/mm.cpp")

Sgenes <- 5
Egenes <- 10
samples <- 100

sim <- simData(Sgenes = Sgenes, Egenes = Egenes, mw = 1, nCells = Sgenes)

Rho <- matrix(0, Sgenes, samples)

Rho <- apply(Rho, 2, function(x) {
    y <- sample(seq_len(Sgenes), 1)
    x[y] <- 1
    return(x)
})

colnames(Rho) <- colnames(sim$data)[apply(Rho, 2, which.max)]

epiNEM::HeatmapOP(Rho)

ones <- which(Rho == 1)
zeros <- which(Rho == 0)

Rho[ones] <- runif(length(ones), 0.25, 1)

Rho[zeros] <- runif(length(zeros), 0, 0.75)

Rho <- apply(Rho, 2, function(x) return(x/sum(x)))

sdata <- sim$data[, naturalorder(colnames(sim$data))]%*%Rho

sdata <- (sdata - 0.5)/0.5

sdata <- sdata + rnorm(length(sdata), 0, 1)

epiNEM::HeatmapOP(sdata, col = "RdBu", cexRow = 0.75, cexCol = 0.75, bordercol = "transparent", rot = 0, dendrogram = "both")

D <- sdata

colnames(D) <- sample(seq_len(Sgenes), samples, replace = TRUE)

source("~/Documents/mnem/R/mnems_low.r")
res <- mynem(D, Rho = Rho, domean = FALSE)#, start = sim$Nem[[1]])

res$comp <- list()
res$comp[[1]] <- list()
res$comp[[1]]$phi <- res$adj

fitacc(res, sim)

Rho2 <- apply(Rho, 2, function(x) { y <- x*0; y[which.max(x)] <- 1; return(y) })

res <- mynem(D, Rho = Rho2, domean = FALSE)#, start = sim$Nem[[1]])

res$comp <- list()
res$comp[[1]] <- list()
res$comp[[1]]$phi <- res$adj

fitacc(res, sim)





res <- mnem(sdata, k = 2, search = "greedy", type = "random")

res <- mnem(sdata, k = 2, search = "greedy", type = "random")

fitacc(res, sim, strict = TRUE)

egenes <- numeric()

for (i in 1:5) {
    egenes <- c(egenes, sample(which(rownames(sdata) %in% i), 10))
}

sdataR <- sdata[egenes, ]

res <- mnem(sdataR, k = 2, search = "greedy", type = "random")

fitacc(res, sim, strict = TRUE)

## bugfixing:

#  D <- D_bkup; Gamma <- NULL
cbg-ethz/nempi documentation built on Nov. 5, 2024, 11:27 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
cbg-ethz/nempi
Inferring unobserved perturbations from gene expression data

other/nempi_sim.r
In cbg-ethz/nempi: Inferring unobserved perturbations from gene expression data

R Package Documentation

Browse R Packages

We want your feedback!

cbg-ethz/nempi Inferring unobserved perturbations from gene expression data

other/nempi_sim.r In cbg-ethz/nempi: Inferring unobserved perturbations from gene expression data

R Package Documentation

Browse R Packages

We want your feedback!

cbg-ethz/nempi
Inferring unobserved perturbations from gene expression data

other/nempi_sim.r
In cbg-ethz/nempi: Inferring unobserved perturbations from gene expression data