other/TCGA_nempi.r

method <- as.character(commandArgs(TRUE)[1])

type <- as.character(commandArgs(TRUE)[2])

goi <- as.numeric(commandArgs(TRUE)[3])

goitype <- as.numeric(commandArgs(TRUE)[4])

path <- "/cluster/work/bewi/members/mpirkl/"

## method <- "nempi"; type <- "TCGA-BRCA"; goi <- 0; goitype <- 1; path <- "~/Mount/Eulershare/"

library(naturalsort)
library(cluster)
library(Rcpp)
library(Rgraphviz)
library(mnem)
library(randomForest)
library(e1071)
library(missForest)
library(class)
library(nnet)

source("mnems.r")
source("mnems_low.r")
sourceCpp(code=readChar("mm.cpp", file.info("mm.cpp")$size))
source("nempi_main.r")
source("nempi_low.r")

## sourceCpp("~/Documents/mnem/src/mm.cpp"); source("~/Documents/mnem/R/mnems.r"); source("~/Documents/mnem/R/mnems_low.r"); source("~/Documents/nempi/R/nempi_main.r"); source("~/Documents/nempi/R/nempi_low.r")
## load("mutclust/TCGA-BRCA_final.rda")

type <- "TCGA-BRCA"
load(paste0(type, "_final.rda"))

M[which(M < 3)] <- 0
M[which(M > 0)] <- 1
M[grep("Silent", Mtype)] <- 0
M <- M[order(rownames(M)), order(colnames(M))]
cnv[which(cnv == 0)] <- 0
cnv[which(cnv != 0)] <- 1
cnv <- cnv[order(rownames(cnv)), order(colnames(cnv))]
meth[which(meth > 0.5)] <- 1
meth[which(meth <= 0.5)] <- 0
meth <- meth[order(rownames(meth)), order(colnames(meth))]
meth[is.na(meth)] <- 0
P <- matrix(0, length(unique(c(rownames(M), rownames(cnv), rownames(meth)))), length(unique(c(colnames(M), colnames(cnv), colnames(meth)))))
rownames(P) <- sort(unique(c(rownames(M), rownames(cnv), rownames(meth))))
colnames(P) <- sort(unique(c(colnames(M), colnames(cnv), colnames(meth))))
colnames(P) <- gsub("A$", "", lapply(strsplit(colnames(P), "-"), function(x) { y <- x[1:4]; y <- paste(y, collapse = "-"); return(y) }))
P <- P[, which(duplicated(colnames(P)) == FALSE)]
P[which(rownames(P) %in% rownames(M)), which(colnames(P) %in% colnames(M))] <- M[which(rownames(M) %in% rownames(P)), which(colnames(M) %in% colnames(P))]
Pmut <- P
P <- Pmut*0
P[which(rownames(P) %in% rownames(meth)), which(colnames(P) %in% colnames(meth))] <- P[which(rownames(P) %in% rownames(meth)), which(colnames(P) %in% colnames(meth))] + meth[which(rownames(meth) %in% rownames(P)), which(colnames(meth) %in% colnames(P))]
Pmeth <- P
P <- Pmeth*0
P[which(rownames(P) %in% rownames(cnv)), which(colnames(P) %in% colnames(cnv))] <- P[which(rownames(P) %in% rownames(cnv)), which(colnames(P) %in% colnames(cnv))] + cnv[which(rownames(cnv) %in% rownames(P)), which(colnames(cnv) %in% colnames(P))]
Pcnv <- P
P <- Pmut+Pmeth+Pcnv
P2 <- P # full abberations including cnv and meth
P <- Pmut
Ps <- list()
Ps$all <- P2
Ps$meth <- Pmeth
Ps$cnv <- Pcnv
Ps$mut <- Pmut

if (method=="auc") {
    ## goitype <- 4
    methods <- c("nempi", "svm", "nn", "rf", "mf", "knn")
    mutinc <- 1
    files <- list.files(paste0(path,"nempi_tcga/"))
    if (goitype==1) {
        files <- files[grep("_pan",files)]
    } else if (goitype==2) {
        files <- files[grep("_all",files)]
    } else if (goitype==3) {
        files <- files[grep("_mut",files)]
    } else if (goitype==4) {
        files <- files[grep("_org",files)]
    }
    pr <- roc <- list()
    aucrows <- max(unlist(lapply(methods,function(x) {
        y <- length(grep(paste0("^",x),files))
        return(y)
    })))
    pr$all <- pr$mut <- pr$cnv <- pr$meth <-
        pr$allp <- pr$mutp <- pr$cnvp <- pr$methp <-
            pr$time <- matrix(NA,aucrows,length(methods)+2,
                              dimnames=list(rownames=NULL,colnames=c(methods,c("random bin","random cont"))))
    roc$all <- roc$mut <- roc$cnv <- roc$meth <-
        roc$allp <- roc$mutp <- roc$cnvp <- roc$methp <-
            roc$time <- matrix(NA,aucrows,length(methods)+2,
                              dimnames=list(rownames=NULL,colnames=c(methods,c("random bin","random cont"))))
    pr$missing <- roc$missing <- numeric(aucrows)
    for (method in methods) {
        ## method <- "nempi"
        print(method)
        files.method <- files[grep(paste0("^",method),files)]
        idx <- which(methods==method)
        idx2 <- 0
        for (file in files.method) {
            if (length(grep(paste0("^",method),file))==0) { next() }
            idx2 <- idx2 + 1
            cat(paste0(idx2,"."))
            ## file <- "nempi_1.rds"
            ures <- readRDS(paste0(path,"nempi_tcga/",file))
            pr$time[idx2,idx] <- ures$time
            goi <- rownames(ures$Gamma)
            P <- Pmut
            P <- P[which(rownames(P) %in% goi), ]
            P[which(P > 1)] <- 1
            ## P <- apply(P, 2, function(x) return(x/sum(x)))
            ## P[is.na(P)] <- 0
            Rho <- cbind(P, matrix(0, nrow(P), sum(!(colnames(D) %in% colnames(P)))))
            colnames(Rho) <- c(colnames(P), colnames(D)[which(!(colnames(D) %in% colnames(P)))])
            Rho <- Rho[, colnames(D)]
            if (sum(apply(Rho, 1, sum) == 0) > 0) {
                Rho <- Rho[-which(apply(Rho, 1, sum) == 0), ]
            }
            Rho[is.na(Rho)] <- 0
            if (method=="nempi") {
                pr$missing[idx2] <- roc$missing[idx2] <- sum(apply(Rho, 2, sum) == 0)/ncol(Rho) # unlabelled
            }
            inc <- sort(apply(Rho, 1, sum))
            D2 <- D[which(apply(D, 1, median) != 0), ]
            D3 <- D2[, which(duplicated(colnames(D2)) == FALSE)]
            D4 <- D3
            Rho <- Rho[, which(duplicated(colnames(D2)) == FALSE)]
            colnames(D4) <- apply(Rho, 2, function(x) {
                Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_")
                return(Sgenes)
            })
            for (mtype in c("all")) { # ,"mut","cnv","meth")) {
                ## mtype <- "all"
                P <- Ps[[mtype]] # P2
                if (!mutinc) { # include mutations or not (0)
                    P[which(Pmut == 1)] <- 0
                }
                P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))]
                P <- P[order(rownames(P)), order(colnames(P))]
                Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P)))))
                colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))])
                P <- Ptmp[, colnames(Rho)]
                P[which(P > 1)] <- 1
                meas <- list()
                meas$data <- P
                colnames(meas$data) <- apply(P, 2, function(x) {
                    y <- paste(sort(rownames(P)[which(x==1)]), collapse = "_")
                    return(y)
                })
                measnet <- matrix(0, nrow(P), nrow(P))
                diag(measnet) <- 1
                meas$Nem[[1]] <- measnet
                meas$Theta[[1]] <- ures$res$subtopo
                if (idx == 1) {
                    pitmp <- pifit(ures, meas, D4)
                    randres <- ures
                    randres$Gamma[1:length(randres$Gamma)] <- sample(c(0,1),length(randres$Gamma),replace=TRUE)
                    rand <- pifit(randres, meas, D4, propagate=FALSE)
                    pr[[mtype]][idx2,length(methods)+1] <- rand$auc
                    roc[[mtype]][idx2,length(methods)+1] <- rand$roc
                    randres2 <- randres
                    randres2$Gamma[1:length(randres2$Gamma)] <- runif(length(randres2$Gamma),0,1)
                    randres2$Gamma <- apply(randres2$Gamma, 2, function(x) {
                        y <- runif(1,0,1)
                        return(x/(sum(x)+y))
                    })
                    rand <- pifit(randres2, meas, D4, propagate=FALSE)
                    pr[[mtype]][idx2,length(methods)+2] <- rand$auc
                    roc[[mtype]][idx2,length(methods)+2] <- rand$roc
                } else {
                    pitmp <- pifit(ures, meas, D4, propagate=FALSE)
                }
                pr[[mtype]][idx2,idx] <- pitmp$auc
                roc[[mtype]][idx2,idx] <- pitmp$roc
                meas$Nem[[1]] <- ures$res$adj
                if (idx == 1) {
                    pitmp <- pifit(ures, meas, D4)
                    rand <- pifit(randres, meas, D4, propagate=FALSE)
                    pr[[paste0(mtype,"p")]][idx2,length(methods)+1] <- rand$auc
                    roc[[paste0(mtype,"p")]][idx2,length(methods)+1] <- rand$roc
                    rand <- pifit(randres2, meas, D4, propagate=FALSE)
                    pr[[paste0(mtype,"p")]][idx2,length(methods)+2] <- rand$auc
                    roc[[paste0(mtype,"p")]][idx2,length(methods)+2] <- rand$roc
                } else {
                    pitmp <- pifit(ures, meas, D4, propagate=FALSE)
                }
                pr[[paste0(mtype,"p")]][idx2,idx] <- pitmp$auc
                roc[[paste0(mtype,"p")]][idx2,idx] <- pitmp$roc
            }
        }
    }
    if (goitype==1) {
        saveRDS(pr,file=paste0(path,"nempi_tcga/pr_results_pan.rds"))
        saveRDS(roc,file=paste0(path,"nempi_tcga/roc_results_pan.rds"))
    } else if (goitype==2) {
        saveRDS(pr,file=paste0(path,"nempi_tcga/pr_results_all.rds"))
        saveRDS(roc,file=paste0(path,"nempi_tcga/roc_results_all.rds"))
    } else if (goitype==3) {
        saveRDS(pr,file=paste0(path,"nempi_tcga/pr_results_mut.rds"))
        saveRDS(roc,file=paste0(path,"nempi_tcga/roc_results_mut.rds"))
    } else if (goitype==4) {
        saveRDS(pr,file=paste0(path,"nempi_tcga/pr_results_org.rds"))
        saveRDS(roc,file=paste0(path,"nempi_tcga/roc_results_org.rds"))
    }
    stop("success")
}

## genes of interest

if (goi!=0) {
    ## goi <- 1
    goi0 <- goi
    gois <- rownames(Pmut)[which(apply(Pmut,1,sum)>0)]
    prob <- rep(1/length(gois),length(gois))
    if (goitype==1) {
        gois <- intersect(c("JAK1","FGFR1","MSH2","HIST1H1C","MGMT","SMARCA1","ZC3H12A","SETBP1","PIK3CB","KRT222","HGF","IL7R","DAZAP1","LEMD2","MACF1","MYC","THRAP3","RPL5","H3F3C","AXIN2","POLQ","AR","UNCX","PIK3CG","ABL1","PMS1","IRF6","ALK","ESR1","PGR","CHD8","H3F3A","PTPRC","CACNA1A","TBL1XR1","EGR3","JAK3","GRIN2D","POLE","MSH3","RARA","ATXN3","PMS2","ACVR1B","TLR4","PLXNB2","RNF111","CDKN2C","MLH1","BRCA2","ARAF","IRF2","CHEK2","USP9X","TRAF3","EPHA3","NOTCH2","CBWD3","PAX5","RHEB","KMT2A","BRAF1","BCL2","JAK2"),gois) # pan-cancer drivers: too mutated/cnved/methed ?? no, don't think so.
    } else if (goitype==3) {
        prob0 <- apply(Pmut[which(apply(Pmut,1,sum)>0),],1,function(x) return(sum(x!=0)))
        prob <- prob0
        prob[which(prob<20)] <- 0
        prob <- prob/sum(prob)
    }
    ## goi <- 1
    set.seed(goi*9247)
    goi <- sample(gois,10,prob=prob)
    print(goi)
    epiNEM::HeatmapOP(Pmut[goi,])
    apply(Pmut[goi,],1,sum)
} else {
    goi0 <- goi
    goi <- c("MAP2K4", "GATA3", "GPS2", "TBX3", "PTPRD", "NCOR1", "CBFB", "CDKN1B") # BRCA
}

P <- Pmut
P <- P[which(rownames(P) %in% goi), ]
P[which(P > 1)] <- 1
P <- apply(P, 2, function(x) return(x/sum(x)))
P[is.na(P)] <- 0

## data imputation:

Rho <- cbind(P, matrix(0, nrow(P), sum(!(colnames(D) %in% colnames(P)))))
colnames(Rho) <- c(colnames(P), colnames(D)[which(!(colnames(D) %in% colnames(P)))])
Rho <- Rho[, colnames(D)]
if (sum(apply(Rho, 1, sum) == 0) > 0) {
    Rho <- Rho[-which(apply(Rho, 1, sum) == 0), ]
}
Rho[is.na(Rho)] <- 0
print("unlabelled:")
print(sum(apply(Rho, 2, sum) == 0)/ncol(Rho))
inc <- sort(apply(Rho, 1, sum))
D2 <- D[which(apply(D, 1, median) != 0), ]
D3 <- D2[, which(duplicated(colnames(D2)) == FALSE)]
Rho <- Rho[, which(duplicated(colnames(D2)) == FALSE)]
for (i in which(duplicated(colnames(D2)) == TRUE)) {
    j <- which(colnames(D3) %in% colnames(D2)[i])
    D3[, j] <- apply(D2[, which(colnames(D2) %in% colnames(D2)[i]), drop = FALSE], 1, median)
}
D2 <- D3
colnames(D2) <- c(rownames(Rho), sample(rownames(Rho), ncol(D2)-nrow(Rho), replace = TRUE))
D4 <- D2
colnames(D4) <- apply(Rho, 2, function(x) {
    Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_")
    return(Sgenes)
})

## pdf("temp.pdf", width = 12, height = 6);tmp <- Rho;colnames(tmp) <- NULL;epiNEM::HeatmapOP(tmp, col = "RdBu", Rowv = 0, bordercol = "transparent");dev.off()

start <- as.numeric(Sys.time())
if (method=="nempi") {
    converged <- 10
    nempires <- nempi(D2, Gamma = Rho, full = TRUE, converged = converged)
    ures <- nempires
} else if (method=="svm") {
    svmres <- classpi(D4, full = TRUE, method = "svm")
    ures <- svmres
} else if (method=="nn") {
    nnres <- classpi(D4, full = TRUE, method = "nnet", MaxNWts = 50000, size = 2)
    ures <- nnres
} else if (method=="mf") {
    mfdata <- cbind(as.data.frame(t(D4)), factor(colnames(D4)))
    mfdata[which(mfdata == "", arr.ind = TRUE)] <- NA
    mfimp <- missForest(mfdata)
    D4 <- D2
    colnames(D4) <- mfimp$ximp[, ncol(mfimp$ximp)]
    tmp <- nem(D4, multi = TRUE)
    Gamma <- getGamma(D4)
    ures <- list()
    ures$Gamma <- apply(Gamma, 2, function(x) return(x/sum(x)))
    ures$res <- list()
    ures$res$adj <- tmp$adj
    ures$null <- TRUE
    ures$combi <- 1
    mfres <- ures
} else if (method=="rf") {
    rfres <- classpi(D4, full = TRUE, method = "randomForest")
    ures <- rfres
} else if (method=="knn") {
    train <- t(D4[, which(colnames(D4) != "")])
    test <- t(D4[, which(colnames(D4) == "")])
    knn0 <- 0
    if (knn0) {
        train <- rbind(train, NULL = rep(0, ncol(train)))
        cl <- c(colnames(D4)[which(colnames(D4) != "")], "NULL")
    } else {
        cl <- colnames(D4)[which(colnames(D4) != "")]
    }
    knnres <- knn(train, test, cl, prob = TRUE)
    D3 <- D4
    colnames(D3)[which(colnames(D3) %in% "")] <- as.character(knnres)
    tmp <- nem(D3, multi = TRUE)
    Gamma <- getGamma(D3)
    ures <- list()
    ures$Gamma <- Gamma # apply(Gamma, 2, function(x) return(x/sum(x)))
    ures$res <- list()
    ures$res$adj <- tmp$adj
    ures$null <- TRUE
    ures$combi <- 1
    knnres <- ures
} else if (method=="mice") { # takes forever
    mfdata <- cbind(as.data.frame(t(D4)), factor(colnames(D4)))
    mfdata[which(mfdata == "", arr.ind = TRUE)] <- NA

    micedata <- mfdata
    colnames(micedata) <- paste0(LETTERS[1:ncol(micedata)], 1:ncol(micedata))
    miceres <- mice(micedata, method = c(rep('rfcont', ncol(micedata)-1), 'rfcat'), m = 2, maxit = 2)
    ures <- miceres
}
end <- as.numeric(Sys.time())
print(end - start)
ures$time <- end-start

if (goi0!=0) {
    if (goitype==1) {
        saveRDS(ures, file=paste0(path, "nempi_tcga/", method, "_", goi0, "_pan.rds"))
    } else if (goitype==2) {
        saveRDS(ures, file=paste0(path, "nempi_tcga/", method, "_", goi0, "_all.rds"))
    } else if (goitype==3) {
        saveRDS(ures, file=paste0(path, "nempi_tcga/", method, "_", goi0, "_mut.rds"))
    }
} else {
    saveRDS(ures, file=paste0(path, "nempi_tcga/", method, "_org.rds"))
}
stop("tcga done")

## commands for hpc:

system("scp ~/Documents/mnem/R/mnems.r euler.ethz.ch:")
system("scp ~/Documents/mnem/R/mnems_low.r euler.ethz.ch:")
system("scp ~/Documents/nempi/R/nempi_main.r euler.ethz.ch:")
system("scp ~/Documents/nempi/R/nempi_low.r euler.ethz.ch:")

system("scp testing/nempi/other/TCGA_nempi.r euler.ethz.ch:")

rm error.txt
rm output.txt
rm .RData

ram=8000 # 4h: nempi 8, mf 8, knn 8, svm 32, nn 32, rf 32, auc 16
queue=4
method="auc"
type="TCGA-BRCA"
goi=1
goitype=3

bsub -M ${ram} -q normal.${queue}h -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --max-ppsize=500000 --vanilla --silent --no-save --args '${method}' '${type}' '${goi}' '${goitype}' < TCGA_nempi.r"

for goi in {2..100}; do
bsub -M ${ram} -q normal.${queue}h -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --max-ppsize=500000 --vanilla --silent --no-save --args '${method}' '${type}' '${goi}' '${goitype}' < TCGA_nempi.r"
done

for i in {141716598..141716731}; do
bkill ${i}
done

## analysis:

method <- "nempi"
ures <- readRDS(paste0("~/Mount/Eulershare/nempi_tcga/",method,"_org.rds"))

sum(ures$lls[2:length(ures$lls)] - ures$lls[1:(length(ures$lls)-1)] < 0)

pdf("temp.pdf", width = 12, height = 6)
epiNEM::HeatmapOP(ures$Gamma, bordercol = rgb(0,0,0,0), col = "RdBu")
#plot(ures, edgewidth = 30)
dev.off()

pdf("temp.pdf", width = 9, height = 6)
par(mfrow=c(2,3))
plotConvergence(ures, type = "b", col = "blue")
dev.off()

pdf("temp.pdf", width = 6, height = 6)
Pgenes <- sort(unique(rownames(ures$Gamma)))
adjtmp <- ures$res$adj
colnames(adjtmp) <- rownames(adjtmp) <- Pgenes
plotDnf(adj2dnf(adjtmp), edgelwd = 2)
dev.off()

pdf("temp.pdf", width = 12, height = 6)
epiNEM::HeatmapOP(t(mytc(ures$res$adj))%*%ures$Gamma, bordercol = rgb(0,0,0,0), col = "RdBu")
#plot(ures, edgewidth = 30)
dev.off()

source("~/Documents/nempi/R/nempi_main.r")
D4 <- D2
colnames(D4) <- apply(Rho, 2, function(x) {
    Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_")
    return(Sgenes)
})

## new analysis:

goitype <- "pan"
stat <- readRDS(paste0("~/Mount/Eulershare/nempi_tcga/pr_results_",goitype,".rds"))

cex.main <- 1.5
cex.lab <- 1.5
idx <- c(1:6,8)
setEPS()
postscript("temp.eps", height = 5, width = 10)
par(mar=c(3.85,4,2.75,1),oma=c(0,0,0,0))
layout.mat <- matrix(c(rep(rep(1:3,each=1),3),4:6),4,byrow=1)
layout(layout.mat)
methods <- c("nempi", "svm", "nn", "rf", "mf", "knn")
wc.ps <- numeric((length(idx)-1))
wc.ps[1:length(wc.ps)] <- NA
for (i in 1:(length(idx)-1)) {
    wc.ps[i] <- wilcox.test(stat$all[,1],stat$all[,idx[1+i]],alternative="greater")$p.value
}
wc.psp <- numeric((length(idx)-1))
wc.psp[1:length(wc.psp)] <- NA
for (i in 1:(length(idx)-1)) {
    wc.psp[i] <- wilcox.test(stat$allp[,1],stat$allp[,idx[1+i]],alternative="greater")$p.value
}
col <- c("red", "blue", "darkgreen", "brown", "orange", "turquoise","grey","darkgrey")
ylim <- c(min(stat$all,na.rm=TRUE),1)
mnem:::myboxplot(stat$all[,idx],col=col,dens=0,xaxt = "n",main = "no perturbation propagation",ylab ="area under the precision-recall curve",box=1,border=col,medcol="black",ylim=ylim,cex.main=cex.main,cex.lab=cex.lab)
max.val <- apply(stat$all[,idx[-1]],2,max,na.rm=TRUE) + 0.05
if (goitype=="pan") {
    max.val <- max.val + rep(0.05,length(max.val))
}
text(2:(length(idx[-1])+1),max.val,labels=round(wc.ps,7),srt=90)
mnem:::myboxplot(stat$allp[,idx],col=col,dens=0,xaxt = "n",main = "perturbation propagation",ylab ="area under the precision-recall curve",box=1,border=col,medcol="black",ylim=ylim,cex.main=cex.main,cex.lab=cex.lab)
max.valp <- apply(stat$allp[,idx[-1]],2,min,na.rm=TRUE) - 0.05
if (goitype=="pan") {
    max.valp <- max.valp + c(0,0,0,-0.01,-0.02,-0.05)
}
##text(2:(length(idx[-1])+1),max.valp,labels=round(wc.psp,7))
text((length(idx[-1])+1),max.valp[length(max.valp)],labels=round(wc.psp[length(wc.psp)],7),srt=90)
mnem:::myboxplot(stat$time[,1:6],col=col,dens=0,xaxt = "n",main = "running time",ylab ="seconds",box=1,border=col,medcol="black",log="y",cex.main=cex.main,cex.lab=cex.lab)
##mnem:::myboxplot(stat$missing,col=col,dens=0,xaxt = "n",main = "fraction of unlabelled samples",ylab ="fraction",box=1,border=col,medcol="black",log="y")
cex.leg <- 1.5
par(mar=rep(0, 4))
plot.new()
legend("topleft",legend=c(expression(NEM~pi), "svm", "neural net"),col=c("red", "blue", "darkgreen"),fill=c("red", "blue", "darkgreen"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent")
plot.new()
legend("topleft",legend=c("random forest", "missForest", "knn"),col=c("brown", "orange", "turquoise"),fill=c("brown", "orange", "turquoise"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent")
plot.new()
legend("topleft",legend=c("random"),col=c("grey"),fill=c("grey"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent")
dev.off()

## colnames
## rownames     nempi       svm        nn        rf        mf       knn random bin
##     [1,] 0.8830668 0.8682249 0.7571648 0.7754192 0.7362392 0.7826912  0.6653634
##         colnames
## rownames random cont
## [1,]   0.7385025

## colnames
## rownames     nempi       svm      nn        rf        mf       knn random bin
##     [1,] 0.6007286 0.6215111 0.57883 0.6321455 0.6049589 0.5877279  0.5210848
##         colnames
## rownames random cont
##     [1,]   0.5418864

## old analysis:

## save(nempires, knnres, rfres, mfres, svmres, nnres, Rho, D2, Pmut, Pmeth, Pcnv, file = paste0(path, type, "_nempi.rda"))

path <- "mutclust/"; type <- "TCGA-BRCA"

load(paste0(path, type, "_nempi.rda"))

source("~/Documents/nempi/R/nempi_main.r")
D4 <- D2
colnames(D4) <- apply(Rho, 2, function(x) {
    Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_")
    return(Sgenes)
})

ures <- nempires

## check against methylation and cnvs:

pdf("temp.pdf", width = 12, height = 6)
tmp <- ures$Gamma
colnames(tmp) <- NULL
epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL)
dev.off()

pdf("temp.pdf", width = 6, height = 6)
Pgenes <- sort(unique(colnames(D2)))
adjtmp <- ures$res$adj
colnames(adjtmp) <- rownames(adjtmp) <- Pgenes
plotDnf(adjtmp, edgelwd = 2)
dev.off()

## cnv/meth enrichment:

# load("~/Documents/nempi_backup/backup3/TCGA-BRCA_nempi.rda")

source("~/Documents/nempi/R/nempi_main.r")
source("~/Documents/nempi/R/nempi_low.r")

methods <- list("NEM$\\pi$" = nempires)#, knn = knnres, mf = mfres, nn = nnres, rf = rfres, svm = svmres)

mutinc <- 1

Lall <- Lcnv <- Lmeth <- Lmut <- Laucor <- list()

for (i in 1:length(methods)) {
    if (i != 8) {
        print(names(methods)[i])
        ures <- methods[[i]]
        newGamma <- ures$Gamma
    } else {
        print("random")
        newGamma <- newGamma*0
        newGamma[sample(1:length(newGamma), floor(0.45*length(newGamma)))] <- 1 # well that is included into the test...
    }
    hist(newGamma)
    if (i == 1) {
        rntmp <- rownames(newGamma); newGamma <- t(mytc(ures$res$adj))%*%newGamma; rownames(newGamma) <- rntmp
    }
    P <- Pmut+Pmeth+Pcnv
    if (!mutinc) { # include mutations or not (0)
        P[which(Pmut == 1)] <- 0
    }
    P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))]
    P <- P[order(rownames(P)), order(colnames(P))]
    Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P)))))
    colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))])
    P <- Ptmp[, colnames(Rho)]
    P <- t(mytc(methods[[1]]$res$adj))%*%P
    P[which(P > 1)] <- 1
    ## fisher:
    if (i %in% c(4,5)) {
        cut <- 0.07
    } else if (i == 6) {
        cut <- 0.1
    } else {
        cut <- 1/8
    }
    newGamma[which(newGamma > cut)] <- 1
    newGamma[which(newGamma <= cut)] <- 0
    pmeth <- newGamma
    pmeth[which(newGamma == 1 & P == 1)] <- 2
    pmeth[which(newGamma == 0 & P == 1)] <- -2
    if (!mutinc) { # include mutations or not (0)
        pmeth[which(Rho > 0)] <- 0
    }
    colnames(pmeth) <- NULL
    ##pdf(paste0("FigS_", names(methods)[i], ".pdf"), height = 6, width = 12)
    setEPS()
    postscript(paste0("FigS_", names(methods)[i], ".eps"), height = 6, width = 12)
    print(epiNEM::HeatmapOP(pmeth, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL))
    dev.off()
    print("cnv")
    P <- Pcnv
    P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))]
    P <- P[order(rownames(P)), order(colnames(P))]
    P[which(P > 1)] <- 1
    Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P)))))
    colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))])
    P <- Ptmp[, colnames(Rho)]
    F <- matrix(c(sum(pmeth >= 1 & P == 1), sum(pmeth >= 1 & P == 0), sum(pmeth <= 0 & P == 1), sum(pmeth <= 0 & P == 0)), 2)
    print(1 - phyper(F[1,1]-1, sum(F[, 1]), sum(F[, 2]), sum(F[1, ])))
    Lcnv[[i]] <- F
    print("meth")
    P <- Pmeth
    P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))]
    P <- P[order(rownames(P)), order(colnames(P))]
    P[which(P > 1)] <- 1
    Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P)))))
    colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))])
    P <- Ptmp[, colnames(Rho)]
    F <- matrix(c(sum(pmeth >= 1 & P == 1), sum(pmeth >= 1 & P == 0), sum(pmeth <= 0 & P == 1), sum(pmeth <= 0 & P == 0)), 2)
    print(1 - phyper(F[1,1]-1, sum(F[, 1]), sum(F[, 2]), sum(F[1, ])))
    Lmeth[[i]] <- F
    print("mut")
    P <- Pmut
    P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))]
    P <- P[order(rownames(P)), order(colnames(P))]
    P[which(P > 1)] <- 1
    Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P)))))
    colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))])
    P <- Ptmp[, colnames(Rho)]
    F <- matrix(c(sum(pmeth >= 1 & P == 1), sum(pmeth >= 1 & P == 0), sum(pmeth <= 0 & P == 1), sum(pmeth <= 0 & P == 0)), 2)
    print(1 - phyper(F[1,1]-1, sum(F[, 1]), sum(F[, 2]), sum(F[1, ])))
    Lmut[[i]] <- F
    Fmat <- matrix(c(sum(pmeth == 2), sum(pmeth == 1), sum(pmeth == -2), sum(pmeth == 0)), 2)
    Lall[[i]] <- Fmat
    ## print(fisher.test(Fmat, alternative = "greater"))
    print("p-value")
    print(1 - phyper(Fmat[1,1]-1, sum(Fmat[, 1]), sum(Fmat[, 2]), sum(Fmat[1, ])))
    meas <- list()
    meas$data <- matrix(0, nrow(pmeth), ncol(pmeth))
    colnames(meas$data) <- apply(pmeth, 2, function(x) {
        y <- paste(sort(rownames(pmeth)[which(x %in% c(2,-2))]), collapse = "_")
        return(y)
    })
    measnet <- matrix(0, nrow(pmeth), nrow(pmeth))
    diag(measnet) <- 1
    meas$Nem[[1]] <- measnet
    meas$Theta[[1]] <- ures$res$subtopo
    print("AUC")
    if (i == 1) {
        pitmp <- pifit(ures, meas, D4)
    } else {
        pitmp <- pifit(ures, meas, D4, propagate=FALSE)
    }
    print(pitmp$auc)
    print("Cor")
    print(pitmp$cor)
    Laucor[[i]] <- c(pitmp$auc, pitmp$cor)
}

source("~/Documents/nempi/R/nempi_main.r")
source("~/Documents/nempi/R/nempi_low.r")

P <- Pmut+Pmeth+Pcnv
if (!mutinc) { # include mutations or not (0)
    P[which(Pmut == 1)] <- 0
}
P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))]
P <- P[order(rownames(P)), order(colnames(P))]
Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P)))))
colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))])
P <- Ptmp[, colnames(Rho)]
# P <- t(mytc(methods[[1]]$res$adj))%*%P
P[which(P > 1)] <- 1

## plot aucs:
setEPS()
postscript(paste0("Fig_auc.eps"), height = 5, width = 10)
cols <- c("red","turquoise","orange","darkgreen","brown","blue")
par(mfrow=c(1,2))
for (prop in 1:2) {
    for (i in 1:length(methods)) {
        print(names(methods)[i])
        ures <- methods[[i]]
        meas <- list()
        meas$data <- matrix(0, nrow(P), ncol(P))
        colnames(meas$data) <- apply(P, 2, function(x) {
            y <- paste(sort(rownames(P)[which(x == 1)]), collapse = "_")
            return(y)
        })
        meas$Theta[[1]] <- methods[[1]]$res$subtopo
        print("AUC")
        if (prop == 1) {
            meas$Nem[[1]] <- methods[[1]]$res$adj
        } else {
            meas$Nem[[1]] <- diag(nrow(pmeth))
        }
        if (i == 1) {
            pitmp <- pifit(ures, meas, D4)
        } else {
            pitmp <- pifit(ures, meas, D4, propagate = FALSE)
        }
        if (i == 1) {
            if (prop == 1) {
                main <- "precision-recall curves (propagated)"
            } else {
                main <- "precision-recall curves (not propagated)"
            }
            plot(pitmp$rec, pitmp$ppv, type="l", xlim = c(0,1), ylim = c(0,1), main = main, xlab = "recall", ylab = "precision", col = cols[i])
            abline(h=0.5,col=rgb(0,0,0,1),lty=3)
        } else {
            lines(pitmp$rec, pitmp$ppv, type="l", col = cols[i])
        }
        print(pitmp$auc)
    }
    if (prop == 1) {
        cols <- c("red", "blue", "darkgreen", "brown", "orange", "turquoise")
        legend <- c("svm","neural net","random forest","missForest","knn")
        legend("bottomright",legend=c(expression(NEM~pi),legend),fill=cols,col=cols)
        cols <- c("red","turquoise","orange","darkgreen","brown","blue")
    }
}
dev.off()

source("~/Documents/nempi/R/nempi_main.r")
source("~/Documents/nempi/R/nempi_low.r")

## create tables:

for (i in 1:length(methods)) {
    cat(paste0(names(methods)[i], " & ", Lcnv[[i]][1,1], " & ", Lcnv[[i]][2,1], " & ", Lcnv[[i]][2,2], " & ", Lcnv[[i]][1,2], "\\\\\n"))
}

for (i in 1:length(methods)) {
    cat(paste0(names(methods)[i], " & ", Lmeth[[i]][1,1], " & ", Lmeth[[i]][2,1], " & ", Lmeth[[i]][2,2], " & ", Lmeth[[i]][1,2], "\\\\\n"))
}

for (i in 1:length(methods)) {
    cat(paste0(names(methods)[i], " & ", Lmut[[i]][1,1], " & ", Lmut[[i]][2,1], " & ", Lmut[[i]][2,2], " & ", Lmut[[i]][1,2], "\\\\\n"))
}

for (i in 1:length(methods)) {
    if (names(methods)[i] == "nn") { next() }
    Fmat <- Lall[[i]]
    ptmp <- 1 - phyper(Fmat[1,1]-1, sum(Fmat[, 1]), sum(Fmat[, 2]), sum(Fmat[1, ]))
    if (ptmp == 0) {
        ptmp <- "$< 2.2\\times10^{-16}$"
    } else {
        ptmp <- paste0("$", signif(ptmp), "$")
    }
    cat(paste0(names(methods)[i], " & ", Lall[[i]][1,1], " & ", Lall[[i]][2,1], " & ", Lall[[i]][2,2], " & ", Lall[[i]][1,2], " & ", round(Lall[[i]][1,1]/(Lall[[i]][1,1]+Lall[[i]][2,1])*100), "\\% & ", round(Lall[[i]][1,1]/(Lall[[i]][1,1]+Lall[[i]][1,2])*100), "\\% & ", ptmp, " & ", round(Laucor[[i]][1], 2), "\\\\\n"))
}


## check correlation

## P4 <- apply(P3, 2, function(x) return(x/sum(x)))
## P4[is.na(P4)] <- 0

## cor(as.vector(newGamma), as.vector(P3))

##

cormat <- matrix(0, nrow(pmeth), 2)
fishres <- numeric(nrow(pmeth))
names(fishres) <- rownames(pmeth)
for (i in 1:nrow(pmeth)) {
    Fmat <- matrix(c(sum(pmeth[i, ] == 2), sum(pmeth[i, ] == 1), sum(pmeth[i, ] == -2), sum(pmeth[i, ] == 0)), 2)
    fishres[i] <- fisher.test(Fmat, alternative = "g")$p.value
    cormat[i, ] <- c(sum(Fmat[1, ]), sum(Fmat[, 1]))
}

## GATA3 & PTPRD:

Fmat <- matrix(c(sum(pmeth[3, ] %in% c(2,-2) & pmeth[7, ] %in% c(2,-2)),
                 sum(pmeth[3, ] %in% c(1,0) & pmeth[7, ] %in% c(-2,2)),
                 sum(pmeth[3, ] %in% c(-2,2) & pmeth[7, ] %in% c(1,0)),
                 sum(pmeth[3, ] %in% c(1,0) & pmeth[7, ] %in% c(1,0))), 2)

1 - phyper(Fmat[1,1]-1, sum(Fmat[, 1]), sum(Fmat[, 2]), sum(Fmat[1, ]))


## pca:

pca <- princomp(D2)

col <- apply(newGamma, 2, sum)

col <- col/max(col)

K <- kmeans(t(newGamma), nrow(newGamma))

plot(pca$loadings[, 1:2], col = K$cluster)#rgb(col,0,0,1))

## tsne:

sne <- tsne(t(newGamma))

plot(sne, col = K$cluster)

## R profiling:

Rprof("temp.txt", line.profiling=TRUE)
ures <- nempi(D2[1:20, ], Gamma = Gamma, complete = 1, full = TRUE, converged = converged, combi = combi)
Rprof(NULL)
summaryRprof("temp.txt", lines = "show")$sampling.time
head(summaryRprof("temp.txt", lines = "show")$by.self, 10)

##

type <- "TCGA-BRCA"
load(paste0(path, type, "_nempi.rda"))

pdf("Fig5.pdf", width = 10, height = 10)
plot(uresn, edgelwd = 2)
dev.off()

pdf("Fig6.pdf", width = 10, height = 5)
tmp <- uresn$Gamma
colnames(tmp) <- NULL
epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL)
dev.off()

pdf("Fig7.pdf", width = 10, height = 10)
phitmp <- mytc(uresn$res$adj)
tmp <- t(phitmp)%*%uresn$Gamma
colnames(tmp) <- NULL
rownames(tmp) <- rownames(uresn$Gamma)
tmp2 <- tmp
colnames(tmp) <- NULL
tmp <- Gamma
colnames(tmp) <- NULL
tmp3 <- tmp
tmp4 <- tmp2
p1 <- epiNEM::HeatmapOP(tmp2, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL)
p2 <- epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL)
print(p1, position=c(0, .5, 1, 1), more=TRUE)
print(p2, position=c(0, 0, 1, .5))
sum(tmp == 1 & tmp2 == 1)/sum(tmp == 1)
dev.off()

pdf("Fig8.pdf", width = 10, height = 10)
plot(uresf, edgelwd = 2)
dev.off()

pdf("Fig9.pdf", width = 10, height = 5)
tmp <- uresf$Gamma
colnames(tmp) <- NULL
epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL)
dev.off()

pdf("Fig10.pdf", width = 10, height = 10)
phitmp <- mytc(uresf$res$adj)
tmp <- t(phitmp)%*%uresf$Gamma
colnames(tmp) <- NULL
rownames(tmp) <- rownames(uresf$Gamma)
tmp2 <- tmp
colnames(tmp) <- NULL
tmp <- Gamma
colnames(tmp) <- NULL
p1 <- epiNEM::HeatmapOP(tmp2, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL)
p2 <- epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL)
print(p1, position=c(0, .5, 1, 1), more=TRUE)
print(p2, position=c(0, 0, 1, .5))
sum(tmp == 1 & tmp2 == 1)/sum(tmp == 1)
dev.off()

## new figure:

P <- t(mytc(uresf$res$adj))%*%uresf$Gamma
rownames(P) <- rownames(uresf$Gamma)
PM <- P
PM[which(PM > 1/6 & Gamma == 1)] <- P[which(PM > 1/6 & Gamma == 1)] + 1
PM[which(PM <= 1/6 & Gamma == 1)] <- P[which(PM <= 1/6 & Gamma == 1)] - 1
epiNEM::HeatmapOP(PM, bordercol = rgb(0,0,0,0), col = "RdYlBu", breaks = seq(-1,2,length.out=5), clusterx = tmp2)

## other plots:

pdf("temp.pdf", width = 10, height = 10)
plotDnf(c("M1=M4", "M2=M4", "M3=M5", "M1=M5"), edgelwd = 2)
dev.off()

M <- matrix(0, 5, 10)
rownames(M) <- paste0("M", 1:5)
colnames(M) <- paste0("S", 1:10)

M[1, 1:4] <- 1
M[2, c(2,7:9)] <- 1
M[3, 5:6] <- 1
M[3, 10] <- 1

phi <- matrix(0, 5, 5)
diag(phi) <- 1
phi[1, 1:5] <- 1
phi[2, 3] <- phi[4, 5] <- 1

## M <- t(phi)%*%M; M[M > 1] <- 1

rownames(M) <- paste0("M", 1:5)

pdf("temp.pdf", width = 8, height = 4)
epiNEM::HeatmapOP(M, Colv = 0, Rowv = 0, col = "RdBu", colorkey = NULL)
dev.off()

## check for mutation type (das führt zu nichts)

colnames(Mtype) <- unlist(lapply(strsplit(colnames(Mtype), "-"), function(x) {
    y <- x[1:3]
    y <- paste(c(y, "01"), collapse = "-")
    return(y)
    }))

checkgene <- "CBFB"

van <- intersect(colnames(Gamma)[which(PM[checkgene, ] < 0)], colnames(Mtype))

A <- sum(unlist(lapply(strsplit(Mtype[checkgene, van], " "), function(x) if ("Silent" %in% x) { return(1) } else { return(0) })))
B <- length(van) - A

van <- intersect(colnames(Gamma)[which(PM[checkgene, ] > 0)], colnames(Mtype))

C <- sum(unlist(lapply(strsplit(Mtype[checkgene, van], " "), function(x) if ("Silent" %in% x) { return(1) } else { return(0) })))
D <- length(van) - C

table(unlist(lapply(strsplit(Mtype[checkgene, van], " "), function(x) return(names(table(x))[which.max(table(x))]))))

## table(unlist(strsplit(as.vector(Mtype), " ")))

## pdf("Fig10.pdf", width = 10, height = 10)
## p1 <- epiNEM::HeatmapOP(tmp4, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL)
## p2 <- epiNEM::HeatmapOP(tmp3, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL)
## p3 <- epiNEM::HeatmapOP(tmp2, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL)
## p4 <- epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL)
## print(p1, position=c(0, .5, .5, 1), more=TRUE)
## print(p2, position=c(0, 0, .5, .5), more=TRUE)
## print(p3, position=c(.5, .5, 1, 1), more=TRUE)
## print(p4, position=c(.5, 0, 1, .5))
## sum(tmp == 1 & tmp2 == 1)/sum(tmp == 1)
## dev.off()

source("~/Documents/testing/R/nempi.r")
source("~/Documents/testing/R/nempi_low.r")

bsres <- unembs(D2, Gamma = Gamma, complete = 1, full = TRUE, converged = converged, combi = combi, bsruns = 10, bssize = 0.5)

pdf("temp.pdf", width = 10, height = 5)
epiNEM::HeatmapOP(bsres$Gamma, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL)
dev.off()

pdf("temp.pdf", width = 20, height = 10)
par(mfrow=c(1,2))
tmp <- bsres$phi
tmp[which(tmp > 0)] <- 1
diag(tmp) <- 0
freqtop <- as.vector(t(bsres$phi)[which(lower.tri(bsres$phi) == TRUE)])/10
freqtop <- freqtop[which(freqtop != 0)]
tmptop <- tmp
tmptop[lower.tri(tmptop)] <- 0
tmptop <- adj2dnf(tmptop)
tmptop <- tmptop[grep("=", tmptop)]
plotDnf(tmptop, edgelwd = 2, edgelab = freqtop, freq = freqtop)
freqbot <- as.vector(t(bsres$phi)[which(upper.tri(bsres$phi) == TRUE)])/10
freqbot <- freqbot[which(freqbot != 0)]
tmpbot <- tmp
tmpbot[upper.tri(tmpbot)] <- 0
tmpbot <- adj2dnf(tmpbot)
tmpbot <- tmpbot[grep("=", tmpbot)]
plotDnf(tmpbot, edgelwd = 2, edgelab = freqbot, freq = freqbot)
dev.off()

##

source("testing/vignettes/TCGA_cluster.r")

pcares <- prcomp(tmp)

library(naturalsort)
library(cluster)
library(Rcpp)

source("~/Documents/mnem/R/mnems.r")
source("~/Documents/mnem/R/mnems_low.r")
sourceCpp("~/Documents/mnem/src/mm.cpp")

res <- mnem(tmp, starts = 10, search = "greedy", type = "cluster3", complete = 1, multi = 1)

tmp2 <- tmp
Rprof("temp.txt", line.profiling=TRUE)
res <- mnem(tmp2, starts = 10, search = "greedy", type = "cluster3", complete = 1, multi = 1, k = 2)
Rprof(NULL)
summaryRprof("temp.txt", lines = "show")$sampling.time
head(summaryRprof("temp.txt", lines = "show")$by.self)

## resk <- mnemk(tmp2, starts = 10, search = "estimate", type = "cluster3", complete = 1, multi = 1)

cluster <- apply(getAffinity(res$probs, mw = res$mw, complete = TRUE), 2, which.max)

par(mfrow=c(1,2))
plot(pcares$rotation[, 1:2], col = cluster)

names(cluster) <- gsub("-01$|-03$", "", colnames(M))

cluster <- cluster[which(names(cluster) %in% clinical$submitter_id)]

cluster <- cluster[order(names(cluster))]

clinical <- rbind(clinical, clinical[which(clinical[, 1] %in% names(cluster)[which(duplicated(names(cluster)))]), ])

clinical <- clinical[order(clinical[, 1]), ]

print(all(clinical[, 1] == names(cluster)))

fit <- survfit(Surv(days_to_death, vital_status) ~ cluster, clinical)
plot(fit, col = 1:length(table(cluster)), lty = 1:length(table(cluster)))
legend(max(clinical$days_to_death, na.rm = TRUE), 1, 1:length(table(cluster)), lty = 1:length(table(cluster)), col = 1:length(table(cluster)), xjust = 1, yjust = 1)

fit <- coxph(Surv(days_to_death, vital_status) ~ cluster + age_at_diagnosis, clinical)
print(fit)

fit <- coxph(Surv(days_to_death, vital_status) ~ cluster + age_at_diagnosis + tumor_stage, clinical)
print(fit)

fit <- coxph(Surv(days_to_death, vital_status) ~ cluster + tumor_stage, clinical)
print(fit)

fit <- coxph(Surv(days_to_death, vital_status) ~ cluster, clinical)
print(fit)

kres <- clustNEM(tmp, nem = 0, k = length(res$comp), nstart = 10)

## kres <- clustNEM(tmp, nem = 0, nstart = 10)

plot(pcares$rotation[, 1:2], col = kres$cluster)

kcluster <- kres$cluster

names(kcluster) <- gsub("-01$|-03$", "", colnames(M))

kcluster <- kcluster[which(names(kcluster) %in% clinical$submitter_id)]

kcluster <- kcluster[order(names(kcluster))]

fit <- survfit(Surv(days_to_death, vital_status) ~ kcluster, clinical)
plot(fit, col = 1:length(table(cluster)), lty = 1:length(table(cluster)))
legend(max(clinical$days_to_death, na.rm = TRUE), 1, 1:length(table(cluster)), lty = 1:length(table(cluster)), col = 1:length(table(cluster)), xjust = 1, yjust = 1)

fit <- coxph(Surv(days_to_death, vital_status) ~ kcluster + age_at_diagnosis, clinical)
print(fit)

fit <- coxph(Surv(days_to_death, vital_status) ~ kcluster, clinical)
print(fit)
cbg-ethz/nempi documentation built on Nov. 9, 2023, 3:46 p.m.