library(parallel)
library(igraph)
library(matrixcalc)
library(MASS)
library(QUIC)
library(Hmisc)
library(robustbase)
library(GENIE3)
library(fitdistrplus)
library(ZIM)
library(ggplot2); theme_set(theme_bw())
library(tidyr)
library(dplyr)
library(zoo)
library(VGAM)
library(corpcor)
library(Matrix)
files.sources = list.files("./pcorSimulator", full.names = TRUE)
sapply(files.sources, source)
source("funs-get-res.R")
source("get_mix_parameters.R")
rdir = "./"
dir.create(rdir, recursive = TRUE)
### ------------------------------------------------------------------
### simulate data
### ------------------------------------------------------------------
nc = 100 # number of cells
B = 30
ncores = 6
thre_no = 10
simulate_hub = function(p, nc = 100, nclust, rho = 0.3,
pattern = "powerLaw",
seed = 1, pdf = TRUE, path, ...){
set.seed(seed)
sim_cor = pcorSimulatorSimple(nobs = 50, nclusters=nclust,
nnodesxcluster=rep(p,nclust),
pattern=pattern, ...)
theta = sim_cor$omega
sum(abs(theta) < 1e-5)
pars = readRDS("~/Box/scSimilarity/code/suppfuns/mixpara.rds")
pars = pars[complete.cases(pars), ]
pars = pars[, c("rate", "mu","sigma")]
pars = pars[order(pars[,"mu"],decreasing = TRUE)[1:1000], ]
pars_selected = pars[sample(1:1000, p*nclust), ]
sigma = pars_selected[, "sigma"]
cov = solve(theta)
dem = sqrt(diag(cov))
cov = sweep(cov, MARGIN = 2, dem, FUN = "/")
cov = sweep(cov, MARGIN = 1, dem, FUN = "/")
cov = sweep(cov, MARGIN = 2, sigma, FUN = "*")
cov = sweep(cov, MARGIN = 1, sigma, FUN = "*")
theta = solve(cov)
theta[abs(theta) < 1e-5] = 0
if(!is.positive.definite(theta)) stop("precision matrix is not PD!")
adj_true = ((theta)!=0)*1
if(pdf == TRUE){
g = graph_from_adjacency_matrix(adj_true, mode = "undirected", diag = FALSE, weighted = TRUE)
cols = rep("white", length(V(g)))
V(g)$color = cols
ecols = rep("#5C88DA", length(E(g)))
# ecols[E(g)$weight < 3] = "#CC0C00" # false negative
E(g)$color = ecols
ly = layout_nicely(g)
pdf(path, width = 6, height = 6)
# plot(g, vertex.label.dist=0, vertex.label.color="black", vertex.label=NA,
# vertex.cex = 2, edge.width = 2, layout = ly)
plot(g, vertex.label=NA, vertex.size = 7, edge.width = 3, layout = ly)
dev.off()
}
### simulate count matrices for all time points -----------------------
mu = pars_selected[, "mu"]
cnt = mvrnorm(n = nc, mu = mu, Sigma = cov)
cnt[cnt < 0] = 0
cnt_zero = cnt
droprate = exp(-rho * cnt^2)
dropind = rbinom(prod(dim(droprate)), size = 1, prob = as.vector(droprate))
dropvals = sample(c(0,log10(2)), sum(dropind == 1), prob = c(0.7,0.3), replace = TRUE)
cnt_zero[dropind == 1] = dropvals
# cnt_zero = cnt
# droprate = exp(-rho * mu^2)
# for(gid in 1:(p*nclust)){
# ntp = ceiling(nc*droprate[gid])
# cnt_zero[sample(1:nc, ntp),gid] = sample(c(0,log10(2)), ntp, prob = c(0.7,0.3), replace = TRUE)
# }
return(list(cov_true = cov, theta_true = theta,
adj_true = adj_true, count = cnt, countd = cnt_zero))
}
theta2pcor = function(theta){
denom = sqrt(diag(theta))
pcor = sweep(theta, MARGIN = 1, denom, FUN = "/")
pcor = sweep(pcor, MARGIN = 2, denom, FUN = "/")
return(-pcor)
}
nr = 10
l1 = seq(0.5, 0.001, length=nr)
p = 10
nclustvals = c(10, 20, 30)
rhovals = c(0.07, 0.10, 0.13, 0.16)
### simulate data
sim_list = lapply(nclustvals, function(nclust){
print(paste("nc", nclust))
tp_res_list = lapply(rhovals, function(rho){
print(paste("rho", rho))
rep = mclapply(1:B, function(b){
if(b %% 10 == 0) print(paste("rep", b)); gc()
### simulate data ---------------------------------
sim_data = simulate_hub(p = p, nc = nc, nclust = nclust, rho = rho, seed = b,
pattern = "powerLaw", low.strength = 0.4, sup.strength = 0.7,
pdf = FALSE)
# pdfpath = paste0(rdir, "graph-nc", nclust, "-b-", b, ".pdf")
# if(rho == 0.1){
# sim_data = simulate_hub(p = p, nc = nc, nclust = nclust, rho = rho, seed = b,
# pattern = "powerLaw", low.strength = 0.4, sup.strength = 0.7,
# pdf = TRUE, path = pdfpath)
# }else{
# }
# countf = sim_data$count
countd = sim_data$countd
# save countd
saveda = t(countd)
colnames(saveda) = paste0("cell", 1:ncol(saveda))
write.table(saveda, file = paste0(rdir, "graph-nc", nclust, "-rho-", rho, "-b-", b, ".txt"),
quote = FALSE)
return(sim_data)
}, mc.cores = ncores)
saveRDS(rep, file = paste0(rdir, "rep-nc", nclust, "-rho", rho, ".rds"))
gc()
return(rep)
})
return(0)
})
### ------------------------------------------------------------------
### Network Inference
### ------------------------------------------------------------------
res_list = lapply(nclustvals, function(nclust){
print(paste("nc", nclust))
tp_res_list = lapply(rhovals, function(rho){
print(paste("rho", rho))
simdata = readRDS(paste0(rdir, "rep-nc", nclust, "-rho", rho, ".rds"))
rep = mclapply(1:B, function(b){
# if(b %% 10 == 0)
print(paste("rep", b)); gc()
### simulate data ---------------------------------
sim_data = simdata[[b]]
countf = sim_data$count
countd = sim_data$countd
adj_true = sim_data$adj_true
theta_true = sim_data$theta_true
# pcor_true = theta2pcor(theta_true)
epercent = mean(adj_true[upper.tri(adj_true)])
zpercent = mean(countd == 0) - mean(countf == 0)
### estimate covariance matrices
cor_pearson = cor(countd)
x.q=apply(countd,2,sd)
cov_pearson=diag(x.q)%*%cor_pearson%*%diag(x.q)
nobs = nrow(countd)
cor_pearson.c = est_pearson.c(countd, cor.p = cor_pearson, thre_no = 10)
### estimate precision matrices
aucscore = t(sapply(1:length(methods), function(k){
method = methods[k]
# print(method)
if(method == "scLink"){
weights_p = 1-abs(cor_pearson.c)
cov_pearson.c=diag(x.q)%*%cor_pearson.c%*%diag(x.q)
cov_pearson.c = easy.psd(cov_pearson.c,method="perturb")
res_seq = get_res_wGL_quic(cov_pearson.c, weights_p, nobs, l1, adj_true)
}
if(method == "glasso"){res_seq = get_res_GL(cov_pearson, nobs, l1, adj_true)}
if(method == "pearson"){
res_seq = get_res_cors(countd, adj_true, method = method,
cmat = cor_pearson, thre_no = thre_no)
}
if(method == "spearman"){
res_seq = get_res_cors(countd, adj_true, method = method, thre_no = thre_no)
}
if(method == "PIDC"){
wpath = paste0(rdir, "PIDC/weight-p", p, "-rho-", rho, "-b-", b, ".txt")
res_seq = get_res_pidc(wpath, adj_true)
}
if(method == "GENIE3"){
res_seq = get_res_genie3(countd, adj_true)
}
# theta_list = res_seq$theta_list
accuracy = res_seq$accuracy
colnames(accuracy) = c("nE", "bic", "precision", "recall", "FP", "lambda1")
accuracy = as.data.frame(accuracy)
accuracy = accuracy[complete.cases(accuracy), , drop = FALSE]
if(nrow(accuracy) < 2) return(c(NA, NA))
acc = accuracy
tp = acc[, c("precision","recall")]
tp = tp[order(tp[,1]), ]
tp = rbind(c(tp[1,1],1), tp)
tp = rbind(tp, c(1,0))
AUPR = sum(diff(tp[,1])*rollmean(tp[,2],2))
tp = acc[, c("FP","recall")]
tp = tp[order(tp[,1]), ]
tp = rbind(c(0,0), tp)
tp = rbind(tp, c(1,1))
AUROC = sum(diff(tp[,1])*rollmean(tp[,2],2))
return(c(AUPR, AUROC))
}))
colnames(aucscore) = c("AUPR", "AUROC")
aucscore = as.data.frame(aucscore)
aucscore$method = methods
aucscore$b = b
aucscore$rho = rho
aucscore$p = p
aucscore$zpercent = zpercent
aucscore$epercent = epercent
return(aucscore)
}, mc.cores = ncores)
rep = Reduce(rbind, rep)
saveRDS(rep, file = paste0(rdir, "est-nc", nclust, "-rho", rho, ".rds"))
gc()
return(rep)
})
tp_res = Reduce(rbind, tp_res_list)
return(tp_res)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.