library(eclust)
library(magrittr)
parametersDf <- expand.grid(rho = c(0.90),
p = c(500),
# SNR = c(0.2,1,2),
SNR = 1,
n = c(100), # this is the total train + test sample size
# nActive = c(300), # must be even because its being split among two modules
#n0 = 200,
cluster_distance = c("tom","corr"),
Ecluster_distance = c("difftom", "diffcorr"),
rhoOther = 0.6,
betaMean = c(2),
alphaMean = c(1),
betaE = 3,
includeInteraction = FALSE,
includeStability = TRUE,
distanceMethod = "euclidean",
clustMethod = "hclust",
#cutMethod = "gap",
cutMethod = "dynamic",
agglomerationMethod = "average",
# agglomerationMethod = "ward.D",
K.max = 10, B = 10, stringsAsFactors = FALSE)
parametersDf <- transform(parametersDf, n0 = n/2, nActive = p*0.10)
parametersDf <- parametersDf[which(parametersDf$cluster_distance=="tom" & parametersDf$Ecluster_distance=="difftom" |
parametersDf$cluster_distance=="corr" & parametersDf$Ecluster_distance=="diffcorr"),]
parameterIndex = 2
simulationParameters <- parametersDf[parameterIndex,, drop = F]
print(simulationParameters)
## ---- generate-data ----
message("generating data")
p <- simulationParameters[,"p"];
n <- simulationParameters[,"n"];
n0 <- simulationParameters[,"n0"];
SNR <- simulationParameters[,"SNR"]
n1 <- n - n0
cluster_distance <- simulationParameters[,"cluster_distance"]
Ecluster_distance <- simulationParameters[,"Ecluster_distance"]
rhoOther <- simulationParameters[,"rhoOther"];
betaMean <- simulationParameters[,"betaMean"];
betaE <- simulationParameters[,"betaE"]
alphaMean <- simulationParameters[,"alphaMean"];
rho <- simulationParameters[,"rho"];
nActive <- simulationParameters[,"nActive"];
includeInteraction <- simulationParameters[,"includeInteraction"]
includeStability <- simulationParameters[,"includeStability"]
distanceMethod <- simulationParameters[,"distanceMethod"]
clustMethod <- simulationParameters[,"clustMethod"]
cutMethod <- simulationParameters[,"cutMethod"]
agglomerationMethod <- simulationParameters[,"agglomerationMethod"]
K.max <- simulationParameters[,"K.max"]
B <- simulationParameters[,"B"]
# in this simulation its blocks 3 and 4 that are important
# leaveOut: optional specification of modules that should be left out
# of the simulation, that is their genes will be simulated as unrelated
# ("grey"). This can be useful when simulating several sets, in some which a module
# is present while in others it is absent.
d0 <- s_modules(n = n0, p = p, rho = c(0,0), exposed = FALSE,
modProportions = c(0.15,0.15,0.15,0.15,0.15,0.25),
minCor = 0.01,
maxCor = 1,
corPower = 1,
propNegativeCor = 0.3,
backgroundNoise = 0.5,
signed = FALSE,
leaveOut = 1:4)
d1 <- s_modules(n = n1, p = p, rho = c(rho, rho), exposed = TRUE,
modProportions = c(0.15,0.15,0.15,0.15,0.15,0.25),
minCor = 0.4,
maxCor = 1,
corPower = 0.3,
propNegativeCor = 0.3,
backgroundNoise = 0.5,
signed = FALSE)
# these should be the same. if they arent, its because I removed the red and
# green modules from the E=0 group
truemodule0 <- d0$setLabels
t0 <- table(truemodule0)
truemodule1 <- d1$setLabels
t1 <- table(truemodule1)
table(truemodule0,truemodule1)
# Convert labels to colors for plotting
moduleColors <- WGCNA::labels2colors(truemodule1)
table(moduleColors, truemodule1)
# does not contain E, needs to bind unexposed first
X <- rbind(d0$datExpr, d1$datExpr) %>%
magrittr::set_colnames(paste0("Gene", 1:p)) %>%
magrittr::set_rownames(paste0("Subject",1:n))
dim(X)
# pheatmap::pheatmap(cor(X))
# pheatmap::pheatmap(cor(d1$datExpr))
# pheatmap::pheatmap(cor(d0$datExpr))
# pheatmap(cor(d1$datExpr)-cor(d0$datExpr))
# pheatmap(WGCNA::TOMsimilarityFromExpr(X))
# pheatmap(WGCNA::TOMsimilarityFromExpr(d1$datExpr))
# pheatmap(WGCNA::TOMsimilarityFromExpr(d1$datExpr)-WGCNA::TOMsimilarityFromExpr(d0$datExpr))
# betaMainEffect <- vector("double", length = p)
# betaMainInteractions <- vector("double", length = p)
# # first assign random uniform to every gene in cluster 3 and 4,
# # then randomly remove so that thers only nActive left
# betaMainEffect[which(truemodule1 %in% 3:4)] <- runif(sum(truemodule1 %in% 3:4),
# betaMean - 0.1, betaMean + 0.1)
#
# # randomly set some coefficients to 0 so that there are only nActive non zero
# betaMainEffect <- replace(betaMainEffect,
# sample(which(truemodule1 %in% 3:4), sum(truemodule1 %in% 3:4) - nActive,
# replace = FALSE), 0)
#
# betaMainInteractions[which(betaMainEffect!=0)] <- runif(nActive, alphaMean - 0.1, alphaMean + 0.1)
#
# beta <- c(betaMainEffect,
# betaE,
# betaMainInteractions)
# plot(beta)
betaMainEffect <- vector("double", length = p)
# betaMainInteractions <- vector("double", length = p)
# the first nActive/2 in the 3rd block are active
betaMainEffect[which(truemodule1 %in% 3)[1:(nActive/2)]] <- runif(
nActive/2, betaMean - 0.1, betaMean + 0.1)
# the first nActive/2 in the 4th block are active
betaMainEffect[which(truemodule1 %in% 4)[1:(nActive/2)]] <- runif(
nActive/2, betaMean+2 - 0.1, betaMean+2 + 0.1)
# betaMainInteractions[which(betaMainEffect!=0)] <- runif(nActive, alphaMean - 0.1, alphaMean + 0.1)
# must be in this order!!!! main effects, E, and then interactions... this order is being used
# by the generate_data function
beta <- c(betaMainEffect,
betaE)#,betaMainInteractions)
plot(beta)
simulated_data <- s_generate_data(p = p,
X = X,
beta = beta,
include_interaction = includeInteraction,
cluster_distance = cluster_distance,
n = n,
n0 = n0,
eclust_distance = Ecluster_distance,
signal_to_noise_ratio = SNR,
distance_method = distanceMethod,
cluster_method = clustMethod,
cut_method = cutMethod,
agglomeration_method = agglomerationMethod,
K.max = K.max, B = B, nPC = 1)
# devtools::use_data(simulated_data)
simulated_data$similarity %>% dim
simulated_data$DT %>% dim
simulated_data$DT %>% str
pryr::object_size(simulated_data$DT)
# pryr::object_size(simulated_data$DT[,1:1002])
#
# pryr::object_size(simulated_data$DT)
simdata <- simulated_data$DT[,c(1,502,2:501)]
simdata[1:5,1:5]
simdata %>% dim
devtools::use_data(simdata, overwrite = TRUE)
devtools::check()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.