Nothing
## ----eval=FALSE, echo=TRUE-----------------------------------------------
# install.packages("pacman")
# pacman::p_install_gh("sahirbhatnagar/eclust")
## ----setup, message=FALSE, echo=FALSE------------------------------------
library(knitr)
library(eclust)
library(data.table)
options(scipen = 1, digits = 5)
## ---- echo=FALSE---------------------------------------------------------
pander::pander(data.frame(`function name` = grep("^r_",pacman::p_functions("eclust"), value = T)))
## ---- echo=FALSE---------------------------------------------------------
pander::pander(data.frame(`function name` = grep("^s_",pacman::p_functions("eclust"), value = T)))
## ---- echo=FALSE---------------------------------------------------------
pander::pander(data.frame(`function name` = grep("^u_",pacman::p_functions("eclust"), value = T)))
## ---- eval = TRUE--------------------------------------------------------
# load the data
data("tcgaov")
tcgaov[1:5,1:6, with = FALSE]
# use log survival as the response
Y <- log(tcgaov[["OS"]])
# specify the environment variable
E <- tcgaov[["E"]]
# specify the matrix of genes only
genes <- as.matrix(tcgaov[,-c("OS","rn","subtype","E","status"),with = FALSE])
# for this example the training set will be all subjects.
# change `p` argument to create a train and test set.
trainIndex <- drop(caret::createDataPartition(Y, p = 1, list = FALSE, times = 1))
testIndex <- trainIndex
## ------------------------------------------------------------------------
cluster_res <- r_cluster_data(data = genes,
response = Y,
exposure = E,
train_index = trainIndex,
test_index = testIndex,
cluster_distance = "corr",
eclust_distance = "diffcorr",
measure_distance = "euclidean",
clustMethod = "hclust",
cutMethod = "dynamic",
method = "average",
nPC = 1,
minimum_cluster_size = 30)
# the number of clusters determined by the similarity matrices specified
# in the cluster_distance and eclust_distance arguments. This will always be larger
# than cluster_res$clustersAll$nclusters which is based on the similarity matrix
# specified in the cluster_distance argument
cluster_res$clustersAddon$nclusters
# the number of clusters determined by the similarity matrices specified
# in the cluster_distance argument only
cluster_res$clustersAll$nclusters
# what's in the cluster_res object
names(cluster_res)
## ------------------------------------------------------------------------
# prepare data for use with earth function
avg_eclust_interaction <- r_prepare_data(data = cbind(cluster_res$clustersAddon$averageExpr,
Y = Y[trainIndex],
E = E[trainIndex]),
response = "Y", exposure = "E")
head(avg_eclust_interaction[["X"]])
## ------------------------------------------------------------------------
# install and load earth package
pacman::p_load(char = "earth")
fit_earth <- earth::earth(x = avg_eclust_interaction[["X"]], y = avg_eclust_interaction[["Y"]],
pmethod = "backward",
keepxy = TRUE, degree = 2, trace = 1, nk = 1000)
coef(fit_earth)
## ------------------------------------------------------------------------
u_extract_selected_earth(fit_earth)
## ------------------------------------------------------------------------
# Genes in cluster 5
cluster_res$clustersAddonMembership[cluster %in% 5]
# variable importance
earth::evimp(fit_earth)
## ------------------------------------------------------------------------
pacman::p_load(eclust)
d0 <- s_modules(n = 100, p = 1000, rho = 0, exposed = FALSE,
modProportions = c(0.15,0.15,0.15,0.15,0.15,0.25),
minCor = 0.01,
maxCor = 1,
corPower = 1,
propNegativeCor = 0.3,
backgroundNoise = 0.5,
signed = FALSE,
leaveOut = 1:4)
d1 <- s_modules(n = 100, p = 1000, rho = 0.9, exposed = TRUE,
modProportions = c(0.15,0.15,0.15,0.15,0.15,0.25),
minCor = 0.4,
maxCor = 1,
corPower = 0.3,
propNegativeCor = 0.3,
backgroundNoise = 0.5,
signed = FALSE)
# get the true cluster labels
truemodule1 <- d1$setLabels
table(truemodule1)
## ------------------------------------------------------------------------
pacman::p_load(magrittr)
X <- rbind(d0$datExpr, d1$datExpr) %>%
magrittr::set_colnames(paste0("Gene", 1:1000)) %>%
magrittr::set_rownames(paste0("Subject",1:200))
## ---- fig.show='hold', tidy=FALSE----------------------------------------
pacman::p_load(pheatmap)
pacman::p_load(viridis)
pheatmap::pheatmap(cor(X[1:100,]),
show_rownames = F,
show_colnames = F,
color = viridis(100))
pheatmap::pheatmap(cor(X[101:200,]),
show_rownames = F,
show_colnames = F,
color = viridis(100))
## ---- eval=T-------------------------------------------------------------
betaMainEffect <- vector("double", length = 1000)
betaMainInteractions <- vector("double", length = 1000)
# the first 25 in the 3rd block are active
betaMainEffect[which(truemodule1 %in% 3)[1:50]] <- runif(50, 0.9, 1.1)
# the first 25 in the 4th block are active
betaMainEffect[which(truemodule1 %in% 4)[1:50]] <- runif(50, 0.9, 1.1)
# the interaction effects
betaMainInteractions[which(betaMainEffect!=0)] <- runif(50, 0.4, 0.6)
# the environment effect
betaE <- 2
# the total beta vector
beta <- c(betaMainEffect, betaE, betaMainInteractions)
## ------------------------------------------------------------------------
result <- s_generate_data(p = 1000,
X = X,
beta = beta,
include_interaction = TRUE,
cluster_distance = "tom",
n = 200,
n0 = 100,
eclust_distance = "difftom",
signal_to_noise_ratio = 1,
distance_method = "euclidean",
cluster_method = "hclust",
cut_method = "dynamic",
agglomeration_method = "average",
nPC = 1)
names(result)
## ---- echo=FALSE---------------------------------------------------------
pander::pander(data.frame(`function name` = c("s_pen_separate", "s_pen_clust", "s_mars_separate", "s_mars_clust"),
`General Approach` = rep(c("SEPARATE","CLUST, ECLUST"), 2),
`model` = c(rep(c("lasso, elasticnet, mcp, scad"), 2), rep("MARS",2))))
## ------------------------------------------------------------------------
# Provide ECLUST clustering results to the gene_groups argument
pen_res <- s_pen_clust(x_train = result[["X_train"]],
x_test = result[["X_test"]],
y_train = result[["Y_train"]],
y_test = result[["Y_test"]],
s0 = result[["S0"]],
gene_groups = result[["clustersAddon"]],
summary = "pc",
model = "lasso",
exp_family = "gaussian",
clust_type = "ECLUST",
include_interaction = TRUE)
unlist(pen_res)
## ---- echo=FALSE---------------------------------------------------------
pander::pander(data.frame(`object name` = c("tom_train_all","tom_train_diff","tom_train_e1","tom_train_e0","corr_train_all","corr_train_diff","corr_train_e1","corr_train_e0","fisherScore","corScor")))
## ------------------------------------------------------------------------
# check that the object is of class similarity
class(result$tom_train_e1)
# get clustering tree
hc <- hclust(as.dist(1 - result$tom_train_e1), method = "average")
plot(result$tom_train_e1,
truemodule = truemodule1,
cluster_rows = hc,
cluster_cols = hc,
active = as.numeric(betaMainEffect!=0))
## ------------------------------------------------------------------------
class(cluster_res)
## ------------------------------------------------------------------------
plot(cluster_res, show_column_names = FALSE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.