Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,eval = T,
comment = "#>"
)
## ----eval = FALSE-------------------------------------------------------------
# install.packages("penalizedclr")
## ----eval = FALSE-------------------------------------------------------------
# library(devtools)
# install_github("veradjordjilovic/penalizedclr")
## ----setup--------------------------------------------------------------------
library(penalizedclr)
## ----message=FALSE, warning=FALSE---------------------------------------------
set.seed(1234)
library(tidyverse)
## -----------------------------------------------------------------------------
# two groups of predictors
p <- c(12, 40)
# percentage and number of non-null variables
p_nz <- c(0.5, 0.2)
m_nz <- round(p*p_nz, 0)
# number of different strata (case-control pairs)
K <- 100
# number of cases and controls in each stratum (not necessarily 1:1 matching,
# other designs are also allowed)
n_cases <- 1
n_ctrl <- 1
# generating covariates
X = cbind(matrix(rnorm(p[1] * K * (n_cases + n_ctrl), 0, 1), ncol = p[1]),
matrix(rnorm(p[2] * K * (n_cases + n_ctrl), 0, 4), ncol = p[2]))
# coefficients
beta <- as.matrix(c(rnorm(m_nz[1], 4, 1),
rep(0, p[1] - m_nz[1]),
rnorm(m_nz[2], 2, 0.8),
rep(0, p[2] - m_nz[2])), ncol = 1)
# stratum membership
stratum <- rep(1:K, each= n_cases+n_ctrl)
# linear predictor
lin_pred <- X %*% beta
prob_case <- exp(lin_pred) / (1 + exp(lin_pred))
# generate the response
Y <- rep(0, length(stratum))
data_sim <- as_tibble(data.frame(stratum = stratum,
probability = prob_case,
obs_id = 1 : length(stratum)))
data_sim_cases <- data_sim %>%
group_by(stratum)%>%
sample_n(n_cases, weight = probability)
Y[data_sim_cases$obs_id] <- 1
## ----results = 'hide'---------------------------------------------------------
fit1 <- penalized.clr(response = Y, penalized = X, stratum = stratum,
lambda = c(1,4), p = p, standardize = TRUE)
## -----------------------------------------------------------------------------
str(fit1)
nonzero_index <- (beta != 0) * 1 #index of nonzero coefficients
table(fitted = (fit1$penalized != 0) * 1, nonzero_index)
## ----results = 'hide'---------------------------------------------------------
fit2 <- penalized.clr(response = Y, penalized = X, stratum = stratum,
lambda = c(1,4),p = p, standardize = TRUE, alpha = 0.6)
## -----------------------------------------------------------------------------
stable1 <- stable.clr(response = Y, B = 50,
penalized = X, stratum = stratum,
lambda.seq = c(1, 4, 6), parallel = TRUE)
## -----------------------------------------------------------------------------
hist(stable1$Pistab, xlab = "Selection probability", ylab="Frequency")
## -----------------------------------------------------------------------------
(tab1 <- table(stable = (stable1$P>0.65) * 1, nonzero_index))
(tab1[1,1] + tab1[2,2])/(sum(tab1))
## -----------------------------------------------------------------------------
# plotting selection probabilities for true non-zero (red) and zero (blue) coefficients
s_prob_nonzero <- cut(stable1$P[nonzero_index == 1],
breaks = seq(0,1, by = 0.1), ordered_result = T)
s_prob_zero <- cut(stable1$P[nonzero_index == 0],
breaks = seq(0,1, by = 0.1), ordered_result = T)
barplot(table(s_prob_nonzero), col = 2)
barplot(table(s_prob_zero), add =T, col = 4 )
## ----eval= T------------------------------------------------------------------
stable2 <- penalizedclr::stable.clr.g(response = Y, p = p,
standardize = TRUE,
penalized = X, stratum = stratum,
lambda.list = list(c(4,1), c(1,4)))
## ----echo = FALSE, results = 'hide'-------------------------------------------
# histogram of selection probabilities
#hist(stable2$P, xlab = "Selection probability", ylab="Frequency")
# table of true status vs. selection status
(tab2 <- table(stable = (stable2$P>0.65) * 1, nonzero_index))
# classification accuracy
(tab2[1,1] + tab2[2,2])/(sum(tab2))
## ---- echo = F----------------------------------------------------------------
# plotting selection probabilities for true non-zero (red) and zero (blue) coefficients
s_prob_nonzero <- cut(stable2$P[nonzero_index == 1],
breaks = seq(0,1, by = 0.1),
ordered_result = T)
s_prob_zero <- cut(stable2$P[nonzero_index == 0],
breaks = seq(0,1, by = 0.1),
ordered_result = T)
barplot(table(s_prob_nonzero), col = 2)
barplot(table(s_prob_zero), add = T, col = 4 )
## ---- eval = T----------------------------------------------------------------
(pf <- default.pf(response = Y, stratum = stratum, penalized = X,
nfolds = 10, alpha = 0.3,
standardize = TRUE, p = p, type.step1 = "comb"))
(lambda <- find.default.lambda(response = Y, penalized = X,
standardize = TRUE, stratum = stratum,
p = p, pf.list = pf))
## ----eval= T------------------------------------------------------------------
stable3 <- stable.clr.g(response = Y, p = p,
standardize = TRUE,
penalized = X, stratum = stratum,
lambda.list = list(c(pf[[1]][1]*as.numeric(lambda), pf[[1]][2]*as.numeric(lambda))))
## ---- echo = F----------------------------------------------------------------
# plotting selection probabilities for true non-zero (red) and zero (blue) coefficients
s_prob_nonzero <- cut(stable3$P[nonzero_index == 1],
breaks = seq(0, 1, by = 0.1), ordered_result = T)
s_prob_zero <- cut(stable3$P[nonzero_index == 0],
breaks = seq(0, 1, by = 0.1), ordered_result = T)
barplot(table(s_prob_nonzero), col = 2)
barplot(table(s_prob_zero), add = T, col = 4 )
## ----eval = T-----------------------------------------------------------------
alt.pf.list<- list(c(1,4), c(2,4), c(4,1), c(4,2), pf$pf)
alt.lambda <- find.default.lambda(response = Y, penalized = X,
standardize = TRUE, stratum = stratum,
p = p, pf.list = alt.pf.list)
lambda.matrix <- mapply(function (x,y) x*y, lambda, alt.pf.list)
lambda.list <- lapply(seq_len(ncol(lambda.matrix)), function(i) lambda.matrix[,i])
## -----------------------------------------------------------------------------
stable4 <- stable.clr.g(response = Y, p = p,
standardize = TRUE,
penalized = X, stratum = stratum,
lambda.list = lambda.list )
## ---- echo = F----------------------------------------------------------------
# plotting selection probabilities for true non-zero (red) and zero (blue) coefficients
s_prob_nonzero <- cut(stable4$P[nonzero_index == 1],
breaks = seq(0, 1, by = 0.1), ordered_result = T)
s_prob_zero <- cut(stable4$P[nonzero_index == 0],
breaks = seq(0, 1, by = 0.1), ordered_result = T)
barplot(table(s_prob_nonzero), col = 2)
barplot(table(s_prob_zero), add = T, col = 4 )
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.