Nothing
#' A simulated binomial data set.
#'
#' A data set simulated for illustrating logistic regression models. Generated by
#' \code{gen.binomial.data(n = 200, p = 20, seed = 2021)}.
#'
#' @format A list with three elements: design matrix \code{X}, response \code{y},
#' and the true coefficient vector \code{beta}.
#' \describe{
#' \item{X}{design matrix}
#' \item{y}{response}
#' \item{beta}{the true coefficient vector}
#' }
#'
#' @usage data(bin_data)
#'
#' @examples
#' data("bin_data")
#' cv.fit <- cv.glmtlp(bin_data$X, bin_data$y, family = "binomial", penalty = "l1")
#' plot(cv.fit)
#'
"bin_data"
#' A simulated gaussian data set.
#'
#' A data set simulated for illustrating linear regression models. Generated by
#' \code{gen.gaussian.data(n = 200, p = 20, seed = 2021)}.
#'
#' @format A list with five elements: design matrix \code{X}, response \code{y},
#' correlation structure of the covariates \code{Sigma}, true beta \code{beta},
#' and the noise level \code{sigma}.
#' \describe{
#' \item{X}{design matrix}
#' \item{y}{response}
#' \item{beta}{true beta values}
#' \item{sigma}{the noise level}
#' }
#'
#' @usage data(gau_data)
#'
#' @examples
#' data("gau_data")
#' cv.fit <- cv.glmtlp(gau_data$X, gau_data$y, family = "gaussian", penalty = "tlp")
#' plot(cv.fit)
#'
"gau_data"
#' Simulate a binomial data set
#'
#' @description
#' Simulate a data set with binary response following the logistic regression
#' model.
#'
#' @param n Sample size.
#' @param p Number of covariates.
#' @param rho The parameter defining the AR(1) correlation matrix.
#' @param kappa The number of nonzero coefficients.
#' @param beta.type Numeric indicator for choosing the beta type. For
#' \code{beta.type = 1}, the true coefficient vector has \code{kappa} components being 1,
#' roughly equally distributed between 1 to \code{p}. For \code{beta.type = 2},
#' the first \code{kappa} values are 1, and the rest are 0. For \code{beta.type = 3},
#' the first \code{kappa} values are equally-spaced values from 10 to 0.5, and
#' the rest are 0. For \code{beta.type = 4}, the first \code{kappa} values are
#' the first \code{kappa} values in c(-10, -6, -2, 2, 6, 10), and the rest are
#' 0. For \code{beta.type = 5}, the first \code{kappa} values are 1, and the
#' rest decay exponentially to 0 with base 0.5.
#' @param seed The seed for reproducibility. Default is 2021.
#'
#' @return A list containing the simulated data.
#' \item{X}{the covariate matrix, of dimension \code{n} x \code{p}.}
#' \item{y}{the response, of length \code{n}.}
#' \item{beta}{the true coefficients, of length \code{p}.}
#'
#' @examples
#' bin_data <- gen.binomial.data(n = 200, p = 20, seed = 2021)
#' head(bin_data$X)
#' head(bin_data$y)
#' head(bin_data$beta)
#'
#' @importFrom stats rnorm
#' @importFrom stats rbinom
#' @export gen.binomial.data
#'
gen.binomial.data <- function(n, p, rho = 0, kappa = 5, beta.type = 1, seed = 2021) {
set.seed(seed)
X <- matrix(rnorm(n * p), n, p)
if (rho != 0) {
for (j in 2:p) {
X[, j] <- sqrt(1 - rho^2) * X[, j] + rho * X[, j - 1]
}
}
beta <- gen.beta(kappa, p, beta.type)
mu <- plogis(as.numeric(X %*% beta))
y <- rbinom(n, 1, mu)
if (p > 1) {
colnames(X) <- paste("V", seq(p), sep = "")
}
list(X = X, y = y, beta = beta)
}
#' Simulate a gaussian data set
#'
#' @description
#' Simulate a data set with gaussian response following the linear regression
#' model.
#'
#' @param n Sample size.
#' @param p Number of covariates.
#' @param rho The parameter defining the AR(1) correlation matrix.
#' @param kappa The number of nonzero coefficients.
#' @param beta.type Numeric indicator for choosing the beta type. For
#' \code{beta.type = 1}, the true coefficient vector has \code{kappa} components being 1,
#' roughly equally distributed between 1 to \code{p}. For \code{beta.type = 2},
#' the first \code{kappa} values are 1, and the rest are 0. For \code{beta.type = 3},
#' the first \code{kappa} values are equally-spaced values from 10 to 0.5, and
#' the rest are 0. For \code{beta.type = 4}, the first \code{kappa} values are
#' the first \code{kappa} values in c(-10, -6, -2, 2, 6, 10), and the rest are
#' 0. For \code{beta.type = 5}, the first \code{kappa} values are 1, and the
#' rest decay exponentially to 0 with base 0.5.
#' @param snr Signal-to-noise ratio. Default is 1.
#' @param seed The seed for reproducibility. Default is 2021.
#'
#' @return A list containing the simulated data.
#' \item{X}{the covariate matrix, of dimension \code{n} x \code{p}.}
#' \item{y}{the response, of length \code{n}.}
#' \item{beta}{the true coefficients, of length \code{p}.}
#' \item{sigma}{the standard error of the noise.}
#'
#' @examples
#' gau_data <- gen.gaussian.data(n = 200, p = 20, seed = 2021)
#' head(gau_data$X)
#' head(gau_data$y)
#' head(gau_data$beta)
#' gau_data$sigma
#'
#' @importFrom stats rnorm
#' @export gen.gaussian.data
gen.gaussian.data <- function(n, p, rho=0, kappa=5, beta.type=1, snr=1, seed=2021) {
set.seed(seed)
X <- matrix(rnorm(n*p), n, p)
if (rho != 0) {
for (j in 2:p) {
X[, j] <- sqrt(1 - rho^2) * X[, j] + rho * X[, j - 1]
}
}
beta <- gen.beta(kappa, p, beta.type)
vmu <- sum((crossprod(cholesky.ar1.root(rho, p), beta))^2)
sigma <- sqrt(vmu/snr)
y <- as.numeric(X %*% beta + rnorm(n) * sigma)
if (p > 1) {
colnames(X) <- paste("V", seq(p), sep = "")
}
list(X = X, y = y, beta = beta, sigma = sigma)
}
gen.beta <- function(kappa, p, beta.type) {
kappa <- min(kappa, p)
beta <- rep(0, p)
if (beta.type == 1) {
beta[round(seq(1, p, length = kappa))] <- 1
} else if (beta.type == 2) {
beta[1:kappa] <- 1
} else if (beta.type == 3) {
beta[1:kappa] <- seq(10, 0.5, length = kappa)
} else if (beta.type == 4) {
beta[1:6] <- c(-10, -6, -2, 2, 6, 10)
} else {
beta[1:kappa] <- 1
if (kappa + 1 <= p) {
beta[(kappa + 1):p] <- 0.5^(1:(p - kappa))
}
}
beta
}
cholesky.ar1.root <- function(rho, p) {
# reference 1: https://blogs.sas.com/content/iml/2018/10/03/ar1-cholesky-root-simulation.html
# reference 2: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4455603/ (Direct formulation to Cholesky decomposition of a general nonsingular correlation matrix)
if (rho != 0) {
L <- matrix(0, nrow = p, ncol = p)
L[, 1] <- rho^(0:(p - 1))
c <- sqrt(1 - rho^2)
cL <- c * L[, 1]
for (i in 2:p) {
L[i:p, i] <- cL[1:(p - i + 1)]
}
} else {
L <- diag(1, p)
}
L
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.