##' @title Generate toy data for running GADAG2
##' @description This function generates toy data that can be used to run GADAG2: the adjacency matrix of a DAG with p nodes and the design matrix with n observations of the distribution of the p nodes.
##' @param n Number of samples in the design matrix.
##' @param p Number of nodes of the DAG.
##' @param Cov (optional) Covariance matrix for the noise variables (identity matrix by default)
##' @param type Form of the DAG. It can be chosen between 7 alternatives: \code{"star"}, \code{"bistar"}, \code{"full"}, \code{"path"}, \code{"quadristar"}, \code{"sixstar"} (see details below).
##' @param edgemin Minimal value for the non-null edges of the DAG (between 0 and 1).
##' @param seed Fix the seed.
##' @rawNamespace export(generateToyData)
##' @return A list containing the design nxp matrix X (with samples in rows and variables in columns) and the adjacency matrix G associated to the DAG with p nodes.
##' @author \packageAuthor{GADAG2}.
##' @details One of the following seven alternatives can be chosen for the DAG form:
##' \itemize{
##' \item{\code{"star"}}{ star-shaped DAG (all active edges start from node 1),}
##' \item{\code{"bistar"}}{ half of the edges start from node 1 and the other half from node 2,}
##' \item{\code{"full"}}{ full DAG (all the edges are active),}
##' \item{\code{"path"}}{ path-shaped DAG (all the nodes are connected by a single path),}
##' \item{\code{"quadristar"}}{ node 1 is connected to nodes 2 to 4, each being connected to 1/3 of the rest of the nodes,}
##' \item{\code{"sixstar"}}{ same as \code{"quadristar"}, with 6 nodes.}
##' }
##' @seealso \code{\link{GADAG2}}, \code{\link{GADAG2_Run}}.
##' @examples
##' #############################################################
##' # Generating toy data
##' #############################################################
##' toy_data <- generateToyData(n=100, p=10)
##'
##' # toy_data is a list of two matrices corresponding to a "star"
##' # DAG (node 1 activates all other nodes):
##' # - toy_data$X is a 100x10 design matrix
##' # - toy_data$G is the 10x10 adjacency matrix (ground trough)
##'
##' \dontrun{
##' # generate another type of data: a DAG with 100 nodes in a path form
##' toy_data <- generateToyData(n=100, p=100,type="path")
##' }
##'
##' \dontrun{
##' # set the minimal edge value to 1
##' toy_data <- generateToyData(n=100, p=10, edgemin=1) # all edges are set to 1
##' }
##'
##' \dontrun{
##' # non-independent noises
##' A <- matrix(runif(10^2)*2-1, ncol=10)
##' Sigma <- t(A) %*% A
##' toy_data <- generateToyData(n=100, p=10, edgemin=1, Cov = Sigma)
##' }
generateToyData <- function(n, p, edgemin=0, Cov = diag(p), type="star", seed=42) {
#-----------------------------------------------------
# Generates DAGs and associated data
# The form of the DAG can be chosen between 7 alternatives (see below)
# The intensity of the active edges is random (uniformly distributed)
#
# INPUTS
# n: number of observations
# p: number of nodes
# edgemin: minimal value for the non-null edges. Scalar in [0,1]
# Cov: covariance matrix for the noise variables
# type: "star": star-shaped DAG (all active edges start from node 1)
# "bistar": half of the edges start from node 1 and the other half from node 2
# "full": full DAG (all the edges are active)
# "path": path-shaped DAG (all the nodes are connected by a single path)
# "quadristar": node 1 is connected to nodes 2 to 4, each being connected to 1/3 of the rest of the nodes
# "sixstar": same as "quadristar", with 6 nodes
# seed: to fix the seed
# OUTPUTS
# X: matrix of observations (n*p)
# G: matrix of the DAG (p*p)
#-----------------------------------------------------
if (n < 0 || p < 0){
stop("Please choose p and n as non-negative numbers.")
}
if (n==1){
stop("You need more than one sample to run GADAG2.")
}
if (edgemin >1 || edgemin < 0){
stop("edgemin has to taken between O and 1.")
}
set.seed(seed)
G <- matrix(0,p,p)
if (type=="star") {
G[1,] <- c(0, edgemin+runif(p-1)*(1-edgemin))
} else if (type=="bistar") {
p1 <- round((p-1)/2)
p2 <- p - p1 - 1
G[1,] <- c(0, edgemin+runif(p1)*(1-edgemin), rep(0, p2))
G[2,] <- c(rep(0, p1+1), runif(p2))
} else if (type=="full") {
G <- matrix(edgemin+runif(p*p)*(1-edgemin), p, p)
G[lower.tri(G, diag=TRUE)] <- 0
} else if (type=="path") {
for (i in 2:p) {
G[i-1,i] <- edgemin+runif(1)*(1-edgemin)
}
} else if (type=="quadristar") {
p1 <- round((p-4)/3)
G[1,2:4] <- edgemin+runif(3)*(1-edgemin)
G[2,] <- edgemin+runif(p)*(1-edgemin)
G[3,] <- edgemin+runif(p)*(1-edgemin)
G[4,] <- edgemin+runif(p)*(1-edgemin)
G[2,c(1:4, (p1+5):p)] <- 0
G[3,c(1:(p1+4), (2*p1+5):p)] <- 0
G[4, 1:(2*p1+4)] <- 0
} else if (type=="sixstar") {
p1 <- round((p-6)/5)
G[1,2:6] <- edgemin+runif(5)*(1-edgemin)
G[2,] <- edgemin+runif(p)*(1-edgemin)
G[3,] <- edgemin+runif(p)*(1-edgemin)
G[4,] <- edgemin+runif(p)*(1-edgemin)
G[5,] <- edgemin+runif(p)*(1-edgemin)
G[6,] <- edgemin+runif(p)*(1-edgemin)
G[2,c(1:6, (p1+7):p)] <- 0
G[3,c(1:(p1+6), (2*p1+7):p)] <- 0
G[4,c(1:(2*p1+6), (3*p1+7):p)] <- 0
G[5,c(1:(3*p1+6), (4*p1+7):p)] <- 0
G[6, 1:(4*p1+6)] <- 0
}
#eps <- matrix(rnorm(n*p,mean=0,sd=1),n,p)
eps <- mvrnorm(n = n,mu = rep(0,p), Sigma=Cov, empirical = TRUE)
X <- eps %*% ginv(diag(1,p,p)-G)
X = X - matrix(1,n,1) %*% colMeans(X)
X = X / sqrt( (1/n) * matrix(1,n,1) %*% colSums(X^2))
return(list(X=X,G=G))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.