#' Randomly pick elements
#' Select each element of \code{vec} with probability \code{prob} and
#' produce a vector with the selected elements.
#'
#' @param vec Numeric vector. Vector containing the elements to select.
#' @param prob Numeric --- between 0 and 1. The probability of selecting
#' each element of \code{vec}.
#'
#' @return Numeric vector. A vector with the selected elements
#'
#' @noRd
pick_elements <- function(vec, prob){
r <- stats::rbinom(n = length(vec), size = 1, prob = prob)
vec[r == 1]
}
#' Inverse mirror uniform
#'
#' Produces the quantile of a mirrored uniform distribution
#' associated to the probability \code{prob}. The mirrored uniform
#' distribution has support [-max, -min] \eqn{\cup} [min, max].
#'
#' @param prob Numeric --- between 0 and 1. The probability associated
#' to the desired quantile.
#' @param min Numeric --- above 0. The lower bound of the positive half of
#' the support.
#' @param max Numeric --- above 0. The upper bound of the positive half of
#' the support. Note that \code{max} must be strictly greater than
#' \code{min}.
#'
#' @return Numeric --- between \code{-max} and \code{max}. The quantile
#' associated to the probability \code{prob}.
#' @noRd
inverse_mirror_uniform <- function(prob, min, max){
# check if max < min
if (!(max > min)){
stop(paste("The maximum value, max, must be strictly greater",
"than the minimum value, min."))
}
# check if min > 0 and max > 0
if (!(min > 0 & max > 0)){
stop(paste("Both the minimum and the maximum values, min and max,",
"must be positive."))
}
if (prob == 0){
return(-max)
} else if (prob > 0 & prob < 1 / 2) {
return(2 * prob * (max - min) - max)
} else if (prob >= 1 / 2 & prob < 1){
return( (2 * prob - 1) * (max - min) + min)
} else if (prob == 1){
return(max)
} else {
stop("The probability, prob, must be between 0 and 1.")
}
}
#' Sample from uniform family
#'
#' Sample n elements from a uniform distribution with lower and upper
#' bound equal to \code{min} and \code{max}, respectively.
#' If \code{mirror == TRUE}, the elements are sampled from a mirrored
#' uniform distribution, see \code{\link{inverse_mirror_uniform}}.
#'
#' @param n Integer. The number of elements to sample.
#' @param min Numeric. The lower bound of the distribution support.
#' If \code{mirror == TRUE}, this represents the lower bound of
#' the positive half of the support.
#' @param max Numeric. The upper bound of the distribution support.
#' If \code{mirror == TRUE}, this represents the upper bound of
#' the positive half of the support.
#' Note that \code{max} must be strictly greater than \code{min}.
#' @param mirror Logical. Should the elements be sampled from a
#' mirrored uniform distribution? If \code{mirror == TRUE},
#' \code{min} and \code{max} must be strictly greater than 0.
#'
#' @return Numeric vector. A vector with the sampled elements.
#' @noRd
sample_uniform <- function(n, min, max, mirror = FALSE){
# check if max < min
if (!(max > min)){
stop(paste("The maximum value, max, must be strictly greater",
"than the minimum value, min."))
}
if (mirror == TRUE){
purrr::map_dbl(stats::runif(n),
inverse_mirror_uniform, min = min, max = max)
} else {
stats::runif(n = n, min = min, max = max)
}
}
#' Simulate random DAG
#'
#' Simulates a directed acyclic graph (DAG) and returns its adjacency matrix.
#' Copyright (c) 2013 Jonas Peters \email{peters@@math.ku.dk}.
#' All rights reserved.
#'
#' @param p Integer --- greater than 0. Number of nodes.
#' @param prob_connect Numeric --- between 0 and 1. The probability that an edge
#' \eqn{i {\rightarrow} j} is added to the DAG.
#' @param caus_order Numeric vector. The causal order of the DAG.
#' If the argument is not provided it is generated randomly.
#'
#' @return Square binary matrix. A matrix representing the random DAG.
#' @keywords internal
random_dag <- function(p, prob_connect,
caus_order = sample(p, p, replace = FALSE)){
# check inputs
if (p < 1){
stop("The number of nodes p must be greater than 0.")
}
if (!missing(caus_order)){
if (length(caus_order) != p){
stop(paste("The number of nodes p does not match with the length of",
"the causal order."))
}
}
# get random dag
dag <- matrix(0, nrow = p, ncol = p)
if (p > 1){
for (i in 1:(p - 1)){
elms <- pick_elements(caus_order[(i + 1):p], prob_connect)
dag[caus_order[i], elms] <- 1
}
}
dag
}
#' Sample random coefficients
#'
#' Sample random coefficients from uniform distribution
#' for the given DAG \code{dag}.
#' Copyright (c) 2013 Jonas Peters \email{peters@@math.ku.dk}.
#' All rights reserved.
#'
#' @inheritParams compute_caus_order
#' @param lb Numeric. The lower bound of the coefficient that can be
#' sampled.
#' @param ub Numeric. The lower bound of the coefficient that can be
#' sampled. It must be stricly greater than \code{lb}.
#' @param two_intervals Logical. Should the coefficient be sampled
#' from two symmetric uniform distributions?
#' If \code{two_intervals == TRUE}, \code{lb} and \code{ub} must be
#' positive.
#' @return Square numeric matrix. The adjacency matrix of the underlying
#' DAG \code{dag}.
#' @noRd
random_coeff <- function(dag, lb = 0.1, ub = 0.9, two_intervals = TRUE){
# check if dag is a (non-weighted) adjacency matrix
if (!all(dag %in% c(0, 1))){
stop("The entries of dag must be either 0 or 1.")
}
adj_mat <- matrix(0, nrow = NROW(dag), ncol = NCOL(dag))
num_coeff <- sum(dag)
adj_mat[dag == 1] <- sample_uniform(num_coeff, lb, ub, two_intervals)
adj_mat
}
#' Add confounders to a DAG
#'
#' Add confounders (i.e., hidden variables) at random to the DAG \code{dag}.
#' Each confounder is a source node and acts only on two observed variables.
#' The probability that a pair of observed variables is confounded is set by
#' \code{prob_confound}.
#'
#' @inheritParams compute_caus_order
#' @param prob_confound Numeric --- between 0 and 1. The probability that
#' a pair of observed nodes is confounded.
#' @return List. The list is made of:
#' \itemize{
#' \item \code{dag_confounders} --- Square binary matrix. Represents the full
#' DAG made of observed and hidden variables.
#' \item \code{pos_confounders} --- Integer vector. Represents the position
#' of confounders (rows and columns) in \code{dag_confounders}.
#' }
#'
#' @noRd
add_random_confounders <- function(dag, prob_confound){
# check if dag is a (non-weighted) adjacency matrix
if (!all(dag %in% c(0, 1))){
stop("The entries of dag must be either 0 or 1.")
}
# Consider all possible pairs of nodes in the DAG
p <- NROW(dag)
node_pairs <- which(upper.tri(dag), arr.ind = TRUE)
max_n_confounders <- NROW(node_pairs)
# Choose the confounders randomly
n_confounders <- stats::rbinom(n = 1, size = max_n_confounders,
prob = prob_confound)
confounders <- sample(x = 1:max_n_confounders, size = n_confounders,
replace = FALSE)
# Exit function if there are no confounders
if (n_confounders == 0){
return(list (dag_confounders = dag,
pos_confounders = integer(0)))
}
# Add confounders to the DAG
dag_confounders <- dag
for (i in 1:n_confounders){
confounded.pair <- node_pairs[confounders[i], ]
col_to_append <- rep(0, NROW(dag_confounders))
row_to_append <- rep(0, NROW(dag_confounders) + 1)
row_to_append[confounded.pair] <- 1
dag_confounders <- cbind(dag_confounders, col_to_append)
dag_confounders <- rbind(dag_confounders, row_to_append)
}
dag_confounders <- unname(dag_confounders)
pos_confounders <- (p + 1):NROW(dag_confounders)
# Return data
return(list (dag_confounders = dag_confounders,
pos_confounders = pos_confounders))
}
#' Simulate noise observations
#'
#' Sample \code{n} observations for \code{p} independent noise variables
#' from a certain distribution \code{distr}.
#' @param n Positive integer. The number of observations, must be larger
#' than 1.
#' @param p Positive integer. The number of variables, must be larger
#' than 1.
#' @param distr Character. The distribution of the noise. It is one of:
#' \itemize{
#' \item \code{student_t}, in this case the user has to specify the
#' \code{tail_index}, i.e., the degrees of freedom,
#' \item \code{gaussian},
#' \item \code{log_normal}.
#' }
#' @param tail_index Positive numeric. The tail index, i.e., degrees
#' of freedom, of the noise.
#' @return Numeric matrix. Dataset matrix with \code{n}
#' rows (observations) and \code{p} columns (variables).
#' @keywords internal
simulate_noise <- function(n, p, distr = c("student_t", "gaussian",
"log_normal"), tail_index){
if (p <= 1 | n <= 1){
stop("n and p must be larger than 1!")
}
distr <- match.arg(distr)
switch(distr,
"student_t" = {
noise <- array(stats::rt(n * p, df = tail_index), dim = c(n, p))
},
"gaussian" = {
noise <- array(stats::rnorm(n * p), dim = c(n, p))
},
"log_normal" = {
noise <- array(stats::rlnorm(n * p), dim = c(n, p))
})
return(noise)
}
#' Generate data from non-linear Structural Equation Model
#'
#' Generate data from non-linear Structural Causal Model as described in the
#' paper "Causal discovery in heavy-tailed models".
#'
#' @inheritParams get_all_paths
#' @param noise Numeric matrix. Dataset matrix with \code{n}
#' rows (observations) and \code{p} columns (variables).
#' @return Numeric matrix. Dataset matrix with \code{n}
#' rows (observations) and \code{p} columns (variables).
#' @noRd
nonlinear_scm <- function(adj_mat, noise){
n <- NROW(noise)
p <- NROW(adj_mat)
dag <- (adj_mat != 0) * 1
nodes <- compute_caus_order(dag)
dataset <- matrix(0, nrow = n, ncol = p)
dataset_transf <- matrix(0, nrow = n, ncol = p)
for (i in nodes){
betas <- adj_mat[, i]
eps <- noise[, i]
dataset[, i] <- dataset_transf %*% betas + eps
dataset_transf[, i] <- broken_hockeystick(dataset[, i])
}
return(dataset)
}
#' Apply shifted-hockeystick function
#'
#' Apply shifted-hockeystick function to a numeric vector \code{v}.
#' The shifted-hockeystick function sets to zero all entries of
#' \code{v} greater or equal to the \code{q_low}-quantile and
#' strictly less than the \code{q_high}-quantile.
#'
#' @param v Numeric vector.
#' @param q_low Numeric, between 0 and 1.
#' @param q_high Numeric, between 0 and 1.
#' @return Numeric vector.
#' @noRd
broken_hockeystick <- function(v, q_low = 0.05, q_high = 0.95){
n <- length(v)
r <- rank(v, ties.method = "first")
ind <- which(r > floor(n * q_low) & r <= ceiling(n * q_high))
v[ind] <- 0
return(v)
}
#' Transform to uniform margins
#'
#' Transform the numeric vector \code{x} to uniform margin
#' between 0 and 1.
#'
#' @inheritParams broken_hockeystick
#' @return Numeric vector.
#' @noRd
uniform_margin <- function(v){
n <- length(v)
rank(v) / n
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.