Nothing
# Part of the mi package for multiple imputation of missing data
# Copyright (C) 2008, 2009, 2010, 2011 Andrew Gelman
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
## Function to draw from the relevant symmetric generalized beta distribution
.rgbeta <-
function(num, shape) {
if(shape > 0) -1 + 2 * rbeta(num, shape, shape)
else if(shape == 0) -1 + 2 * rbinom(num, 1, 0.5)
else stop("shape must be non-negative")
}
## Function to draw a Cholesky factor of a random correlation matrix
## as a function of canonical partial correlations (CPCs)
.rcorvine <-
function(n_full, n_partial, n_cat, eta,
restrictions, strong, experiment, treatment_cor, last_CPC) {
nom <- !is.null(n_cat)
n <- n_full + 2 * n_partial
if(nom) {
n <- n + sum(n_cat) - length(n_cat)
nc <- 2 * n_partial + sum(n_cat) - length(n_cat)
holder <- matrix(NA_real_, nrow = nc, ncol = nc)
}
else holder <- matrix(NA_real_, nrow = 2 * n_partial, ncol = 2 * n_partial)
count <- 1
if(eta <= 0) stop("'eta' must be positive")
alpha <- eta + (n - 2) / 2 # if eta == 1, then tcrossprod(L) is uniform over correlation matrices
L <- matrix(NA_real_, n, n)
L[upper.tri(L)] <- 0
L[1,1] <- 1
L <- .rcorvine_helper(L, holder, n_full, n_partial, n_cat, alpha, restrictions,
strong, experiment, treatment_cor, nom, last_CPC)
if(restrictions == "MARish") L <- .MAR_opt(L, n_full, n_partial)
return(L)
}
.rcorvine_helper <-
function(L, holder, n_full, n_partial, n_cat, alpha, restrictions,
strong, experiment, treatment_cor, nom, last_CPC) {
n <- nrow(L)
mark <- is.na(L[,1])
sum_mark <- sum(mark)
CPCs <- .rgbeta(sum(mark), alpha)
count <- 1
if(experiment) {
len <- length(treatment_cor)
if(len == 1 && treatment_cor == 0) treatment_cor <- rep(0, sum_mark)
else if(len != sum_mark) {
stop(paste("length of 'treatment_cor' must be", sum_mark))
}
treatment_mark <- is.na(treatment_cor)
treatment_cor[treatment_mark] <- CPCs[treatment_mark]
CPCs <- treatment_cor # treatment variable is first
}
if(n_full == 0 && restrictions != "none") {
CPCs[sum_mark:(sum_mark - n_partial)] <- 0
holder[1,mark] <- CPCs
count <- count + 1
}
else if(n_full == 0) {
holder[1,mark] <- CPCs
count <- count + 1
}
L[mark,1] <- CPCs
W <- log(1 - CPCs^2)
## NOTE: order of variables is:
## all fully observed (with the treatment first if applicable)
## all partially observed but not nominal variables (if any)
## the components of the nominal variable(s) (if any)
## all missingness indicators
# fully observed variables have arbitrary CPCs
start <- 2
end <- n_full
if(n_full >= 2) for(i in start:end) {
L[i,i] <- exp(0.5 * W[i-1])
gap <- which(is.na(L[,i]))
gap1 <- gap - 1
alpha <- alpha - 0.5
CPCs <- .rgbeta(length(gap), alpha)
if(restrictions == "MCAR") CPCs[length(gap):(length(gap) - n_partial + 1)] <- 0
L[gap,i] <- CPCs * exp(0.5 * W[gap1])
W[gap1] <- W[gap1] + log(1 - CPCs^2)
}
# partially observed variables have arbitrary CPCs among themselves
# but are conditionally uncorrelated with all missingness indicators under MAR
# note: we condition on all fully observed variables and all previous partially observed variables
# this triangle scheme implies that the errors when predicting the partially observed variables are uncorrelated
if(n_full >= 2) start <- end + 1
end <- start + n_partial - 1
if(nom) end <- end - length(n_cat)
if(start <= end) for(i in start:end) {
L[i,i] <- exp(0.5 * W[i-1])
gap <- which(is.na(L[,i]))
gap1 <- gap - 1
alpha <- alpha - 0.5
CPCs <- .rgbeta(length(gap), alpha)
if(restrictions %in% c("triangular", "stratified")) {
CPCs[length(gap):(length(gap) - n_partial + 1)] <- 0
if(i == end && !is.na(last_CPC)) CPCs[length(CPCs)] <- last_CPC
}
else if(restrictions == "MCAR") CPCs[] <- 0
L[gap,i] <- CPCs * exp(0.5 * W[gap1])
W[gap1] <- W[gap1] + log(1 - CPCs^2)
holder[count,(count+1):ncol(holder)] <- CPCs
count <- count + 1
}
# if there are nominal partially observed variables, make the category residuals uncorrelated (MNL assumption)
if(nom) {
#if(n_full >= 2)
start <- end + 1
end <- start + sum(n_cat) - 1
for(i in start:end) {
L[i,i] <- exp(0.5 * W[i-1])
gap <- which(is.na(L[,i]))
gap1 <- gap - 1
alpha <- alpha - 0.5
if(restrictions != "none") CPCs <- rep(0, length(gap))
else {
CPCs <- .rgbeta(length(gap), alpha)
CPCs[-(length(gap):(length(gap) - n_partial + 1))] <- 0
}
L[gap,i] <- CPCs * exp(0.5 * W[gap1])
W[gap1] <- W[gap1] + log(1 - CPCs^2)
holder[count,(count+1):ncol(holder)] <- CPCs
count <- count + 1
}
}
# missingness indicators can be constructed to be instruments if MAR holds whose strength can be manipulated
if(n_partial > 1) {
start <- end + 1
end <- n - 1
count <- if(n_full > 0) 1 else 2
if(start <= end) for(i in start:end) {
L[i,i] <- exp(0.5 * W[i-1])
gap <- which(is.na(L[,i]))
gap1 <- gap - 1
alpha <- alpha - 0.5
if(restrictions %in% c("none", "MARish")) CPCs <- .rgbeta(length(gap), alpha)
else if(restrictions %in% c("stratified", "MCAR")) CPCs <- rep(0, length(gap))
else if(strong == 2) CPCs <- holder[count,(count+1):(count + length(gap))]
else if(strong == 1) CPCs <- .rgbeta(length(gap), alpha)
else if(strong == 0) CPCs <- rep(0, length(gap))
L[gap,i] <- CPCs * exp(0.5 * W[gap1])
W[gap1] <- W[gap1] + log(1 - CPCs^2)
}
}
L[n,n] <- exp(0.5 * W[n-1])
return(L)
}
## Function to draw a Cholesky factor of a random correlation matrix
## as a function of canonical partial correlations (CPCs)
.rcorvine_partial <-
function(Sigma, n_partial, n_cat, eta, restrictions, strong, experiment, treatment_cor) {
n <- nrow(Sigma)
n_full <- n - n_partial
ldlt <- LDLt(Sigma)
U <- t(ldlt$L)
holder <- matrix(NA_real_, n, n)
holder[1,-1] <- U[1,-1]
W <- c(NA_real_, 1 - holder[1,-1]^2)
for(i in 2:(n-1)) {
denominator <- W[i]
gap <- (i+1):n
temp <- U[i,gap] / sqrt(W[gap] / denominator)
invalid <- is.na(temp)
temp[invalid] <- sign(U[i,gap][invalid])
invalid <- abs(temp) > 1
temp[invalid] <- sign(temp[invalid])
holder[i,gap] <- temp
W[gap] <- W[gap] * (1 - holder[i,gap]^2)
}
nom <- !is.null(n_cat)
n <- n + n_partial
if(nom) {
n <- n + sum(n_cat) - length(n_cat)
}
L <- t(U) * sqrt(diag(ldlt$D))
if(eta <= 0) stop("'eta' must be positive")
diff <- n - nrow(L)
alpha <- eta + diff / 2
L <- cbind(L, matrix(0, nrow(L), diff))
L <- rbind(L, matrix(NA_real_, diff, n))
L[upper.tri(L)] <- 0
holder <- matrix(NA_real_, nrow = diff, ncol = ncol(L))
L <- .rcorvine_helper(L, holder, n_full, n_partial, n_cat, alpha, restrictions, strong, experiment, treatment_cor, nom)
if(restrictions == "MARish") L <- .MAR_opt(L, n_full, n_partial)
return(L)
}
.MAR_opt <-
function(L, n_full, n_partial) {
n_p2 <- n_partial^2
lowers <- lower.tri(L)
cell_mark <- tail(which(lowers), n_p2)
lowers[] <- FALSE
lowers[cell_mark] <- TRUE
row_mark <- which(apply(lowers, 1, any))
diag(L)[row_mark] <- NA_real_
partials <- (n_full + 1):(n_full + n_partial)
missingness <- nrow(L):(nrow(L) - n_partial + 1)
block_mark <- which( row(L) %in% partials & col(L) %in% missingness )
foo <- function(theta) {
L[cell_mark] <- theta
diags <- 1 - rowSums(L[row_mark,,drop=FALSE]^2, na.rm = TRUE)
if(any(diags < 0)) return(NA_real_)
diag(L)[row_mark] <- sqrt(diags)
Sigma_inv <- chol2inv(t(L))
return(c(crossprod(Sigma_inv[block_mark])))
}
opt <- optim(L[cell_mark], foo, method = "BFGS")
L[cell_mark] <- opt$par
diag(L)[row_mark] <- sqrt(1 - rowSums(L[row_mark,,drop=FALSE]^2, na.rm = TRUE))
return(L)
}
.NMARness <-
function(L) {
xs <- grep("^x_", rownames(L), value = TRUE)
ys <- grep("^y_", rownames(L), value = TRUE)
us <- grep("^u_", rownames(L), value = TRUE)
sapply(ys, FUN = function(y) {
i <- y
sapply(us, FUN = function(u) {
j <- u
cons <- c(xs, setdiff(us, u))
mark <- c(i,j,cons)
D_ijcons <- det(tcrossprod(L[mark,,drop = FALSE]))
mark <- cons
D_cons <- det(tcrossprod(L[mark,,drop = FALSE]))
mark <- c(i,cons)
D_icons <- det(tcrossprod(L[mark,,drop = FALSE]))
mark <- c(j,cons)
D_jcons <- det(tcrossprod(L[mark,,drop = FALSE]))
return(1 - D_ijcons * D_cons / (D_icons * D_jcons))
})
})
}
## Function to construct a random data.frame with tunable missingness
rdata.frame <-
function(N = 1000, restrictions = c("none", "MARish", "triangular", "stratified", "MCAR"),
last_CPC = NA_real_, strong = FALSE, pr_miss = .25,
Sigma = NULL, alpha = NULL,
experiment = FALSE, treatment_cor = c(rep(0, n_full - 1), rep(NA, 2 * n_partial)),
n_full = 1, n_partial = 1, n_cat = NULL,
eta = 1, df = Inf, types = "continuous", estimate_CPCs = TRUE) {
if(length(N) != 1) stop("length of 'N' must be 1")
if(N <= 0) stop("'N' must be positive")
restrictions <- match.arg(restrictions)
if(strong && restrictions == "none") warning("instruments are not valid unless the MAR assumption is enforced")
if(n_full < 0) stop("'n_full' must be >= 0")
if(n_partial < 0) stop("'n_partial must be >= 0")
n <- n_partial + n_full
if(n == 0) stop("at least one of 'n_full' or 'n_partial' must be positive")
if(length(pr_miss) == 1) pr_miss <- rep(pr_miss, n_partial)
if(any(pr_miss <= 0)) stop("all elements of 'pr_miss' must be > 0")
if(any(pr_miss >= 1)) stop("all elements of 'pr_miss' must be < 1")
if(length(df) != 1) stop("'df' must be of length 1")
if(df <= 0) stop("'df' must be a positive")
if(length(types) == 1) types <- rep(types, n)
types <- match.arg(types, c("continuous", "count", "binary", "treatment", "ordinal", "nominal", "proportion", "positive"),
several.ok = TRUE)
if(any(types[1:n_full] == "nominal")) {
warning("fully observed nominal variables not supported, changing them to ordinal without loss of generality")
types <- ifelse(types == "nominal" & 1:length(types) <= n_full, "ordinal", types)
}
# else if(!is.null(n_cat)) types[n:(n - length(n_cat) + 1)] <- "nominal"
if(all( c("ordinal", "nominal") %in% types[-(1:n_full)] )) {
stop("including both ordinal and nominal partially observed variables is not supported yet")
}
if(any(types == "nominal")) {
has_nominal <- TRUE
if(is.null(n_cat)) {
if(types[n] != "nominal") {
warning("assuming the last partially observed variable is nominal with 3 categories")
types[n] <- "nominal"
}
n_cat <- 3
}
}
else has_nominal <- FALSE
if(has_nominal) {
if(any(n_cat < 3)) stop("nominal variables must have more than 2 categories")
types <- c(types[types != "nominal"], types[types == "nominal"])
}
if(experiment) {
if(types[1] != "treatment") stop("the first variable must be the treatment variable")
if(any(types[-1] == "treatment")) stop("only one treatment variable is permitted")
}
if(is.null(Sigma)) L <- .rcorvine(n_full, n_partial,
if(has_nominal) n_cat else NULL,
eta, restrictions, strong, experiment, treatment_cor, last_CPC)
else {
if(!isSymmetric(Sigma)) stop("'Sigma' must be symmetric")
if(ncol(Sigma) != (n_full + 2 * n_partial)) stop("'Sigma' must be of order 'n_full + 2 * n_partial'")
if(any(types == "nominal")) stop("nominal variables not supported when 'Sigma' is given")
if(experiment) stop("treatment variables not supported when 'Sigma' is given")
L <- chol(Sigma)
}
if(is.null(alpha)) {
Z <- matrix(rnorm(N * nrow(L)), nrow = nrow(L))
X <- as.data.frame(t(Z) %*% t(L))
}
else {
if(length(alpha) == 1 && is.na(alpha)) alpha <- rt(ncol(L), df)
else if(length(alpha) != ncol(L)) stop(paste("length of alpha must be", ncol(L)))
Sigma <- tcrossprod(L)
result <- find_Omega(Sigma, alpha, control = list(maxit = 1000))
X <- as.data.frame(sn::rmsn(N, Omega = result$Omega, alpha = alpha))
}
if(df < Inf) X <- X / sqrt(rchisq(N, df) / df)
if(!has_nominal) colnames(X) <- c(if(n_full) paste("x", 1:n_full, sep = "_"),
if(n_partial) paste("y", 1:n_partial, sep = "_"),
if(n_partial) paste("u", 1:n_partial, sep = "_") )
else {
if(length(n_cat) > 23) stop("number of nominal variables must be <= 23")
cn <- as.character(NULL)
for(i in seq_along(n_cat)) cn <- c(cn, paste(letters[i], 1:n_cat[i], sep = "_"))
colnames(X) <- c(if(n_full) paste("x", 1:n_full, sep = "_"), if(n_partial > length(n_cat))
paste("y", 1:(n_partial - length(n_cat)), sep = "_") else NULL,
cn,
paste("u", 1:n_partial, sep = "_") )
}
if(experiment) {
row_mark <- X[,1] == 1
col_mark <- c(FALSE, is.na(treatment_cor))
col_mark[grepl("^u_", colnames(X))] <- FALSE
if(any(col_mark)) X[row_mark,col_mark] <- X[row_mark,col_mark] + 1 # ATT
}
X_obs <- X
correlations <- rep(NA_real_, if(!has_nominal) n_partial else n_partial - length(n_cat) + sum(n_cat))
end <- n_partial - length(n_cat) * has_nominal
if(end > 0) for(i in 1:end) {
y_var <- paste("y", i, sep = "_")
u_var <- paste("u", i, sep = "_")
X_obs[X[,u_var] < quantile(X[,u_var], probs = pr_miss[i]), y_var] <- NA_real_
X_obs[[u_var]] <- NULL
if(!estimate_CPCs) next
f_miss <- colnames(X)
if(n_full > 0) f_miss <- f_miss[1:(n_full + i - 1)]
else f_miss <- "1"
f_miss <- paste(f_miss, collapse = " + ")
f_miss <- as.formula(paste(u_var, "~", f_miss))
ols_u <- lm(f_miss, data = X)
f_true <- colnames(X)
if(n_full > 0) f_true <- f_true[1:(n_full + i - 1)]
else f_true <- "1"
f_true <- paste(f_true, collapse = " + ")
f_true <- as.formula(paste(y_var, "~", f_true))
ols_y <- lm(f_true, data = X)
correlations[i] <- cor(residuals(ols_u), residuals(ols_y)) # this differs only randomly from 0 under MAR due to finite N
}
letter_mark <- 1
if(has_nominal) for(i in (end + 1):n_partial) {
y_var <- paste("y", i, sep = "_")
u_var <- paste("u", i, sep = "_")
mark <- grepl(paste("^", letters[letter_mark], "_", sep = ""), colnames(X))
lev <- as.character(NULL)
for(j in 1:ceiling(n_cat[letter_mark] / 26)) lev <- c(lev, rep(letters, each = j))
lev <- lev[1:n_cat[letter_mark]]
X_obs[[y_var]] <- X[[y_var]] <- factor(max.col(X[,mark]), labels = lev)
X_obs[X[,u_var] < quantile(X[,u_var], probs = pr_miss[i]), y_var] <- NA
if(!estimate_CPCs) {
letter_mark <- letter_mark + 1
next
}
f_miss <- colnames(X)
if(letter_mark == 1) f_miss <- f_miss[1:(n_full + n_partial - length(n_cat))]
else f_miss <- f_miss[1:(n_full + n_partial - length(n_cat) + sum(n_cat[1:(letter_mark - 1)]))]
f_miss <- paste(f_miss, collapse = " + ")
f_miss <- as.formula(paste(u_var, "~", f_miss))
ols_u <- lm(f_miss, data = X)
for(j in 1:n_cat[letter_mark]) {
f_true <- colnames(X)
if(letter_mark == 1) f_true <- f_true[1:(n_full + n_partial - length(n_cat))]
else f_true <- f_true[1:(n_full + n_partial - length(n_cat) + sum(n_cat[1:(letter_mark - 1)]))]
f_true <- paste(f_true, collapse = " + ")
n_var <- paste(letters[letter_mark], j, sep = "_")
f_true <- as.formula(paste(n_var, "~", f_true))
ols_n <- lm(f_true, data = X)
correlations[which(is.na(correlations))[1]] <- cor(residuals(ols_u), residuals(ols_n)) # this differs only randomly from 0 under MAR
}
letter_mark <- letter_mark + 1
}
if(!has_nominal) names(correlations) <- if(n_partial) paste("e", 1:n_partial, sep = "_") else NULL
else {
cn <- if(n_partial > length(n_cat)) paste("e", 1:(n_partial - length(n_cat)), sep = "_") else as.character(NULL)
for(i in seq_along(n_cat)) cn <- c(cn, paste("e:", letters[i], "_", 1:n_cat[i], sep = ""))
names(correlations) <- cn
}
X_obs <- X_obs[,grepl("^[xy]_", colnames(X_obs))]
mark_ord <- 1
for(i in seq_along(types)) {
mark <- is.na(X_obs[,i])
if(types[i] %in% c("binary", "treatment")) {
if(i == 1 && experiment) {
X_obs[,i] <- X[,i] <- as.factor(X[,i] > 0)
colnames(X_obs)[1] <- colnames(X)[1] <- "treatment"
}
else {
X[[toupper(colnames(X)[i])]] <- X[,i]
X_obs[,i] <- X[,i] <- cut(X[,i], breaks = 2, labels = c("FALSE", "TRUE"))
}
}
else if(types[i] == "ordinal") {
X[[toupper(colnames(X)[i])]] <- X[,i]
breaks <- 3
if(length(n_cat) == 1) breaks <- n_cat
else if(length(n_cat) > 1) {
breaks <- n_cat[mark_ord]
mark_ord <- mark_ord + 1
}
qs <- quantile(X[,i], prob = seq(from = 0, to = 1, length.out = breaks + 1))
qs[1] <- -Inf
qs[length(qs)] <- Inf
X_obs[,i] <- X[,i] <- cut(X[,i], breaks = qs, ordered_result = TRUE, labels = LETTERS[1:breaks])
}
else if(types[i] == "count") { # this is not quite consistent with the DGP
X[[toupper(colnames(X)[i])]] <- X[,i]
X_obs[,i] <- X[,i] <- as.integer(qpois(pt(X[,i], df = df), lambda = 5))
}
else if(types[i] == "proportion") { # this is not quite consistent with the DGP
X[[toupper(colnames(X)[i])]] <- X[,i]
X_obs[,i] <- X[,i] <- pt(X[,i], df = df)
}
else if(types[i] == "positive") {
X[[toupper(colnames(X)[i])]] <- X[,i]
X_obs[,i] <- X[,i] <- exp(X[,i])
}
X_obs[mark,i] <- NA
}
ord <- c(colnames(X_obs), grep("^u_", colnames(X), value = TRUE))
extras <- colnames(X)
extras <- extras[!(extras %in% ord)]
ord <- c(ord, extras)
X <- X[,ord]
cn <- colnames(X)
cn <- cn[sapply(1:ncol(X), FUN = function(i) {
!is.factor(X[,i]) && !(toupper(cn[i]) %in% cn[-i])
})]
resort <- function(s) {
ord <- order(as.integer(gsub("^[a-z,A-Z]_", "", s)))
return(s[ord])
}
cn <- c(if(experiment) "treatment_propensity",
resort(grep("^x", cn, ignore.case = TRUE, value = TRUE)),
resort(grep("^y", cn, ignore.case = TRUE, value = TRUE)),
grep("^[a-t]_", cn, ignore.case = FALSE, value = TRUE),
grep("^u", cn, ignore.case = FALSE, value = TRUE))
rownames(L) <- colnames(L) <- cn
out <- list(true = X, obs = X_obs, empirical_CPCs = correlations, L = L)
if(!is.null(alpha)) out <- c(out, list(alpha = alpha, skewness = result$sn_skewness, kurtosis = result$sn_kurtosis))
return(out)
}
## this function makes a positive definite correlation matrix given choose(n,2) unbounded parameters
make_O.cor <-
function(theta) {
n <- (1 + sqrt(1 + 8 * length(theta))) / 2
CPCs <- exp(2 * theta)
CPCs <- (CPCs - 1) / (CPCs + 1)
L <- matrix(0, n, n)
L[1,1] <- 1
start <- 1
end <- n - 1
L[-1,1] <- partials <- CPCs[start:end]
W <- log(1 - partials^2)
for(i in 2:(n-1)) {
start <- end + 1
end <- start + n - i - 1
gap <- (i+1):n
gap1 <- i:(n-1)
partials <- CPCs[start:end]
L[i,i] <- exp(0.5 * W[i-1])
L[gap,i] <- partials * exp(0.5 * W[gap1])
W[gap1] <- W[gap1] + log(1 - partials^2)
}
L[n,n] <- exp(0.5 * W[n-1])
return(tcrossprod(L))
}
## this objective function is the Frobenius norm of the difference between Sigma and Sigma_proposed
fmin <-
function(theta, Sigma, alpha, final = FALSE, ...) {
n <- nrow(Sigma)
omega <- exp(theta[1:n]) # standard deviations of the implicit Omega matrix
O.cor <- make_O.cor(theta[-(1:n)])
alphaTO.cor <- alpha %*% O.cor
Sigma_proposed <- ( O.cor - 2 / (pi * c(1 + alphaTO.cor %*% alpha)) * crossprod(alphaTO.cor) ) * tcrossprod(omega)
if(final) return(Sigma_proposed)
return(crossprod( c(Sigma - Sigma_proposed) )[1])
}
## this function makes a 3-factor Cholesky factorization of a PSD A matrix
LDLt <-
function(A) {
n <- nrow(A)
L <- diag(n)
D <- matrix(0, n, n)
for(j in 1:n) {
s <- 0
if(j > 1) for(k in 1:(j-1)) s <- s + L[j,k]^2 * D[k,k]
D[j,j] <- A[j,j] - s
if(D[j,j] < 1e-15) {
D[j,j] <- 0
break
}
if(j < n) for(i in (j+1):n) {
s <- 0
if(j > 1) for(k in 1:(j-1)) s <- s + L[i,k] * L[j,k] * D[k,k]
L[i,j] <- (A[i,j] - s) / D[j,j]
}
}
return(list(L = L, D = D))
}
## this function makes plausible starting values (basically treating alpha is if it were a zero vector)
make_start <-
function(Sigma) {
log_omega <- log(sqrt(diag(Sigma)))
Sigma <- cov2cor(Sigma)
n <- nrow(Sigma)
U <- t(LDLt(Sigma)$L)
holder <- matrix(NA_real_, n, n)
holder[1,-1] <- U[1,-1]
W <- c(NA_real_, 1 - holder[1,-1]^2)
for(i in 2:(n-1)) {
denominator <- W[i]
gap <- (i+1):n
temp <- U[i,gap] / sqrt(W[gap] / denominator)
invalid <- is.na(temp)
temp[invalid] <- sign(U[i,gap][invalid])
invalid <- abs(temp) > 1
temp[invalid] <- sign(temp[invalid])
holder[i,gap] <- temp
W[gap] <- W[gap] * (1 - holder[i,gap]^2)
}
holder <- t(holder)
CPCs <- holder[lower.tri(holder)]
return(c(log_omega, atanh(CPCs)))
}
## this function finds Omega via optim() and returns it as part of a list with
find_Omega <-
function(Sigma, alpha, method = "BFGS", start = make_start(Sigma), ...) {
stopifnot(isSymmetric(Sigma)) # Sigma is the intended covariance matrix of the multivariate skew-normal variable
stopifnot(all(eigen(Sigma, TRUE, TRUE)$values > 0))
n <- nrow(Sigma)
alpha <- c(alpha)
stopifnot(length(alpha) == n) # alpha is a shape parameter for the multivariate skew-normal variable
opt <- optim(start, fmin, method = method, Sigma = Sigma, alpha = alpha, ...)
if(opt$convergence != 0) {
gradients <- opt$counts["gradient"]
warning(paste("Convergence problem. Pass something like 'control = list(maxit = ",
5 * gradients, ")' if alpha is far from a zero vector", sep = ""))
}
theta <- opt$par
omega <- exp(theta[1:n])
O.cor <- make_O.cor(theta[-(1:n)])
opt$Omega <- O.cor * tcrossprod(omega)
alphaTO.cor <- c(alpha %*% O.cor)
delta <- c( (O.cor %*% alpha) / sqrt(1 + alphaTO.cor %*% alpha)[1] )
mu_z <- sqrt(2/pi) * delta
num <- c( mu_z %*% chol2inv(chol(O.cor)) %*% mu_z )
opt$delta <- delta
opt$sn_skewness <- ( (4 - pi) / 4 )^2 * ( num / (1 - num) )^3
opt$sn_kurtosis <- 2 * (pi - 3) * ( num / (1 - num) )^2
return(opt)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.