Nothing
library(genscore)
if (!requireNamespace("tmvtnorm", quietly = TRUE))
stop("Please install package \"tmvtnorm\".")
library(tmvtnorm)
readline("For a full tutorial on the package, please refer to the vignette instead.\n(Press enter to continue)")
## Domain definition
p <- 30
# The 30-dimensional real space R^30
domain <- make_domain("R", p=p)
# The non-negative orthant of the 30-dimensional real space, R+^30
domain <- make_domain("R+", p=p)
# x such that sum(x^2) > 10 && sum(x^(1/3)) > 10 with x allowed to be negative
domain <- make_domain("polynomial", p=p, rule="1 && 2",
ineqs=list(list("expression"="sum(x^2)>10", abs=FALSE, nonnegative=FALSE),
list("expression"="sum(x^(1/3))>10", abs=FALSE, nonnegative=FALSE)))
# Alternatively, equivalent domain
domain2 <- make_domain("polynomial", p=p, rule="1 && 2",
ineqs=list(list(uniform=FALSE, power_numers=2, power_denoms=1, const=10,
coeffs=1, larger=1, abs=FALSE, nonnegative=FALSE),
list(uniform=FALSE, power_numers=1, power_denoms=3, const=10,
coeffs=1, larger=1, abs=FALSE, nonnegative=FALSE)))
# ([0, 1] v [2,3]) ^ p
domain <- make_domain("uniform", p=p, lefts=c(0,2), rights=c(1,3))
# x such that {x1 > 1 && log(1.3) < x2 < 1 && x3 > log(1.3) && ... && xp > log(1.3)}
domain <- make_domain("polynomial", p=p, rule="1 && 2 && 3",
ineqs=list(list("expression"="x1>1", abs=FALSE, nonnegative=TRUE),
list("expression"="x2<1", abs=FALSE, nonnegative=TRUE),
list("expression"="exp(x)>1.3", abs=FALSE, nonnegative=FALSE)))
# Alternatively, equivalent domain
domain2 <- make_domain("polynomial", p=p, rule="1 && 2",
ineqs=list(list(uniform=FALSE, power_numers=1, power_denoms=1, const=1,
coeffs=c(1,rep(0,p-1)), larger=1, abs=FALSE, nonnegative=TRUE),
list(uniform=FALSE, power_numers=1, power_denoms=1, const=1,
coeffs=c(0,1,rep(0,p-2)), larger=0, abs=FALSE, nonnegative=TRUE),
list(uniform=TRUE, power_numers=1, power_denoms=0, const=1.3,
larger=1, abs=FALSE, nonnegative=FALSE)))
# x in R_+^p such that {sum(log(x))<2 || (x1^(2/3)-1.3x2^(-3)<1 && exp(x1)+2.3*x2>2)}
domain <- make_domain("polynomial", p=p, rule="1 || (2 && 3)",
ineqs=list(list("expression"="sum(log(x))<2", abs=FALSE, nonnegative=TRUE),
list("expression"="x1^(2/3)-1.3x2^(-3)<1", abs=FALSE, nonnegative=TRUE),
list("expression"="exp(x1)+2.3*x2^2>2", abs=FALSE, nonnegative=TRUE)))
# Alternatively, equivalent domain
domain2 <- make_domain("polynomial", p=p, rule="1 && 2",
ineqs=list(list(uniform=FALSE, power_numers=0, power_denoms=0, const=2,
coeffs=1, larger=0, abs=FALSE, nonnegative=TRUE),
list(uniform=FALSE, power_numers=c(2,-3,rep(1,p-2)), power_denoms=c(3,rep(1,p-1)),
const=1, coeffs=c(1.0,-1.3,rep(0,p-2)), larger=0, abs=FALSE, nonnegative=TRUE),
list(uniform=FALSE, power_numers=c(1,2,rep(1,p-2)), power_denoms=c(0,rep(1,p-1)),
const=2, coeffs=c(1,2.3,rep(0,p-2)), larger=1, abs=FALSE, nonnegative=TRUE)))
# x in R_+^p such that {x in R_+^p: sum_j j * xj <= 1}
domain <- make_domain("polynomial", p=p,
ineqs=list(list("expression"=paste(paste(sapply(1:p,
function(j){paste(j, "x", j, sep="")}), collapse="+"), "<1"),
abs=FALSE, nonnegative=TRUE)))
# Alternatively, equivalent domain
domain2 <- make_domain("polynomial", p=p,
ineqs=list(list(uniform=FALSE, power_numers=1, power_denoms=1, const=1,
coeffs=1:p, larger=0, abs=FALSE, nonnegative=TRUE)))
# The (p-1)-dimensional simplex
domain <- make_domain("simplex", p=p)
# The l-1 ball {sum(|x|) < 1}
domain <- make_domain("polynomial", p=p,
ineqs=list(list("expression"="sum(x)<1", abs=TRUE, nonnegative=FALSE)))
readline("Press enter to continue to the next example.")
## Gaussian Graphical Models on the non-negative orthant R_+^p
n <- 200
p <- 50
tol <- 1e-8
domain <- make_domain("R+", p=p)
# Mean parameter
mu <- rnorm(p) * 0.5
# Inverse covariance matrix
K <- cov_cons(mode="sub", p=p, seed=1, spars=0.2, eig=0.1, subgraphs=5)
# True edge set
true_edges <- which(abs(K) > tol & diag(p) == 0)
# Generate a sample from truncated GGMs
set.seed(1)
x <- rtmvnorm(n, mean = mu, sigma = solve(K), lower = rep(0, p), upper = rep(Inf, p), algorithm = "gibbs", burn.in.samples = 100, thinning = 10)
### Estimate the graph using estimate()
# max number of iterations
maxit <- 1000
# number of lambdas
nlambda <- 100
# diagonal multiplier
dm <- 1 + (1-1/(1+4*exp(1)*max(6*log(p)/n, sqrt(6*log(p)/n)))) # diagonal multiplier
# h function
h_mode <- "min_pow"
h_param1 <- 1
h_param2 <- 3
# estimate assuming mean unknown, using the symmetric profiled estimator
est <- estimate(x, setting="gaussian", domain=domain, centered=FALSE, symmetric="symmetric",
scale="norm", lambda_length=nlambda, lambda_ratio=Inf, mode=h_mode,
param1=h_param1, param2=h_param2, verbose=TRUE, tol=tol, maxit=maxit,
BIC_refit=TRUE, diagonal_multiplier=dm, return_raw=TRUE)
# The estimated graph for the largest automatically chosen lambda
print(est$edgess[[1]]) # empty graph
# The estimated graph for the smallest automatically chosen lambda
print(length(est$edgess[[nlambda]]) == p*(p-1)) # complete graph
# Estimated graph for the 50th lambda
print(length(est$edgess[[nlambda/2]]))
readline("Press enter to continue to the next example.")
### BIC scores
print(est$BICs[1:10,])
# Refitted BIC scores
print(est$BIC_refits[1:10, ])
readline("Press enter to continue to the next example.")
### lambda values
print(est$lambda1s) # lambda_Ks used
print(est$lambda2s) # lambda_etas, all 0 since with lambda_ratio=Inf we are in the profiled setting
readline("Press enter to continue to the next example.")
### eta estimates
print(est$etas[1:5,1:5]) # First 5 components for the first 5 lambdas
print(est$raw_estimates[[nlambda/2]][1:5,1:5]) # First few entries of the estimated K for the 50th lambda
readline("Press enter to continue to the next example.")
### ROC curve
ROC <- t(sapply(est$edgess, tp_fp, true_edges=true_edges, p=p)) # ROC curve
# The area under the curve
AUC(ROC)
# Plot refitted BIC and ROC
par(mfrow=c(1,2), mar=c(4,4,4,4))
colors_ <- rainbow(nlambda)
plot_BIC_ind <- !is.infinite(est$BIC_refits[,3])
plot(est$lambda1s[plot_BIC_ind], est$BIC_refits[plot_BIC_ind,3], col=colors_[plot_BIC_ind], pch=16, cex=1.5, xlab="lambda", ylab="eBIC", main="Refitted BIC (when exists)")
plot(c(),c(), ylim = c(0,1), xlim = c(0,1), cex.lab=1, xlab = "False Positives", ylab = "True Positives", main = "ROC curve")
points(ROC[,2], ROC[,1], type="l")
points(ROC[,2], ROC[,1], pch=16, cex=1.5, col=colors_)
points(c(0,1), c(0,1), type="l", lty=2)
readline("Press enter to continue to the next example.")
### Estimation using estimate() with get_elts()
# Get the h and h' functions
h_hp <- get_h_hp(mode=h_mode, para=h_param1, para2=h_param2) # h(x)=min(x,3), h'(x)=(x<=3)
# Sufficient statistics, named "elts"
elts_P <- get_elts(h_hp, x, setting="gaussian", domain=domain, centered=FALSE,
profiled_if_noncenter=Inf, scale="norm", diagonal_multiplier=dm,
use_C=TRUE, tol=tol) # P stands for Profiled
### Estimate using estimate() with elts_P
est2 <- estimate(x, setting="gaussian", elts=elts_P, domain=domain, centered=FALSE,
symmetric="symmetric", scale="norm", lambda_length=nlambda,
lambda_ratio=Inf, h_hp=h_hp, verbose=TRUE, tol=tol,
maxit=maxit, BIC_refit=TRUE, diagonal_multiplier=dm, return_raw=TRUE)
# Compare to the earlier results; should be all close to 0
print(compare_two_results(est, est2))
readline("Press enter to continue to the next example.")
### Estimation for one lambda value
i <- 10
# Re-estimate at the 10th lambda
lambda1 <- est$lambda1s[i]
sub_res <- get_results(elts_P, symmetric="symmetric", lambda1=lambda1, lambda2=0,
tol=tol, maxit=maxit, previous_res=NULL, is_refit=FALSE)
# Check if the result matches the previous one
print(sum(abs(est$raw_estimates[[i]] - sub_res$K)))
print(sum(abs(est$etas[i,] - sub_res$eta)))
print(all(sub_res$edges == est$edgess[[i]]))
readline("Press enter to continue to the next example.")
## Equivalently, use estimate() with one lambda value
est_sub <- estimate(x, setting="gaussian", centered=FALSE, domain=domain,
symmetric="symmetric", scale="norm", lambda1s = lambda1,
lambda_ratio=Inf, mode=h_mode, param1=h_param1, param2=h_param2,
verbose=TRUE, tol=tol, maxit=maxit, BIC_refit=TRUE, diagonal_multiplier=dm,
return_raw=TRUE)
# Check if the result matches the previous one
print(sum(abs(est_sub$raw_estimates[[1]] - sub_res$K)))
print(sum(abs(est_sub$etas[1,] - sub_res$eta)))
print(all(est_sub$edgess[[1]] == est$edgess[[1]]))
readline("Press enter to continue to the next example.")
## The same function can also be used for fitting an unpenalized model restricted
## to a specific edge set when e.g. calculating the refitted eBIC by
## restricting to the estimated edge set. One can thus manually calculate
## the refitted eBIC in this manner, but we encourage using the function
## wrapper `estimate()` directly, which produces this information automatically.
est_refit <- get_results(elts_P, symmetric="symmetric", lambda1=lambda1, lambda2=0,
tol=tol, maxit=maxit, previous_res=sub_res, is_refit=TRUE)
# Manually calculate the BIC score (eBIC with gamma = 0)
print(2*n*est_refit$crit + (length(est_refit$edges)/2+length(est_refit$eta_support))*log(n))
# Should match the number returned previously by estimate()
print(est$BIC_refit[i,1])
readline("Press enter to continue to the next example.")
### Aggregating multiple ROC curves
## The avgrocs() function aggregates multiple ROC curves using vertical
## averaging by adopting algorithm 3 from Fawcett (2006).
ROCs <- NULL
par(mfrow=c(2,2), mar=c(3,3,3,3))
for (i in 1:3){
set.seed(i)
x <- rtmvnorm(n, mean = mu, sigma = solve(K), lower = rep(0, p),
upper = rep(Inf, p), algorithm = "gibbs", burn.in.samples = 100,
thinning = 10)
est <- estimate(x, setting="gaussian", centered=FALSE, domain=domain,
symmetric="symmetric", scale="norm", lambda_length=nlambda,
lambda_ratio=Inf, mode=h_mode, param1=h_param1, param2=h_param2,
verbose=TRUE, tol=tol, maxit=maxit, BIC_refit=FALSE,
diagonal_multiplier=dm, return_raw=FALSE)
ROC <- t(sapply(est$edgess, function(edges){tp_fp(edges, true_edges, p)}))
ROCs[[i]] <- ROC
plot(c(), c(), ylim=c(0,1), xlim=c(0,1), cex.lab=1, main=paste("ROC, trial ",i,", AUC ",round(AUC(ROC),4),sep=""), xlab="False Positives", ylab="True Positives")
points(ROC[,2], ROC[,1], type="l")
points(c(0,1), c(0,1), type = "l", lty = 2)
}
average_ROC <- avgrocs(ROCs, length(true_edges), p)
plot(c(), c(), ylim=c(0,1), xlim=c(0,1), cex.lab=1, main=paste("Average ROC, AUC",round(AUC(average_ROC),4)), xlab="False Positives", ylab="True Positives")
points(average_ROC[,2], average_ROC[,1], type="l")
points(c(0,1), c(0,1), type = "l", lty = 2)
readline("Press enter to continue to the next example.")
## Exponential Square-Root Graphical Models on the non-negative orthant R_+^p
# Consider the exponential square-root graphical models from Inouye et al (2016),
# also defined as a special case of the a-b model in (1) in Yu et al (2019)
# with a=b=0.5.
x <- gen(n, setting="exp", abs=FALSE, eta=mu, K=K,
domain=domain, finite_infinity=100, seed=2, burn_in=1000, thinning=1000, verbose=TRUE,
remove_outofbound=TRUE)
## Estimation
# As suggested in Yu et al (2019), we use h(x)=min(x,3)^1.5.
# We also try with a finite lambda_ratio as it is known to improve
# the result compared to the profiled estimator, although the best ratio
# is subject to careful tuning.
h_mode <- "min_pow"
h_param1 <- 1.5
h_param2 <- 3
est_exp <- estimate(x, setting="exp", centered=FALSE, domain=domain, symmetric="symmetric",
scale="norm", lambda_length=nlambda, lambda_ratio=2, mode=h_mode,
param1=h_param1, param2=h_param2, verbose=TRUE, tol=tol, maxit=maxit,
BIC_refit=TRUE, diagonal_multiplier=dm, return_raw=FALSE)
ROC_exp <- t(sapply(est_exp$edgess, tp_fp, true_edges=true_edges, p=p)) # ROC curve
AUC(ROC_exp) # The area under the curve
par(mfrow=c(1,2), mar=c(4,4,4,4))
plot_BIC_ind <- !is.infinite(est_exp$BIC_refits[,3])
plot(est_exp$lambda1s[plot_BIC_ind], est_exp$BIC_refits[plot_BIC_ind,3], col=colors_[plot_BIC_ind], pch=16, cex=1.5, xlab="lambda", ylab="eBIC", main="Refitted BIC (when exists)")
plot(c(),c(), ylim = c(0,1), xlim = c(0,1), cex.lab=1, xlab = "False Positives", ylab = "True Positives", main = "ROC curve")
points(ROC_exp[,2], ROC_exp[,1], type="l")
points(ROC_exp[,2], ROC_exp[,1], pch=16, cex=1.5, col=colors_)
points(c(0,1), c(0,1), type="l", lty=2)
readline("Press enter to continue to the next example.")
## Gamma Graphical Models on the non-negative orthant R_+^p
# Gamma models (a=0.5, b=0), again with h(x)=\min(x,3)^1.5 and
# `lambda_ratio=2`. Recall that the gamma graphical models require
# all entries in the linear parameter to be strictly larger than -1.
mu[mu <= -1] <- abs(mu[mu <= -1])
x <- gen(n, setting="gamma", abs=FALSE, eta=mu, K=K,
domain=domain, finite_infinity=100, seed=3, burn_in=1000, thinning=1000, verbose=TRUE,
remove_outofbound=TRUE)
h_mode <- "min_pow"
h_param1 <- 1.5
h_param2 <- 3
est_gamma <- estimate(x, setting="gamma", centered=FALSE, domain=domain,
symmetric="symmetric", scale="norm", lambda_length=nlambda,
lambda_ratio=2, mode=h_mode, param1=h_param1, param2=h_param2,
verbose=TRUE, tol=tol, maxit=maxit, BIC_refit=TRUE,
diagonal_multiplier=dm, return_raw=FALSE)
ROC_gamma <- t(sapply(est_gamma$edgess, tp_fp, true_edges=true_edges, p=p)) # ROC curve
AUC(ROC_gamma) # The area under the curve
par(mfrow=c(1,2), mar=c(4,4,4,4))
plot_BIC_ind <- !is.infinite(est_gamma$BIC_refits[,3])
plot(est_gamma$lambda1s[plot_BIC_ind], est_gamma$BIC_refits[plot_BIC_ind,3], col=colors_[plot_BIC_ind], pch=16, cex=1.5, xlab="lambda", ylab="eBIC", main="Refitted BIC (when exists)")
plot(c(),c(), ylim = c(0,1), xlim = c(0,1), cex.lab=1, xlab = "False Positives", ylab = "True Positives", main = "ROC curve")
points(ROC_gamma[,2], ROC_gamma[,1], type="l")
points(ROC_gamma[,2], ROC_gamma[,1], pch=16, cex=1.5, col=colors_)
points(c(0,1), c(0,1), type="l", lty=2)
readline("Press enter to continue to the next example.")
## General a-b Graphical Models on the non-negative orthant R_+^p
# Now choose a=1.5, b=0.5, and estimate using h(x)=\min(x,3)^0.5
# and `lambda_ratio=2`.
x <- gen(n, setting="ab_3/2_1/2", abs=FALSE, eta=mu, K=K,
domain=domain, finite_infinity=100, seed=4, burn_in=1000, thinning=1000, verbose=TRUE,
remove_outofbound=TRUE)
h_mode <- "min_pow"
h_param1 <- 0.5
h_param2 <- 3
est_ab <- estimate(x, setting="ab_3/2_1/2", centered=FALSE, domain=domain,
symmetric="symmetric", scale="norm", lambda_length=nlambda,
lambda_ratio=2, mode=h_mode, param1=h_param1, param2=h_param2,
verbose=TRUE, tol=tol, maxit=maxit, BIC_refit=TRUE,
diagonal_multiplier=dm, return_raw=FALSE)
ROC_ab <- t(sapply(est_ab$edgess, tp_fp, true_edges=true_edges, p=p)) # ROC curve
AUC(ROC_ab) # The area under the curve
par(mfrow=c(1,2), mar=c(4,4,4,4))
plot_BIC_ind <- !is.infinite(est_ab$BIC_refits[,3])
plot(est_ab$lambda1s[plot_BIC_ind], est_ab$BIC_refits[plot_BIC_ind,3], col=colors_[plot_BIC_ind], pch=16, cex=1.5, xlab="lambda", ylab="eBIC", main="Refitted BIC (when exists)")
plot(c(),c(), ylim = c(0,1), xlim = c(0,1), cex.lab=1, xlab = "False Positives", ylab = "True Positives", main = "ROC curve")
points(ROC_ab[,2], ROC_ab[,1], type="l")
points(ROC_ab[,2], ROC_ab[,1], pch=16, cex=1.5, col=colors_)
points(c(0,1), c(0,1), type="l", lty=2)
readline("Press enter to continue to the next example.")
## (Untruncated) Gaussian Graphical Models on the entire R^p
# Now estimating the inverse covariance matrix from GGMs on the entire
# R^p. However, for Gaussian graphical models we use the penalized
# version of the original score matching from Hyv\"arinen (2005);
# see Lin et al (2016). Thus, the h and hp functions are ignored
# (where we fix h(x)=1). Here we estimate with `lambda_ratio=2`.
set.seed(5)
x <- rmvnorm(n, mean = mu, sigma = solve(K))
domain <- make_domain(type="R", p=p)
est_gauss <- estimate(x, setting="gaussian", centered=FALSE, domain=domain,
symmetric="symmetric", scale="norm", lambda_length=nlambda,
lambda_ratio=2, verbose=TRUE, tol=tol, maxit=maxit, BIC_refit=TRUE,
diagonal_multiplier=dm, return_raw=TRUE)
# Alternatively, get the summary statistics first (without specifying h and hp)
elts_gauss <- get_elts(NULL, x, setting="gaussian", centered=FALSE, domain=domain,
profiled_if_noncenter=FALSE, scale="norm", diagonal_multiplier=dm,
use_C=TRUE, tol=tol)
# Then pass it to estimate()
est_gauss2 <- estimate(x, setting="gaussian", elts=elts_gauss, centered=FALSE,
domain=domain, symmetric="symmetric", scale="norm",
lambda_length=nlambda, lambda_ratio=2, verbose=TRUE, tol=tol,
maxit=maxit, BIC_refit=TRUE, diagonal_multiplier=dm,
return_raw=TRUE)
# Compare with the previous results; should be all close to 0
print(compare_two_results(est_gauss, est_gauss2))
# ROC curves
ROC_gauss <- t(sapply(est_gauss$edgess, tp_fp, true_edges=true_edges, p=p)) # ROC curve
AUC(ROC_gauss) # The area under the curve
par(mfrow=c(1,2), mar=c(4,4,4,4))
plot_BIC_ind <- !is.infinite(est_gauss$BIC_refits[,3])
plot(est_gauss$lambda1s[plot_BIC_ind], est_gauss$BIC_refits[plot_BIC_ind,3], col=colors_[plot_BIC_ind], pch=16, cex=1.5, xlab="lambda", ylab="eBIC", main="Refitted BIC (when exists)")
plot(c(),c(), ylim = c(0,1), xlim = c(0,1), cex.lab=1, xlab = "False Positives", ylab = "True Positives", main = "ROC curve")
points(ROC_gauss[,2], ROC_gauss[,1], type="l")
points(ROC_gauss[,2], ROC_gauss[,1], pch=16, cex=1.5, col=colors_)
points(c(0,1), c(0,1), type="l", lty=2)
readline("Press enter to continue to the next example.")
## Aitchison $A^d$ Models on the Simplex
# Consider the $A^d$ models from Aitchison (1985), which has density proportional to
# exp(-1/2*t(log(x)) %*% K %*% log(x) - t(eta) %*% log(x)) on the (p-1)-dimensional simplex.
# In the original model, one assumes that all(rowSums(K) == 0) and all(colSums(K) == 0),
# and all(eta > -1). This is called the "log_log_sum0" model in the package.
eta <- rep(0.5, p)
K <- -cov_cons("band", p, seed=1, spars=3)
diag(K) <- diag(K) - rowSums(K) # So that K has row and column sums 0
eigen(K)$val # Verify that K has one 0 and (p-1) positive eigenvalues
true_edges <- which(abs(K) > tol & diag(p) == 0)
domain <- make_domain(type="simplex", p=p) # Simplex domain
x <- gen(n, setting="log_log_sum0", abs=FALSE, eta=eta, K=K,
domain=domain, finite_infinity=100, seed=6, burn_in=1000, thinning=1000,
verbose=TRUE, remove_outofbound=TRUE)
h_mode <- "pow" # Simplex domains are bounded by nature, so no truncation needed
h_param1 <- 2
# scale = "" since simplex not invariant to scaling
est_log_log_sum0 <- estimate(x, setting="log_log_sum0", domain=domain, centered=FALSE,
symmetric="symmetric", scale="", lambda_length=nlambda,
lambda_ratio=Inf, mode=h_mode, param1=h_param1, param2=NULL,
verbose=TRUE, tol=tol, maxit=maxit, BIC_refit=TRUE,
diagonal_multiplier=NULL, return_raw=FALSE)
ROC_log_log_sum0 <- t(sapply(est_log_log_sum0$edgess, tp_fp, true_edges=true_edges, p=p)) # ROC curve
AUC(ROC_log_log_sum0) # The area under the curve
par(mfrow=c(1,2), mar=c(4,4,4,4))
plot_BIC_ind <- !is.infinite(est_log_log_sum0$BIC_refits[,3])
plot(est_log_log_sum0$lambda1s[plot_BIC_ind], est_log_log_sum0$BIC_refits[plot_BIC_ind,3], col=colors_[plot_BIC_ind], pch=16, cex=1.5, xlab="lambda", ylab="eBIC", main="Refitted BIC (when exists)")
plot(c(),c(), ylim = c(0,1), xlim = c(0,1), cex.lab=1, xlab = "False Positives", ylab = "True Positives", main = "ROC curve")
points(ROC_log_log_sum0[,2], ROC_log_log_sum0[,1], type="l")
points(ROC_log_log_sum0[,2], ROC_log_log_sum0[,1], pch=16, cex=1.5, col=colors_)
points(c(0,1), c(0,1), type="l", lty=2)
readline("Press enter to continue to the next example.")
# If one does not assume the row and column sums of K are all zero, the setting is
# "log_log" instead of "log_log_sum0".
est_log_log <- estimate(x, setting="log_log", domain=domain, centered=FALSE,
symmetric="symmetric", scale="", lambda_length=40,
lambda_ratio=Inf, mode=h_mode, param1=h_param1, param2=NULL,
verbose=TRUE, tol=tol, maxit=maxit, BIC_refit=TRUE,
diagonal_multiplier=NULL, return_raw=FALSE)
ROC_log_log <- t(sapply(est_log_log$edgess, tp_fp, true_edges=true_edges, p=p)) # ROC curve
AUC(ROC_log_log)
readline("Press enter to continue to the next example.")
# Univariate Truncated Normal Distributions
# Consider univariate truncated normal distributions with probability
# density function proportional to exp(-(x-mu)^2/(2*sigma^2)) on (0,Inf)$.
# First fix an $h$ function, and estimate the parameters from a random sample.
mode <- "min_log_pow"
param1 <- 1
param2 <- 2
n <- 1000
mu <- 0
sigma <- 1
set.seed(6)
x <- tmvtnorm::rtmvnorm(n, mean=mu, sigma=sigma^2, lower=c(0), upper=c(Inf))
## Can provide one of the parameters if the true value is known, or estimate both.
# Assuming sigma is known, estimated muhat should be close to the truth 0
print(mu_sigmasqhat(x, mode, param1, param2, sigma=sigma^2))
# Assuming mu is known, estimated sigmasqhat should be close to the truth 1
print(mu_sigmasqhat(x, mode, param1, param2, mu=mu))
# Assuming both are unknown, estimates should be close to the truth c(0,1)
print(mu_sigmasqhat(x, mode, param1, param2))
readline("Press enter to continue to the next example.")
## Variance Estimation and Confidence Intervals
# varhat() calculates the asymptotic variance of the estimator of
# one parameter assuming knowing the other.
# The asymptotic variance for mu assuming sigmasq known
print(varhat(mu, sigma^2, mode, param1, param2, est_mu=TRUE))
# The asymptotic variance for sigmasq assuming mu known
print(varhat(mu, sigma^2, mode, param1, param2, est_mu=FALSE))
# Plug in the estimated values and construct a confidence interval using
# the asymptotic variance evaluated at the estimates.
alpha <- 0.05
muhat <- mu_sigmasqhat(x, mode, param1, param2, sigma=sigma^2)[1]
# Assuming sigmasq is known, plug in muhat
sdmu <- sqrt(varhat(muhat, sigma^2, mode, param1, param2, est_mu=TRUE)/n)
# The 95% confidence interval for mu
print(muhat + c(-1,1)*qnorm(1-alpha/2)*sdmu)
# Do the same for sigma
sigmasqhat <- mu_sigmasqhat(x, mode, param1, param2, mu=mu)[2]
# Assuming mu is known, plug in sigmasqhat
sdsigmasq <- sqrt(varhat(mu, sigmasqhat, mode, param1, param2, est_mu=FALSE)/n)
# The 95% confidence interval for sigmasq
print(sigmasqhat + c(-1,1)*qnorm(1-alpha/2)*sdsigmasq)
readline("Press enter to continue to the next example.")
# Generate 10000 samples, each with sample size 1000,
# and test the coverage probability of the CIs above.
samples <- 10000
# 1000*10000 random samples from TN(mu, sigma)
x <- matrix(tmvtnorm::rtmvnorm(n*samples, mean=mu, sigma=sigma^2, lower=c(0), upper=c(Inf)), nrow=n, ncol=samples)
# One muhat estimate for each of the 10000 samples
muhats <- apply(x, 2, mu_sigmasqhat, mode=mode, param1=param1, param2=param2, sigmasq=sigma^2)[1,]
# One standard error for each of the 10000 samples, plugging in the estimates for mu and true sigmasq
ses_mu <- sapply(muhats, function(muhat){sqrt(varhat(muhat, sigma^2, mode, param1, param2, est_mu=TRUE)/n)})
# Coverage probability of the confidence intervals for mu, should be close to 0.95
print(mean(mu >= muhats - qnorm(1-alpha/2)*ses_mu & mu <= muhats + qnorm(1-alpha/2)*ses_mu))
# One sigmasqhat estimate for each of the 10000 samples
sigmasqhats <- apply(x, 2, mu_sigmasqhat, mode=mode, param1=param1, param2=param2, mu=mu)[2,]
# One standard error for each of the 10000 samples, plugging in the estimates for sigmasq and true mu
ses_sigmasq <- sapply(sigmasqhats, function(sigmasqhat){sqrt(varhat(mu, sigmasqhat^2, mode, param1, param2, est_mu=FALSE)/n)})
# Coverage probability of the confidence intervals for sigmasq, should be close to 0.95
print(mean(sigma^2 >= sigmasqhats - qnorm(1-alpha/2)*ses_sigmasq & sigma^2 <= sigmasqhats + qnorm(1-alpha/2)*ses_sigmasq))
readline("Press enter to continue to the next example.")
# Compare the asymptotic variance of the estimator to the empirical
# variance, as well as the Cram\'er-Rao lower bound.
trials <- 100
# Reshape x into a 1000*100*100 array
x <- array(x, dim=c(n,trials,samples/trials))
# The asymptotic variance for mu assuming sigmasq known
print(varhat(mu, sigma^2, mode, param1, param2, est_mu=TRUE))
# Empirical variance by averaging 100 variance estimates over 100 samples, each with sample size 1000
print(n*mean(apply(apply(x, c(2,3), mu_sigmasqhat, mode=mode, param1=param1, param2=param2, sigmasq=sigma^2)[1,,], 1, var)))
# The Cram\'er-Rao lower bound on the variance of unbiased estimators of mu
print(crbound_mu(mu, sigma))
# The asymptotic variance for sigmasq assuming mu known
print(varhat(mu, sigma^2, mode, param1, param2, est_mu=FALSE))
# Empirical variance by averaging 100 variance estimates over 100 samples, each with sample size 1000
print(n*mean(apply(apply(x, c(2,3), mu_sigmasqhat, mode=mode, param1=param1, param2=param2, mu=mu)[2,,], 1, var)))
# The Cram\'er-Rao lower bound on the variance of unbiased estimators of sigmasq
print(crbound_sigma(mu, sigma))
readline("Press enter to continue to the next example.")
## Plots from Yu et al (2019)
## The following plot reproduces Figure 1 from Yu et al (2019),
# which compares the asymptotic variances and efficiencies
# (relative to the Cram\'er-Rao lower bound) of our estimators with
# differnt h functions when estimating the mu parameter,
# assuming sigmasq=1 is known.
modes <- c("min_log_pow", "min_log_pow", "log_pow", "min_pow", "min_pow", "min_pow", "pow", "pow")
param1s <- c(1,1,1,1,1,2,1,2)
param2s <- c(1,2,-1,1,2,1,-1,-1)
h_names <- c("min(log(1+x),1)", "min(log(1+x),2)", "log(1+x)", "min(x,1)", "min(x,2)","min(x^2,1)","x","x^2")
mus <- seq(-4,4,length.out=100)
sigma <- 1
curves <- matrix(NA, nrow=length(h_names)+1, ncol=length(mus))
for (mu_i in 1:length(mus)){
for (hi in 1:length(h_names))
curves[hi,mu_i] <- varhat(mus[mu_i],sigma^2,modes[hi],param1s[hi],param2s[hi],est_mu=TRUE)
curves[length(h_names)+1,mu_i] <- crbound_mu(mus[mu_i],sigma^2)
}
par(mfrow=c(1,2), mar=c(5,5,1,1), xpd=TRUE, bty="n")
plot(c(),c(), xlim=range(mus), ylim=log(range(curves)), xlab=expression(mu[0]), ylab="log(Asymptotic var)", cex.lab=1, cex.axis=1)
order <- c(1,2,3,4,7,6,5,8)
colors <- c("red","darkorange1","gold3","green","forestgreen",
"hotpink","blue",gray.colors(1,start=0.7,end=0.7))[order]
for (hi in 1:length(h_names)){
points(mus, log(curves[hi, ]), col = colors[hi], type="l", lwd=3)
}
points(mus, log(curves[length(h_names)+1,]), lty=2, type="l", lwd=3)
legend("topright", x.intersp = 0.2, y.intersp=0.7, inset = c(0.45,0.0), lty = c(rep(1,length(h_names)),2), ncol = 1, lwd=c(rep(3,length(h_names)),2), seg.len=1, bty = "n", text.width = .1, col=c(colors[match(1:length(h_names), order)],"black"), legend=c(h_names[match(1:length(h_names), order)],"C-R lower bound"), cex=1.2)
plot(c(),c(),xlim=range(mus),ylim=range(t(curves[length(h_names)+1,]/t(curves))), xlab=expression(mu[0]), ylab="Efficiency", cex.lab=1, cex.axis=1)
for (hi in 1:length(h_names)){
points(mus, curves[length(h_names)+1,]/curves[hi,], col=colors[hi], type="l", lwd=3)
}
readline("Press enter to continue to the next example.")
## The following plot reproduces Figure 2 from Yu et al (2019),
# which compares the asymptotic variances and efficiencies
# (relative to the Cram\'er-Rao lower bound) of our estimators with
# differnt h functions when estimating the sigmasq parameter,
# assuming mu=0.5 is known.
sigmas <- exp(seq(log(0.1),log(10),length.out=100))
mu <- 0.5
curves2 <- matrix(NA, nrow=length(h_names)+1, ncol=length(sigmas))
for (sigma_i in 1:length(sigmas)){
for (hi in 1:length(h_names))
curves2[hi,sigma_i] <- varhat(mu,sigmas[sigma_i]^2,modes[hi],param1s[hi],param2s[hi],est_mu=FALSE)
curves2[length(h_names)+1,sigma_i] <- crbound_sigma(mu,sigmas[sigma_i]^2)
}
par(mfrow=c(1,2), mar=c(5,5,1,1), xpd=TRUE, bty="n")
plot(c(),c(), xlim=range(sigmas), ylim=log(range(curves2)), xlab=expression(sigma[0]), ylab="log(Asymptotic var)", cex.lab=1, cex.axis=1)
order <- c(4,6,5,1,2,3,7,8)
colors <- c("red","darkorange1","gold3","green","forestgreen",
"hotpink","blue",gray.colors(1,start=0.7,end=0.7))[order]
for (hi in 1:length(h_names)){
points(sigmas, log(curves2[hi, ]), col = colors[hi], type="l", lwd=3)
}
points(sigmas, log(curves2[length(h_names)+1,]), type="l", lty=2, lwd=3)
legend("topright", x.intersp = 0.2, y.intersp=0.7, inset = c(0.5,0.3), lty = c(rep(1,length(h_names)),2), ncol = 1, lwd=c(rep(3,length(h_names)),2), seg.len=1, bty = "n", text.width = .1, col=c(colors[match(1:length(h_names), order)],"black"), legend=c(h_names[match(1:length(h_names), order)],"C-R lower bound"), cex=1)
plot(c(),c(),xlim=range(sigmas),ylim=range(t(curves2[length(h_names)+1,]/t(curves2))), xlab=expression(sigma[0]), ylab="Efficiency", cex.lab=1, cex.axis=1)
for (hi in 1:length(h_names)){
points(sigmas, curves2[length(h_names)+1,]/curves2[hi,], col=colors[hi], type="l", lwd=3)
}
print("Demo completed.")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.