sp    R Documentation
Description

sp is the main function for computing the support points in Mak and Joseph (2018). Current options include support points on standard distributions (specified via dist.str) or support points for reducing big data (specified via dist.samp). For big data reduction, weights on each data point can be specified via wts.
Usage

sp(n, p, ini=NA,
   dist.str=NA, dist.param=vector("list",p),
   dist.samp=NA, scale.flg=TRUE, wts=NA, bd=NA,
   num.subsamp=ifelse(any(is.na(dist.samp)),
                      max(10000,10*n), min(10000,nrow(dist.samp))),
   rnd.flg=ifelse(any(is.na(dist.samp)),
                  TRUE, ifelse(num.subsamp<=10000,FALSE,TRUE)),
   iter.max=max(250,iter.min), iter.min=50,
   tol=1e-10, par.flg=TRUE, n0=n*p)
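As a quick orientation before the argument details, here is a minimal sketch of the two calling modes. It assumes sp() is loaded from its package (the support package on CRAN); the normal target and the synthetic matrix X are illustrative stand-ins, not package defaults:

library(support)  # assumed package providing sp()

# Mode 1: support points for a named distribution (via dist.str)
D1 <- sp(n=50, p=2, dist.str=rep("normal", 2))

# Mode 2: support points compacting an existing big dataset (via dist.samp)
X <- matrix(rnorm(2e4), ncol=2)  # synthetic stand-in for an MCMC chain
D2 <- sp(n=50, p=2, dist.samp=X)

Exactly one of dist.str and dist.samp is supplied per call; the other is left as NA.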
Arguments

n
    Number of support points.

p
    Dimension of the sample space.

ini
    An n x p matrix for initialization.

dist.str
    A p-length string vector for the desired distribution (assuming independence). Current options include "uniform", "normal", "exponential", "gamma", "lognormal", "student-t", "weibull", "cauchy" and "beta". Exactly one of dist.str or dist.samp should be NA.

dist.param
    A p-length list of parameters for the distributions in dist.str.

dist.samp
    An N x p matrix for the big dataset (e.g., an MCMC chain) to be reduced. Exactly one of dist.str or dist.samp should be NA.

scale.flg
    Should the big data in dist.samp be rescaled before reduction?

wts
    Weights on each data point in dist.samp (a short sketch follows this list).

bd
    A p x 2 matrix of lower and upper bounds for each variable.

num.subsamp
    Batch size for resampling. For distributions, the default is max(10000, 10*n); for big data reduction, the default is min(10000, nrow(dist.samp)).

rnd.flg
    Should the big data be randomly subsampled?

iter.max
    Maximum number of optimization iterations.

iter.min
    Minimum number of optimization iterations.

tol
    Error tolerance for optimization.

par.flg
    Should parallelization be used?

n0
    Momentum parameter for optimization.
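As promised in the wts entry above, a short sketch of weighted big-data reduction; the dataset X and weight vector w are illustrative assumptions, not part of the package:

# Illustrative only: give later rows of a chain more weight during reduction
X <- matrix(rnorm(1e4), ncol=2)       # stand-in N x p big dataset
w <- seq(0.1, 1, length.out=nrow(X))  # one nonnegative weight per row of X
D <- sp(n=25, p=2, dist.samp=X, wts=w)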
Value

A list with the following components:

sp
    An n x p matrix for support points.

ini
    An n x p matrix for initial points.
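A brief usage note on the returned list, matching the components above (the uniform target here is an arbitrary choice):

D <- sp(n=25, p=2, dist.str=rep("uniform", 2))
dim(D$sp)   # 25 x 2: the support points
dim(D$ini)  # 25 x 2: the initial points used for optimization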
References

Mak, S. and Joseph, V. R. (2018). Support points. Annals of Statistics, 46(6A):2562-2592.
Examples

#############################################################
# Support points on distributions
#############################################################

# Generate 25 SPs for the 2-d i.i.d. N(0,1) distribution
n <- 25 # number of points
p <- 2  # dimension
D <- sp(n, p, dist.str=rep("normal", p))

# Plot contours
x1 <- seq(-3.5, 3.5, length.out=100)
x2 <- seq(-3.5, 3.5, length.out=100)
z <- exp(-outer(x1^2, x2^2, FUN="+")/2)
contour.default(x=x1, y=x2, z=z, drawlabels=FALSE, nlevels=10)
points(D$sp, pch=16, cex=1.25, col="red")

#############################################################
# Generate 50 SPs for the 2-d i.i.d. Beta(2,4) distribution
#############################################################
n <- 50
p <- 2
dist.param <- vector("list", p)
for (l in 1:p) {
  dist.param[[l]] <- c(2, 4)
}
D <- sp(n, p, dist.str=rep("beta", p), dist.param=dist.param)

# Plot contours
x1 <- seq(0, 1, length.out=100)
x2 <- seq(0, 1, length.out=100)
z <- matrix(NA, nrow=100, ncol=100)
for (i in 1:100) {
  for (j in 1:100) {
    z[i, j] <- dbeta(x1[i], 2, 4) * dbeta(x2[j], 2, 4)
  }
}
contour.default(x=x1, y=x2, z=z, drawlabels=FALSE, nlevels=10)
points(D$sp, pch=16, cex=1.25, col="red")

#############################################################
# Generate 100 SPs for the 3-d i.i.d. Exp(1) distribution
#############################################################
n <- 100
p <- 3
D <- sp(n, p, dist.str=rep("exponential", p))
pairs(D$sp, xlim=c(0, 5), ylim=c(0, 5), pch=16)

#############################################################
# Support points for big data reduction: Franke's function
#############################################################
## Not run:
library(MHadaptive)

# Use modified Franke's function as posterior
franke2d <- function(xx) {
  if ((xx[1] > 1) || (xx[1] < 0) || (xx[2] > 1) || (xx[2] < 0)) {
    return(-Inf)
  } else {
    x1 <- xx[1]
    x2 <- xx[2]
    term1 <- 0.75 * exp(-(9*x1-2)^2/4 - (9*x2-2)^2/4)
    term2 <- 0.75 * exp(-(9*x1+1)^2/49 - (9*x2+1)/10)
    term3 <- 0.5 * exp(-(9*x1-7)^2/4 - (9*x2-3)^2/4)
    term4 <- -0.2 * exp(-(9*x1-4)^2 - (9*x2-7)^2)
    y <- term1 + term2 + term3 + term4
    return(2*log(y))
  }
}

# Generate MCMC samples
li_func <- franke2d # desired log-posterior
ini <- c(0.5, 0.5)  # initial point for MCMC
NN <- 1e5           # number of MCMC samples desired
burnin <- NN/2      # number of burn-in runs
mcmc_r <- Metro_Hastings(li_func, pars=ini, prop_sigma=0.05*diag(2),
                         iterations=NN, burn_in=burnin)

# Compute n SPs
n <- 100
D <- sp(n, 2, dist.samp=mcmc_r$trace)

# Plot SPs over the big data and over the posterior contours
oldpar <- par(mfrow=c(1, 2))
x1 <- seq(0, 1, length.out=100)
x2 <- seq(0, 1, length.out=100)
z <- matrix(NA, nrow=100, ncol=100)
for (i in 1:100) {
  for (j in 1:100) {
    z[i, j] <- franke2d(c(x1[i], x2[j]))
  }
}
plot(mcmc_r$trace, pch=4, col="gray", cex=0.75,
     xlab="", ylab="", xlim=c(0, 1), ylim=c(0, 1)) # big data
points(D$sp, pch=16, cex=1.25, col="red")
contour.default(x=x1, y=x2, z=z, drawlabels=TRUE, nlevels=10) # contours
points(D$sp, pch=16, cex=1.25, col="red")
par(oldpar)

#############################################################
# Support points for big data: Rosenbrock distribution
#############################################################
# Use Rosenbrock function as posterior
rosen2d <- function(x) {
  B <- 0.03
  -x[1]^2/200 - 1/2*(x[2] + B*x[1]^2 - 100*B)^2
}

# Generate MCMC samples
li_func <- rosen2d # desired log-posterior
ini <- c(0, 1)     # initial point for MCMC
NN <- 1e5          # number of MCMC samples desired
burnin <- NN/2     # number of burn-in runs
mcmc_r <- Metro_Hastings(li_func, pars=ini, prop_sigma=0.25*diag(2),
                         iterations=NN, burn_in=burnin)

# Compute n SPs
n <- 100
D <- sp(n, 2, dist.samp=mcmc_r$trace)

# Plot SPs over the big data and over the posterior contours
x1 <- seq(-25, 25, length.out=100)
x2 <- seq(-15, 6, length.out=100)
z <- matrix(NA, nrow=100, ncol=100)
for (i in 1:100) {
  for (j in 1:100) {
    z[i, j] <- rosen2d(c(x1[i], x2[j]))
  }
}
plot(mcmc_r$trace, pch=4, col="gray", cex=0.75,
     xlab="", ylab="", xlim=c(-25, 25), ylim=c(-15, 6)) # big data
points(D$sp, pch=16, cex=1.25, col="red")
contour.default(x=x1, y=x2, z=z, drawlabels=TRUE, nlevels=10) # contours
points(D$sp, pch=16, cex=1.25, col="red")
## End(Not run)