knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
knitr::opts_chunk$set(error = TRUE)
library(ConformalSmallest) library(ggplot2)
This vignette is dived into three parts. 1. we explain how we use EFCP and VFCP vs. cross validation (CV) and other parametric methods for ridge regression 2. we generate the plots as shown in our paper 3. we compare the tuning parameters chosen by EFCP, VFCP, CV using two splits and CV using three splits.
library(glmnet) library(MASS) library(mvtnorm) #source("ridge_funs.R") name=paste("linear_fm_t3",sep="") df <- 3 #degrees of freedom l <- 60 #number of dimensions l.lambda <- 100 lambda_seq <- seq(0,200,l=l.lambda) dim <- round(seq(5,300,l=l)) alpha <- 0.1 n <- 200 #number of training samples n0 <- 100 #number of prediction points nrep <- 100 #number of independent trials rho <- 0.5 cov.efcp_linear_fm_t3 <- len.efcp_linear_fm_t3 <- matrix(0,nrep,l) cov.vfcp_linear_fm_t3 <- len.vfcp_linear_fm_t3 <- matrix(0,nrep,l) cov.naive_linear_fm_t3 <- len.naive_linear_fm_t3 <- matrix(0,nrep,l) cov.param_linear_fm_t3 <- len.param_linear_fm_t3 <- matrix(0,nrep,l) cov.star_linear_fm_t3 <- len.star_linear_fm_t3 <- matrix(0,nrep,l) cov.cv10_linear_fm_t3 <- len.cv10_linear_fm_t3 <- matrix(0,nrep,l) cov.cv5_linear_fm_t3 <- len.cv5_linear_fm_t3 <- matrix(0,nrep,l) cov.cvloo_linear_fm_t3 <- len.cvloo_linear_fm_t3 <- matrix(0,nrep,l) out.efcp.up <- out.efcp.lo <- matrix(0,n0,l) out.vfcp.up <- out.vfcp.lo <- matrix(0,n0,l) out.naive.up <- out.naive.lo <- matrix(0,n0,l) out.param.up <- out.param.lo <- matrix(0,n0,l) out.star.up <- out.star.lo <- matrix(0,n0,l) out.cv10.up <- out.cv10.lo <- matrix(0,n0,l) out.cv5.up <- out.cv5.lo <- matrix(0,n0,l) out.cvloo.up <- out.cvloo.lo <- matrix(0,n0,l) for(i in 1:nrep){ if(i%%10 == 0){ print(i) } for (r in 1:l){ d <- dim[r] set.seed(i) Sigma <- matrix(rho,d,d) diag(Sigma) <- rep(1,d) X <- rmvt(n,Sigma,df) #multivariate t distribution beta <- rep(1:5,d/5) eps <- rt(n,df)*(1+sqrt(X[,1]^2+X[,2]^2)) Y <- X%*%beta+eps X0 <- rmvt(n0,Sigma,df) eps0 <- rt(n0,df)*(1+sqrt(X0[,1]^2+X0[,2]^2)) Y0 <- X0%*%beta+eps0 out.param <- ginverse.fun(X,Y,X0,alpha=alpha) out.param.lo[,r] <- out.param$lo out.param.up[,r] <- out.param$up cov.param_linear_fm_t3[i,r] <- mean(out.param.lo[,r] <= Y0 & Y0 <= out.param.up[,r]) len.param_linear_fm_t3[i,r] <- mean(out.param.up[,r]-out.param.lo[,r]) out.efcp <- efcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha) out.efcp.up[,r] <- out.efcp$up out.efcp.lo[,r] <- out.efcp$lo cov.efcp_linear_fm_t3[i,r] <- mean(out.efcp.lo[,r] <= Y0 & Y0 <= out.efcp.up[,r]) len.efcp_linear_fm_t3[i,r] <- mean(out.efcp.up[,r]-out.efcp.lo[,r]) out.vfcp <- vfcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha) out.vfcp.up[,r] <- out.vfcp$up out.vfcp.lo[,r] <- out.vfcp$lo cov.vfcp_linear_fm_t3[i,r] <- mean(out.vfcp.lo[,r] <= Y0 & Y0 <= out.vfcp.up[,r]) len.vfcp_linear_fm_t3[i,r] <- mean(out.vfcp.up[,r]-out.vfcp.lo[,r]) out.naive <- naive.fun(X,Y,X0,alpha=alpha) out.naive.up[,r] <- out.naive$up out.naive.lo[,r] <- out.naive$lo cov.naive_linear_fm_t3[i,r] <- mean(out.naive.lo[,r] <= Y0 & Y0 <= out.naive.up[,r]) len.naive_linear_fm_t3[i,r] <- mean(out.naive.up[,r]-out.naive.lo[,r]) out.star <- star.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha) out.star.up[,r] <- out.star$up out.star.lo[,r] <- out.star$lo cov.star_linear_fm_t3[i,r] <- mean(out.star.lo[,r] <= Y0 & Y0 <= out.star.up[,r]) len.star_linear_fm_t3[i,r] <- mean(out.star.up[,r] - out.star.lo[,r]) out.cv5 <- cv.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha,nfolds=5) out.cv5.up[,r] <- out.cv5$up out.cv5.lo[,r] <- out.cv5$lo cov.cv5_linear_fm_t3[i,r] <- mean(out.cv5.lo[,r] <= Y0 & Y0 <= out.cv5.up[,r]) len.cv5_linear_fm_t3[i,r] <- mean(out.cv5.up[,r] - out.cv5.lo[,r]) } } ridge_linear_len100_t3=list(len.param_linear_fm_t3, len.naive_linear_fm_t3, len.vfcp_linear_fm_t3, len.star_linear_fm_t3, len.cv5_linear_fm_t3, len.efcp_linear_fm_t3) save(ridge_linear_len100_t3, file = "ridge_linear_len100_t3.rda" ) ridge_linear_cov100_t3=list(dim_linear_t3,cov.param_linear_fm_t3, cov.naive_linear_fm_t3, cov.vfcp_linear_fm_t3, cov.star_linear_fm_t3, cov.cv5_linear_fm_t3, cov.efcp_linear_fm_t3) save(ridge_linear_cov100_t3, file = "ridge_linear_cov100_t3.rda" )
Similarly we do the same when the degree of freedom is 5 (code omitted).
library(glmnet) library(MASS) library(mvtnorm) name=paste("linear_fm_t5",sep="") df <- 5 #degrees of freedom l <- 60 #number of dimensions l.lambda <- 100 lambda_seq <- seq(0,200,l=l.lambda) dim <- round(seq(5,300,l=l)) alpha <- 0.1 n <- 200 #number of training samples n0 <- 100 #number of prediction points nrep <- 100 #number of independent trials rho <- 0.5 cov.efcp_linear_fm_t5 <- len.efcp_linear_fm_t5 <- matrix(0,nrep,l) cov.vfcp_linear_fm_t5 <- len.vfcp_linear_fm_t5 <- matrix(0,nrep,l) cov.naive_linear_fm_t5 <- len.naive_linear_fm_t5 <- matrix(0,nrep,l) cov.param_linear_fm_t5 <- len.param_linear_fm_t5 <- matrix(0,nrep,l) cov.star_linear_fm_t5 <- len.star_linear_fm_t5 <- matrix(0,nrep,l) cov.cv10_linear_fm_t5 <- len.cv10_linear_fm_t5 <- matrix(0,nrep,l) cov.cv5_linear_fm_t5 <- len.cv5_linear_fm_t5 <- matrix(0,nrep,l) cov.cvloo_linear_fm_t5 <- len.cvloo_linear_fm_t5 <- matrix(0,nrep,l) out.efcp.up <- out.efcp.lo <- matrix(0,n0,l) out.vfcp.up <- out.vfcp.lo <- matrix(0,n0,l) out.naive.up <- out.naive.lo <- matrix(0,n0,l) out.param.up <- out.param.lo <- matrix(0,n0,l) out.star.up <- out.star.lo <- matrix(0,n0,l) out.cv10.up <- out.cv10.lo <- matrix(0,n0,l) out.cv5.up <- out.cv5.lo <- matrix(0,n0,l) out.cvloo.up <- out.cvloo.lo <- matrix(0,n0,l) for(i in 1:nrep){ if(i%%10 == 0){ print(i) } for (r in 1:l){ d <- dim[r] set.seed(i) Sigma <- matrix(rho,d,d) diag(Sigma) <- rep(1,d) X <- rmvt(n,Sigma,df) #multivariate t distribution beta <- rep(1:5,d/5) eps <- rt(n,df)*(1+sqrt(X[,1]^2+X[,2]^2)) Y <- X%*%beta+eps X0 <- rmvt(n0,Sigma,df) eps0 <- rt(n0,df)*(1+sqrt(X0[,1]^2+X0[,2]^2)) Y0 <- X0%*%beta+eps0 out.param <- ginverse.fun(X,Y,X0,alpha=alpha) out.param.lo[,r] <- out.param$lo out.param.up[,r] <- out.param$up cov.param_linear_fm_t5[i,r] <- mean(out.param.lo[,r] <= Y0 & Y0 <= out.param.up[,r]) len.param_linear_fm_t5[i,r] <- mean(out.param.up[,r]-out.param.lo[,r]) out.efcp <- efcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha) out.efcp.up[,r] <- out.efcp$up out.efcp.lo[,r] <- out.efcp$lo cov.efcp_linear_fm_t5[i,r] <- mean(out.efcp.lo[,r] <= Y0 & Y0 <= out.efcp.up[,r]) len.efcp_linear_fm_t5[i,r] <- mean(out.efcp.up[,r]-out.efcp.lo[,r]) out.vfcp <- vfcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha) out.vfcp.up[,r] <- out.vfcp$up out.vfcp.lo[,r] <- out.vfcp$lo cov.vfcp_linear_fm_t5[i,r] <- mean(out.vfcp.lo[,r] <= Y0 & Y0 <= out.vfcp.up[,r]) len.vfcp_linear_fm_t5[i,r] <- mean(out.vfcp.up[,r]-out.vfcp.lo[,r]) out.naive <- naive.fun(X,Y,X0,alpha=alpha) out.naive.up[,r] <- out.naive$up out.naive.lo[,r] <- out.naive$lo cov.naive_linear_fm_t5[i,r] <- mean(out.naive.lo[,r] <= Y0 & Y0 <= out.naive.up[,r]) len.naive_linear_fm_t5[i,r] <- mean(out.naive.up[,r]-out.naive.lo[,r]) out.star <- star.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha) out.star.up[,r] <- out.star$up out.star.lo[,r] <- out.star$lo cov.star_linear_fm_t5[i,r] <- mean(out.star.lo[,r] <= Y0 & Y0 <= out.star.up[,r]) len.star_linear_fm_t5[i,r] <- mean(out.star.up[,r] - out.star.lo[,r]) out.cv5 <- cv.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha,nfolds=5) out.cv5.up[,r] <- out.cv5$up out.cv5.lo[,r] <- out.cv5$lo cov.cv5_linear_fm_t5[i,r] <- mean(out.cv5.lo[,r] <= Y0 & Y0 <= out.cv5.up[,r]) len.cv5_linear_fm_t5[i,r] <- mean(out.cv5.up[,r] - out.cv5.lo[,r]) } } ridge_linear_cov100_t5=list(dim_linear_t5,cov.param_linear_fm_t5, cov.naive_linear_fm_t5, cov.vfcp_linear_fm_t5, cov.star_linear_fm_t5, cov.cv5_linear_fm_t5, cov.efcp_linear_fm_t5) save(ridge_linear_cov100_t5, file = "ridge_linear_cov100_t5.rda" ) ridge_linear_len100_t5=list(len.param_linear_fm_t5, len.naive_linear_fm_t5, len.vfcp_linear_fm_t5, len.star_linear_fm_t5, len.cv5_linear_fm_t5, len.efcp_linear_fm_t5) save(ridge_linear_len100_t5, file = "ridge_linear_len100_t5.rda" )
data(ridge_linear_cov100_t3,package = "ConformalSmallest") data(ridge_linear_len100_t3,package = "ConformalSmallest") dim=ridge_linear_cov100_t3[[1]] cov.param_linear_fm_t3=ridge_linear_cov100_t3[[2]] cov.naive_linear_fm_t3=ridge_linear_cov100_t3[[3]] cov.vfcp_linear_fm_t3=ridge_linear_cov100_t3[[4]] cov.star_linear_fm_t3=ridge_linear_cov100_t3[[5]] cov.cv5_linear_fm_t3=ridge_linear_cov100_t3[[6]] cov.efcp_linear_fm_t3=ridge_linear_cov100_t3[[7]] len.param_linear_fm_t3=ridge_linear_len100_t3[[1]] len.naive_linear_fm_t3=ridge_linear_len100_t3[[2]] len.vfcp_linear_fm_t3=ridge_linear_len100_t3[[3]] len.star_linear_fm_t3=ridge_linear_len100_t3[[4]] len.cv5_linear_fm_t3=ridge_linear_len100_t3[[5]] len.efcp_linear_fm_t3=ridge_linear_len100_t3[[6]] len.param = len.param_linear_fm_t3/len.vfcp_linear_fm_t3 len.naive = len.naive_linear_fm_t3/len.vfcp_linear_fm_t3 len.star = len.star_linear_fm_t3/len.vfcp_linear_fm_t3 len.cv5 = len.cv5_linear_fm_t3/len.vfcp_linear_fm_t3 len.efcp = len.efcp_linear_fm_t3/len.vfcp_linear_fm_t3 len.vfcp = len.vfcp_linear_fm_t3/len.vfcp_linear_fm_t3 df.cov3=data.frame(dim,apply(cov.param_linear_fm_t3,2,mean),apply(cov.naive_linear_fm_t3,2,mean),apply(cov.vfcp_linear_fm_t3,2,mean),apply(cov.star_linear_fm_t3,2,mean),apply(cov.cv5_linear_fm_t3,2,mean), apply(cov.efcp_linear_fm_t3,2,mean)) df.cov3_sd=data.frame(dim,apply(cov.param_linear_fm_t3,2,sd),apply(cov.naive_linear_fm_t3,2,sd),apply(cov.vfcp_linear_fm_t3,2,sd),apply(cov.star_linear_fm_t3,2,sd),apply(cov.cv5_linear_fm_t3,2,sd), apply(cov.efcp_linear_fm_t3,2,sd))/sqrt(nrow(len.param)) df.len3=data.frame(dim,apply(len.param,2,mean),apply(len.naive,2,mean),apply(len.vfcp,2,mean),apply(len.star,2,mean),apply(len.cv5,2,mean), apply(len.efcp,2,mean)) df.len3_sd=data.frame(dim,apply(len.param,2,sd),apply(len.naive,2,sd),apply(len.vfcp,2,sd),apply(len.star,2,sd),apply(len.cv5,2,sd), apply(len.efcp,2,sd))/sqrt(nrow(len.param)) data(ridge_linear_cov100_t5,package = "ConformalSmallest") data(ridge_linear_len100_t5,package = "ConformalSmallest") dim=ridge_linear_cov100_t5[[1]] cov.param_linear_fm_t5=ridge_linear_cov100_t5[[2]] cov.naive_linear_fm_t5=ridge_linear_cov100_t5[[3]] cov.vfcp_linear_fm_t5=ridge_linear_cov100_t5[[4]] cov.star_linear_fm_t5=ridge_linear_cov100_t5[[5]] cov.cv5_linear_fm_t5=ridge_linear_cov100_t5[[6]] cov.efcp_linear_fm_t5=ridge_linear_cov100_t5[[7]] len.param_linear_fm_t5=ridge_linear_len100_t5[[1]] len.naive_linear_fm_t5=ridge_linear_len100_t5[[2]] len.vfcp_linear_fm_t5=ridge_linear_len100_t5[[3]] len.star_linear_fm_t5=ridge_linear_len100_t5[[4]] len.cv5_linear_fm_t5=ridge_linear_len100_t5[[5]] len.efcp_linear_fm_t5=ridge_linear_len100_t5[[6]] len.param = len.param_linear_fm_t5/len.vfcp_linear_fm_t5 len.naive = len.naive_linear_fm_t5/len.vfcp_linear_fm_t5 len.star = len.star_linear_fm_t5/len.vfcp_linear_fm_t5 len.cv5 = len.cv5_linear_fm_t5/len.vfcp_linear_fm_t5 len.efcp = len.efcp_linear_fm_t5/len.vfcp_linear_fm_t5 len.vfcp = len.vfcp_linear_fm_t5/len.vfcp_linear_fm_t5 df.cov5=data.frame(dim,apply(cov.param_linear_fm_t5,2,mean),apply(cov.naive_linear_fm_t5,2,mean),apply(cov.vfcp_linear_fm_t5,2,mean),apply(cov.star_linear_fm_t5,2,mean),apply(cov.cv5_linear_fm_t5,2,mean), apply(cov.efcp_linear_fm_t5,2,mean)) df.cov5_sd=data.frame(dim,apply(cov.param_linear_fm_t5,2,sd),apply(cov.naive_linear_fm_t5,2,sd),apply(cov.vfcp_linear_fm_t5,2,sd),apply(cov.star_linear_fm_t5,2,sd),apply(cov.cv5_linear_fm_t5,2,sd), apply(cov.efcp_linear_fm_t5,2,sd))/sqrt(nrow(len.param)) df.len5=data.frame(dim,apply(len.param,2,mean),apply(len.naive,2,mean),apply(len.vfcp,2,mean),apply(len.star,2,mean),apply(len.cv5,2,mean), apply(len.efcp,2,mean)) df.len5_sd=data.frame(dim,apply(len.param,2,sd),apply(len.naive,2,sd),apply(len.vfcp,2,sd),apply(len.star,2,sd),apply(len.cv5,2,sd), apply(len.efcp,2,sd))/sqrt(nrow(len.param)) bgnd <- theme_get()$panel.background$fill names=c("Linear","Naive","VFCP","CV*","CV-5-fold","EFCP") #colors_manual=c("blue","#FF9933","#999000","66FFCC","#66CC99","#9999FF","#FF00CC") colors_manual=c("red","slategrey","darkorchid3","dodgerblue4","turquoise3","grey23") shape_manual=c(0,1,2,3,4,5) seq=seq(2,60,by=2) col_names=c("dim","cov.param","cov.naive","cov.vfcp","cov.star","cov.cv5","cov.efcp") names(df.cov3)=names(df.cov5)=names(df.cov3_sd)=names(df.cov5_sd)=col_names col_names=c("dim","len.param","len.naive","len.vfcp","len.star","len.cv5","len.efcp") names(df.len3)=names(df.len5)=names(df.len3_sd)=names(df.len5_sd)=col_names df.cov = rbind(df.cov3[seq,], df.cov5[seq,]) df.cov_sd = rbind(df.cov3_sd[seq,], df.cov5_sd[seq,]) df.len = rbind(df.len3[seq,], df.len5[seq,]) df.len_sd = rbind(df.len3_sd[seq,], df.len5_sd[seq,])
Moment = c("3rd moment", "5th moment") df_names = expand.grid(seq,Moment,names) cov_vec = as.vector(as.matrix(df.cov[,-1])) cov_sd_vec = as.vector(as.matrix(df.cov_sd[,-1])) cov = cbind(df_names[,1], cov_vec, cov_sd_vec , df_names[,-1] ) len_vec = as.vector(as.matrix(df.len[,-1])) len_sd_vec = as.vector(as.matrix(df.len_sd[,-1])) len = cbind(df_names[,1], len_vec, len_sd_vec , df_names[,-1]) cov$Var="Coverage" len$Var = "Width ratio" colnames(cov) = c("V1","V2","sd","Moment","Method","Var") colnames(len) = c("V1","V2","sd","Moment","Method","Var")
data_linear_fm=rbind(cov,len) dummy_coverage <- data.frame(Var=c("Coverage"),Z= 0.9) ggplot_linear_fm=ggplot(data=data_linear_fm, aes(x=V1, y=V2, group=Method,color=Method)) + geom_errorbar(aes(ymin=V2-sd, ymax=V2+sd), width=.1)+ geom_line(size = 0.8, aes(color=Method))+ #,linetype=Method)) + #geom_point(size=5, colour=bgnd)+ #geom_point(aes(shape=Method,color=Method)) + theme(#axis.text.y = element_blank(), #axis.ticks.y = element_blank(), axis.title.y = element_blank(), #plot.title = element_text(size = 8,hjust=0.5), #axis.text = element_text(size = 10), #axis.title =element_text(size = 10), legend.position="top") + facet_grid(Var~Moment,scales = "free")+ #scale_shape_manual(values=shape_manual) + scale_color_manual(values=colors_manual) + #ylim(0,1.2) + #xlab("dim") + ylab("Avg-Len") xlab("Dimension")+guides(shape=FALSE)+ theme(strip.text.x = element_text(size = 12, face = "bold"), strip.text.y = element_text(size=12, face="bold"), axis.ticks.length=unit(.25, "cm"),axis.title=element_text(size=12))+ geom_hline(data=dummy_coverage, aes(yintercept=Z), linetype="dashed") ggplot_linear_fm
library(repr) library(glmnet) library(MASS) library(mvtnorm) name=paste("linear_fm_t3",sep="") set.seed(2021) df <- 3 #degrees of freedom l <- 1 #number of dimensions l.lambda <- 100 lambda_seq <- seq(0,200,l=l.lambda) dim <- 5 alpha <- 0.1 n <- 200 #number of training samples n0 <- 100 #number of prediction points nrep <- 100 #number of independent trials rho <- 0.5 lambda.efcp <- lambda.vfcp <- lambda.star <- lambda.cv5 <- rep(0,nrep) for(i in 1:nrep){ for (r in 1:l){ d <- dim[r] set.seed(i) Sigma <- matrix(rho,d,d) diag(Sigma) <- rep(1,d) X <- rmvt(n,Sigma,df) #multivariate t distribution beta <- rep(1:5,d/5) eps <- rt(n,df)*(1+sqrt(X[,1]^2+X[,2]^2)) Y <- X%*%beta+eps X0 <- rmvt(n0,Sigma,df) eps0 <- rt(n0,df)*(1+sqrt(X0[,1]^2+X0[,2]^2)) Y0 <- X0%*%beta+eps0 out.efcp <- efcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha) lambda.efcp[i] <- out.efcp$lambda out.vfcp <- vfcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha) lambda.vfcp[i] <- out.vfcp$lambda out.star <- star.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha) lambda.star[i] <- out.star$lambda out.cv5 <- cv.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha,nfolds=5) lambda.cv5[i] <- out.cv5$lambda } } #options(repr.plot.width=18, repr.plot.height=6) #par(mar=c(1,1,1,1)) oldpar=par(mfrow=c(2,2)) hist(lambda.cv5,breaks=seq(min(lambda.cv5)-1,max(lambda.cv5)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-2splits") #make x,y, values, main larger hist(lambda.star,breaks=seq(min(lambda.star)-1,max(lambda.star)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-3splits") hist(lambda.efcp,breaks=seq(min(lambda.efcp)-1,max(lambda.efcp)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="EFCP") hist(lambda.vfcp,breaks=seq(min(lambda.vfcp)-1,max(lambda.vfcp)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="VFCP")
The above plot shows that all four methods mostly use $\lambda$ close to zero, which is expected given that we are considering a dimension example where the underlying data generating mechanism is a linear setting.
name=paste("nonlinear_fm_t3",sep="") set.seed(2021) df <- 3 #degrees of freedom l <- 1 #number of dimensions l.lambda <- 100 lambda_seq <- seq(0,200,l=l.lambda) dim <- 5 alpha <- 0.1 n <- 200 #number of training samples n0 <- 100 #number of prediction points nrep <- 100 #number of independent trials rho <- 0.5 lambda.efcp <- lambda.vfcp <- lambda.star <- lambda.cv5 <- rep(0,nrep) for(i in 1:nrep){ for (r in 1:l){ d <- dim[r] set.seed(i) Sigma=matrix(rho,d,d) diag(Sigma)=rep(1,d) X=rmvt(n,Sigma,df) #multivariate t distribution eps1=rt(n,df)*(1+sqrt(X[,1]^2+X[,2]^2)) eps2=rt(n,df)*(1+sqrt(X[,1]^4+X[,2]^4)) Y=rpois(n,sin(X[,1])^2 + cos(X[,2])^4+0.01 )+0.03*X[,1]*eps1+25*(runif(n,0,1)<0.01*eps2) X0=rmvt(n0,Sigma,df) eps01=rt(n0,df)*(1+sqrt(X0[,1]^2+X0[,2]^2)) eps02=rt(n0,df)*(1+sqrt(X0[,1]^4+X0[,2]^4)) Y0=rpois(n0,sin(X0[,1])^2 + cos(X0[,2])^4+0.01 )+0.03*X0[,1]*eps01+25*(runif(n0,0,1)<0.01)*eps02 out.efcp <- efcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha) lambda.efcp[i] <- out.efcp$lambda out.vfcp <- vfcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha) lambda.vfcp[i] <- out.vfcp$lambda out.star <- star.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha) lambda.star[i] <- out.star$lambda out.cv5 <- cv.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha,nfolds=5) lambda.cv5[i] <- out.cv5$lambda } } #options(repr.plot.width=18, repr.plot.height=6) oldpar=par(mfrow=c(2,2)) #par(mar=c(1,1,1,1)) hist(lambda.cv5,breaks=seq(min(lambda.cv5)-1,max(lambda.cv5)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-2splits") #make x,y, values, main larger hist(lambda.star,breaks=seq(min(lambda.star)-1,max(lambda.star)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-3splits") hist(lambda.efcp,breaks=seq(min(lambda.efcp)-1,max(lambda.efcp)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="EFCP") hist(lambda.vfcp,breaks=seq(min(lambda.vfcp)-1,max(lambda.vfcp)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="VFCP") par(oldpar)
The above plots show that all four methods mostly choose the penalty parameter $\lambda$ to be as large as possible, which is also expected given that the underlying data generating mechanism is a nonlinear setting.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.