inst/doc/Example-tuning_free_CQR.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(ConformalSmallest)

## -----------------------------------------------------------------------------
df=3
d = 1 
x_test <- seq(0,5,by=0.2)

alpha=0.1
n <- 500   #number of training samples
nrep <- 1 #number of independent trials
nrep2 <- 100
rho <- 0.5

evaluations <- expand.grid(1:nrep, n, x_test,"CQR")
no_eval <- nrow(evaluations)
up_mat <- lo_mat <- width_mat <- cov_mat <- data.frame(number = rep(0, no_eval), 
                                                       rep = evaluations[,1], 
                                                       nset = evaluations[,2],
                                                       X_test = evaluations[,3],
                                                       method = evaluations[,4])
colnames(width_mat) <- colnames(cov_mat) <- c("number", "rep", "sample size", "test_value","method")


set.seed(1)
  
X=as.matrix(runif(n,0,5))	
eps1=rnorm(n)
eps2=rnorm(n)
Y=rpois(n,sin(X[,1])^2 +0.1 )+0.03*X[,1]*eps1+25*(runif(n,0,1)<0.01)*eps2

#X0=as.matrix(x_test)
X0 = runif(nrep2,0,5)
X0 = as.matrix(X0[order(X0)])
eps01=rnorm(nrep2)
eps02=rnorm(nrep2)
Y0=rpois(nrep2,sin(X0)^2 +0.1 )+0.03*X0*eps01+25*(runif(nrep2,0,1)<0.01)*eps02


beta_fixed = 0.05
mtry_fixed = 1
ntree_fixed = 100
    
tmp =   try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha))
while (class(tmp)=="try-error"){
  tmp =   try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha),silent=TRUE)
}
    
width_vec_cqr = tmp$pred_set(X0, Y0)[[2]]
cov_vec_cqr = tmp$pred_set(X0, Y0)[[1]]
y_lo_cqr <- tmp$pred_set(X0, Y0)[[3]]
y_up_cqr <- tmp$pred_set(X0, Y0)[[4]]
quant_lo_cqr <- tmp$pred_set(X0, Y0)[[5]]
quant_hi_cqr <- tmp$pred_set(X0, Y0)[[6]]


#source('cqr_function_conditional.r') 
method = "efficient"
split <- 1/2
beta_grid <- seq(0.01, 0.4, by=0.01)
mtry_grid <- 1
ntree_grid <- seq(50, 400, by = 50)

tmp =   try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha))    
while (class(tmp)=="try-error"){
tmp =   try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE)
}
    
width_vec_efcp = tmp$pred_set(X0, Y0)[[2]]
cov_vec_efcp = tmp$pred_set(X0, Y0)[[1]]

y_lo_efcp <- tmp$pred_set(X0, Y0)[[3]]
y_up_efcp <- tmp$pred_set(X0, Y0)[[4]]

quant_lo_efcp <- tmp$pred_set(X0, Y0)[[5]]
quant_hi_efcp <- tmp$pred_set(X0, Y0)[[6]]

method = "valid"
split <- c(1/2,1/2)

tmp =   try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha))    
while (class(tmp)=="try-error"){
tmp =   try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE)
}
    
width_vec_vfcp = tmp$pred_set(X0, Y0)[[2]]
cov_vec_vfcp = tmp$pred_set(X0, Y0)[[1]]

y_lo_vfcp <- tmp$pred_set(X0, Y0)[[3]]
y_up_vfcp <- tmp$pred_set(X0, Y0)[[4]]

quant_lo_vfcp <- tmp$pred_set(X0, Y0)[[5]]
quant_hi_vfcp <- tmp$pred_set(X0, Y0)[[6]]

## ----plot---------------------------------------------------------------------
options(repr.plot.width=18, repr.plot.height=6)
par(mfrow=c(1,3))
plot(X0,Y0,pch=1,col="black",main="CQR: predicted intervals",ylim=c(-2.5,7.5), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
a = data.frame(x = X0, y = y_lo_cqr)
b = data.frame(x = c(1:100), y = y_up_cqr)
c1 <- rgb(173,216,230,max = 255, alpha = 200, names = "lt.blue")
polygon(c(a$x, rev(a$x)), c(b$y ,rev(a$y)), col = c1,border=NA )
points(X0, quant_lo_cqr,col="blue",pch=20)
points(X0, quant_hi_cqr,col="red",pch=20)

plot(X0,Y0,pch=1,col="black",main="EFCP: predicted intervals",ylim=c(-2.5,7.5), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
a = data.frame(x = X0, y = y_lo_efcp)
b = data.frame(x = c(1:100), y = y_up_efcp)
c1 <- rgb(173,216,230,max = 255, alpha = 200, names = "lt.blue")
polygon(c(a$x, rev(a$x)), c(b$y ,rev(a$y)), col = c1,border=NA )
points(X0, quant_lo_efcp,col="blue",pch=20)
points(X0, quant_hi_efcp,col="red",pch=20)

plot(X0,Y0,pch=1,col="black",main="VFCP: predicted intervals",ylim=c(-2.5,7.5), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
a = data.frame(x = X0, y = y_lo_vfcp)
b = data.frame(x = c(1:100), y = y_up_vfcp)
c1 <- rgb(173,216,230,max = 255, alpha = 200, names = "lt.blue")
polygon(c(a$x, rev(a$x)), c(b$y ,rev(a$y)), col = c1,border=NA )
points(X0, quant_lo_vfcp,col="blue",pch=20)
points(X0, quant_hi_vfcp,col="red",pch=20)

print(paste0("CQR average coverage: ",mean(cov_vec_cqr), " average width: ",mean(width_vec_cqr) ))
print(paste0("EFCP average coverage: ",mean(cov_vec_efcp), " average width: ",mean(width_vec_efcp) ))
print(paste0("VFCP average coverage: ",mean(cov_vec_vfcp), " average width: ",mean(width_vec_vfcp) ))

## ----eval=FALSE---------------------------------------------------------------
#  n <- 400
#  x_test = seq(0,5,by=0.2)
#  alpha <- 0.1 #miscoverage level
#  nrep  <-  10 #number of independent trials
#  nrep2 <- 100 #number of y's for each test point x
#  
#  evaluations <- expand.grid(1:nrep, n, x_test, c("efficient", "valid","CQR"))
#  no_eval <- nrow(evaluations)
#  ntree_mat <- beta_mat <- cqr_method_mat <- width_mat <- cov_mat <- data.frame(number = rep(0, no_eval),
#                                     rep = evaluations[,1],
#                                     nset = evaluations[,2],
#                                     X_test = evaluations[,3],
#                                     method = evaluations[,4])
#  colnames(width_mat) <- colnames(cov_mat) <- c("number", "rep", "sample size", "test_value","method")
#  
#  for(idx in 1:nrow(evaluations)){
#    set.seed(evaluations[idx, 1])
#  
#  
#    x0 = evaluations[idx, 3]
#  
#  
#    X <- as.matrix(runif(n,0,5))
#    eps1 <- rnorm(n)
#    eps2 <- rnorm(n)
#    Y <- rpois(n,sin(X[,1])^2 +0.1 )+0.03*X[,1]*eps1+25*(runif(n,0,1)<0.01*eps2)
#  
#    X0 <- as.matrix(rep(x0,nrep2))
#    eps01 <- rnorm(nrep2)
#    eps02 <- rnorm(nrep2)
#    Y0 <- rpois(nrep2,sin(X0)^2 +0.1 )+0.03*X0*eps01+25*(runif(nrep2,0,1)<0.01*eps02)
#  
#    width_mat[idx,3] <- cov_mat[idx, 3] <- n
#    method <- evaluations[idx, 4]
#  
#    if (method =="CQR"){
#      beta_fixed <- 0.05
#      mtry_fixed <- 1
#      ntree_fixed <- 100
#  
#      tmp <- try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha))
#  
#      while (class(tmp)=="try-error"){
#  
#        tmp <- try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha),silent=TRUE)
#  
#      }
#      width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[2]])
#      cov_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[1]])
#    }else{ if(method == "valid"){
#      split <- c(1/2, 1/2)
#    } else {
#      split <- 1/2
#    }
#  
#    beta_grid <- seq(1e-03, 4, length = 20)*alpha
#    mtry_grid <- unique(ceiling(seq(1/10, 1, length = 20)*d))
#    ntree_grid <- seq(50, 400, by = 50)
#  
#    tmp <- try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha))
#  
#    while (class(tmp)=="try-error"){
#      tmp <- try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE)
#    }
#  
#    beta_mat[idx,1] = tmp$beta
#    ntree_mat[idx,1] = tmp$ntree
#    cqr_method_mat[idx, 1]= tmp$cqr_method
#    width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[2]])
#    cov_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[1]])
#    }
#  }
#  
#  pois_n400_reps100=list(x_test, n,nrep,width_mat, cov_mat,beta_mat, ntree_mat, cqr_method_mat, evaluations, alpha)
#  save(pois_n400_reps100, file = "pois_n400_reps100.rda" )

## -----------------------------------------------------------------------------
data(pois_n400_reps100,package = "ConformalSmallest")
x_test=pois_n400_reps100[[1]]
width_mat=pois_n400_reps100[[4]]
cov_mat=pois_n400_reps100[[5]]
beta_mat=pois_n400_reps100[[6]]
ntree_mat=pois_n400_reps100[[7]]
cqr_method_mat=pois_n400_reps100[[8]]
evaluations=pois_n400_reps100[[9]]
alpha=pois_n400_reps100[[10]]

width_cqr <- sd_width_cqr  <-  width_efcp <- width_vfcp <- sd_width_efcp <- sd_width_vfcp <- NULL
for(x in x_test){
  TMP <- width_mat[evaluations[,4] == "efficient", ]
  TMP_prime <- TMP[TMP[,4] == x,]

  TMP <- width_mat[evaluations[,4] == "valid", ]
  TMP_prime_vfcp <- TMP[TMP[,4] == x,]

  TMP <- width_mat[evaluations[,4] == "CQR", ]
  TMP_prime_cqr <- TMP[TMP[,4] == x,]


    width_efcp <- c(width_efcp, mean(TMP_prime[,1] ))
    width_vfcp <- c(width_vfcp, mean(TMP_prime_vfcp[,1]))
    width_cqr <- c(width_cqr, mean(TMP_prime_cqr[,1]))
  #width_efcp <- c(width_efcp, mean(TMP_prime[,1] / TMP_prime_vfcp[,1]))
  #sd_width_efcp <- c(sd_width_efcp, sd(TMP_prime[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))

  #width_vfcp <- c(width_vfcp, mean(TMP_prime_vfcp[,1] / TMP_prime_vfcp[,1]))
  #sd_width_vfcp <- c(sd_width_vfcp, sd(TMP_prime_vfcp[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))

  #width_cqr <- c(width_cqr, mean(TMP_prime_cqr[,1] / TMP_prime_vfcp[,1]))
  #sd_width_cqr <- c(sd_width_cqr, sd(TMP_prime_cqr[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))

}

cov_cqr <-sd_cov_cqr <-cov_efcp <- cov_vfcp <-sd_cov_efcp <- sd_cov_vfcp <- NULL
for(x in x_test){
  TMP <- cov_mat[evaluations[,4] == "efficient", ]
  TMP_prime <- TMP[TMP[,4] == x,]
  cov_efcp <- c( cov_efcp, mean(TMP_prime[,1] ) )
  sd_cov_efcp <- c(sd_cov_efcp, sd(TMP_prime[,1])/sqrt(nrep))

  TMP <- cov_mat[evaluations[,4] == "valid", ]
  TMP_prime <- TMP[TMP[,4] == x,]
  cov_vfcp <- c(cov_vfcp, mean(TMP_prime[,1]))
  sd_cov_vfcp <- c(sd_cov_vfcp, sd(TMP_prime[,1])/sqrt(nrep))

  TMP <- cov_mat[evaluations[,4] == "CQR", ]
  TMP_prime <- TMP[TMP[,4] == x,]
  cov_cqr <- c(cov_cqr, mean(TMP_prime[,1]))
  sd_cov_cqr <- c(sd_cov_cqr, sd(TMP_prime[,1])/sqrt(nrep))
}

## -----------------------------------------------------------------------------
c1 <- rgb(173,216,230,max = 255, alpha = 80, names = "lt.blue")
c2 <- rgb(255,192,203, max = 255, alpha = 80, names = "lt.pink")

oldpar=par(mfrow=c(1,2))
plot(factor(cqr_method_mat[evaluations[,4] == "valid",1]),col=c1, main="EFCP",cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,xlab="Method",ylab="Frequency")
#legend("right", legend=c("EFCP", "VFCP"),fill=c(c1, c2))
plot(factor(cqr_method_mat[evaluations[,4] == "efficient",1]),col=c2, main="VFCP",cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,xlab="Method",ylab="Frequency")

## -----------------------------------------------------------------------------
oldpar=par(mfrow=c(1,2))
hist(ntree_mat[evaluations[,4] == "efficient",1],col=c1,breaks=ntree_grid,main="EFCP",xlab="ntree",cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
hist(ntree_mat[evaluations[,4] == "valid",1],col=c2,breaks=ntree_grid,main="VFCP",xlab="ntree",cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
#legend("left", legend=c("EFCP", "VFCP"), fill=c(c1, c2))

## -----------------------------------------------------------------------------
oldpar=par(mfrow=c(1,2))
hist(beta_mat[evaluations[,4] == "efficient",1],col=c1,breaks=seq(range(beta_mat[,1])[1],range(beta_mat[,1])[2],0.1),main="EFCP",xlab=expression(beta),cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
hist(beta_mat[evaluations[,4] == "valid",1],col=c2,breaks=seq(range(beta_mat[,1])[1],range(beta_mat[,1])[2],0.1),main="VFCP",xlab=expression(beta),cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
#legend("top", legend=c("EFCP", "VFCP"),fill=c(c1, c2))

## -----------------------------------------------------------------------------
oldpar=par(mfrow=c(1,2))
plot(x_test, width_efcp, type = 'l', ylim = c(0,10), lwd = 2,ylab="Width", cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
#lines(x_test, width_efcp - sd_width_efcp, type = 'l', lty = 2, lwd = 2)
#lines(x_test, width_efcp + sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(x_test, width_vfcp, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "red")
#lines(x_test, width_vfcp - sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
#lines(x_test, width_vfcp + sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
lines(x_test, width_cqr, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "blue")
legend("topright", legend=c("EFCP", "VFCP","CQR"),
       col=c("black","red", "blue"), lty=1, lwd=2)

plot(x_test, cov_efcp, type = 'l', ylim = c(0, 1), lwd = 2,ylab="Conditional Coverage", cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
lines(x_test, cov_vfcp, type = 'l', col = "red", lwd = 2)
lines(x_test, cov_cqr, type = 'l', col = "blue", lwd = 2)
legend(0,0.2, legend=c("EFCP", "VFCP","CQR"),
       col=c("black","red", "blue"), lty=1)
abline(h = 1-alpha,lty=2)

print(paste0("CQR average coverage: ",mean(cov_cqr), " average width: ",mean(width_cqr) ) )
print(paste0("EFCP average coverage: ",mean(cov_efcp), " average width: ",mean(width_efcp) ) )
print(paste0("VFCP average coverage: ",mean(cov_vfcp), " average width: ",mean(width_vfcp) ) )
par(oldpar)

Try the ConformalSmallest package in your browser

Any scripts or data that you put into this service are public.

ConformalSmallest documentation built on Aug. 9, 2021, 5:07 p.m.